3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeMap;
34 import java.util.TreeSet;
36 import org.forester.species.Species;
37 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
38 import org.forester.util.ForesterUtil;
/**
 * A DomainSimilarity implementation that can render itself into a
 * StringBuffer, either as a simple tab-delimited line or as a detailed HTML
 * table row (see toStringBuffer).
 */
40 public class PrintableDomainSimilarity implements DomainSimilarity {
// Appended after every per-species entry when species data is written out.
42 final public static String SPECIES_SEPARATOR = " ";
// NOTE(review): not referenced in the visible lines; presumably the value compareTo returns for identical objects -- confirm.
43 final private static int EQUAL = 0;
// Placeholder written for a species of the custom order that has no data in this similarity.
44 final private static String NO_SPECIES = " ";
// Compile-time switch guarding additional output in toStringBufferDetailedHTML.
45 private static final boolean PRINT_MORE_INFO = false;
// Similarity score statistics; their assignments are not visible in this view.
46 final private double _min;
47 final private double _max;
48 final private double _mean;
49 final private double _sd;
// Maximal differences as passed to the constructor.
51 private final int _max_difference_in_counts;
52 private final int _max_difference;
// Supplies the key domain id (see getDomainId).
53 final private CombinableDomains _combinable_domains;
// Per-species data, keyed by species; never null or empty after construction.
54 final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
// Optional output order of species, installed via setSpeciesOrder.
55 private List<Species> _species_order;
// Level of detail used when printing species data; init() sets it to PUNCTILIOUS.
56 private DomainSimilarityCalculator.Detailedness _detailedness;
// Selects how several columns of the HTML row are rendered.
57 private final boolean _treat_as_binary_comparison;
/**
 * Creates a printable similarity for one set of combinable domains.
 *
 * Throws IllegalArgumentException for null combinable domains, for null or
 * empty species data, when the species count and N do not match, and for the
 * further range checks below (N, SD, min/max, maximal differences), whose
 * conditions are only partly visible in this view.
 *
 * NOTE(review): sort_by_species_count_first is not referenced in the visible
 * lines -- confirm whether it is still used.
 */
59 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
66 final int max_difference_in_counts,
67 final int max_difference,
68 final SortedMap<Species, SpeciesSpecificDcData> species_data,
69 final boolean sort_by_species_count_first,
70 final boolean treat_as_binary_comparison ) {
71 if ( combinable_domains == null ) {
72 throw new IllegalArgumentException( "attempt to use null combinable domains" );
74 if ( species_data == null ) {
75 throw new IllegalArgumentException( "attempt to use null species data" );
77 if ( species_data.size() < 1 ) {
78 throw new IllegalArgumentException( "attempt to use empty species data" );
81 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species at least one comparison (N >= 1) is required.
83 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
84 throw new IllegalArgumentException( "attempt to use N less than 1" );
87 throw new IllegalArgumentException( "attempt to use negative SD" );
90 throw new IllegalArgumentException( "attempt to use max smaller than min" );
93 _combinable_domains = combinable_domains;
99 _max_difference_in_counts = max_difference_in_counts;
100 _max_difference = max_difference;
101 _species_data = species_data;
102 _treat_as_binary_comparison = treat_as_binary_comparison;
// s*s - s == 2*N holds exactly when N equals s*(s-1)/2, the number of unordered species pairs.
103 final int s = species_data.size();
104 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
105 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
106 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages mention "more than two species", but no enclosing species-count condition is visible here -- confirm.
109 if ( getMaximalDifferenceInCounts() < 0 ) {
110 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
112 if ( getMaximalDifference() < 0 ) {
113 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
/**
 * Appends the entry for one species to sb: the species id (either through
 * addTaxWithLink or as plain text), then, unless the detailedness is BASIC,
 * the species-specific data, and finally SPECIES_SEPARATOR.
 *
 * NOTE(review): the html parameter and the condition choosing between the
 * linked and the plain species id are not visible in this view; presumably
 * the choice depends on html -- confirm.
 */
118 private void addSpeciesSpecificDomainData( final StringBuffer sb,
119 final Species species,
121 final Map<String, Integer> tax_code_to_id_map ) {
123 addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map );
126 sb.append( species.getSpeciesId() );
// BASIC detailedness prints the species id only.
128 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
130 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
136 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
/**
 * Appends a taxonomy code to sb. When the code is non-empty and
 * tax_code_to_id_map is non-null and contains it, the code is wrapped in a
 * link built from SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK and the mapped
 * id, opening in the "taxonomy_window" target.
 *
 * NOTE(review): the bare append of tax_code is presumably the fallback branch
 * (the else line is not visible here) -- confirm. tax_code is written into
 * the markup without HTML escaping.
 */
140 private void addTaxWithLink( final StringBuffer sb,
141 final String tax_code,
142 final Map<String, Integer> tax_code_to_id_map ) {
144 if ( !ForesterUtil.isEmpty( tax_code )
145 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
146 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK + tax_code_to_id_map.get( tax_code )
147 + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
150 sb.append( tax_code );
155 private int compareByDomainId( final DomainSimilarity other ) {
156 return getDomainId().compareToIgnoreCase( other.getDomainId() );
160 public int compareTo( final DomainSimilarity domain_similarity ) {
161 if ( this == domain_similarity ) {
164 else if ( domain_similarity == null ) {
165 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
167 else if ( domain_similarity.getClass() != this.getClass() ) {
168 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
169 + domain_similarity.getClass() );
171 return compareByDomainId( domain_similarity );
175 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
176 final SortedSet<String> sorted_ids = new TreeSet<String>();
177 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
178 for( final String id : getSpeciesData().get( species_of_combinable_domain )
179 .getCombinableDomainIdToCountsMap().keySet() ) {
180 sorted_ids.add( id );
186 private CombinableDomains getCombinableDomains() {
187 return _combinable_domains;
190 private DomainSimilarityCalculator.Detailedness getDetaildness() {
191 return _detailedness;
195 public String getDomainId() {
196 return getCombinableDomains().getKeyDomain();
200 public int getMaximalDifference() {
201 return _max_difference;
205 public int getMaximalDifferenceInCounts() {
206 return _max_difference_in_counts;
/**
 * Accessor for the maximal similarity score.
 *
 * NOTE(review): the body is not visible in this view; presumably it returns
 * the _max field -- confirm.
 */
210 public double getMaximalSimilarityScore() {
/**
 * Accessor for the mean similarity score.
 *
 * NOTE(review): the body is not visible in this view; presumably it returns
 * the _mean field -- confirm.
 */
215 public double getMeanSimilarityScore() {
/**
 * Accessor for the minimal similarity score.
 *
 * NOTE(review): the body is not visible in this view; presumably it returns
 * the _min field -- confirm.
 */
220 public double getMinimalSimilarityScore() {
230 public SortedSet<Species> getSpecies() {
231 final SortedSet<Species> species = new TreeSet<Species>();
232 for( final Species s : getSpeciesData().keySet() ) {
238 public List<Species> getSpeciesCustomOrder() {
239 return _species_order;
243 public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
244 return _species_data;
247 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
248 final Map<String, Integer> tax_code_to_id_map ) {
249 final StringBuffer sb = new StringBuffer();
250 for( final Species species : getSpeciesData().keySet() ) {
251 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map );
/**
 * Renders, per combinable domain id (sorted), a link built from
 * SurfacingConstants.PFAM_FAMILY_ID_LINK together with the ids of the species
 * in which that domain is combinable.
 *
 * NOTE(review): the statements that append the individual species ids and
 * any separators are not visible in this view. Domain ids are written into
 * the markup without HTML escaping.
 */
256 private StringBuffer getDomainDataInAlphabeticalOrder() {
// Combinable domain id -> sorted ids of the species that have it.
257 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
258 final StringBuffer sb = new StringBuffer();
259 for( final Species species : getSpeciesData().keySet() ) {
260 for( final String combable_dom : getCombinableDomainIds( species ) ) {
// Create the species set the first time a domain id is seen.
261 if ( !m.containsKey( combable_dom ) ) {
262 m.put( combable_dom, new TreeSet<String>() );
264 m.get( combable_dom ).add( species.getSpeciesId() );
267 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
268 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
270 for( final String s : e.getValue() ) {
279 private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
280 final StringBuffer sb = new StringBuffer();
281 for( final Species order_species : getSpeciesCustomOrder() ) {
282 if ( getSpeciesData().keySet().contains( order_species ) ) {
283 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map );
286 sb.append( PrintableDomainSimilarity.NO_SPECIES );
287 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
/**
 * Accessor for the standard deviation of the similarity score.
 *
 * NOTE(review): the body is not visible in this view; presumably it returns
 * the _sd field -- confirm.
 */
294 public double getStandardDeviationOfSimilarityScore() {
298 private void init() {
299 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
302 private boolean isTreatAsBinaryComparison() {
303 return _treat_as_binary_comparison;
306 public void setDetailedness( final Detailedness detailedness ) {
307 _detailedness = detailedness;
310 public void setSpeciesOrder( final List<Species> species_order ) {
311 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
312 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
314 _species_order = species_order;
318 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
319 final Map<String, Integer> tax_code_to_id_map ) {
320 switch ( print_option ) {
321 case SIMPLE_TAB_DELIMITED:
322 return toStringBufferSimpleTabDelimited();
324 return toStringBufferDetailedHTML( tax_code_to_id_map );
326 throw new AssertionError( "Unknown print option: " + print_option );
/**
 * Renders this similarity as one HTML table row: domain id (linked and
 * anchored), a literature search link, the mean score, optional additional
 * statistics, the maximal differences, the species count and the per-species
 * and per-domain data.
 *
 * NOTE(review): the statements opening the row and the individual cells, and
 * several branch lines, are not visible in this view; the comments below
 * describe only what the visible lines establish.
 */
330 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map ) {
331 final StringBuffer sb = new StringBuffer();
// Domain id as a link built from PFAM_FAMILY_ID_LINK, opened in the "pfam_window" target.
335 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
336 + getDomainId() + "</a>" );
// Named anchor for the domain id. NOTE(review): no closing </a> for it is visible here -- confirm.
338 sb.append( "<a name=\"" + getDomainId() + "\">" );
339 sb.append( "</td>" );
// "gs" link built from GOOGLE_SCHOLAR_SEARCH and the domain id.
341 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
342 + "\" target=\"gs_window\">gs</a>" );
343 sb.append( "</td>" );
// Mean similarity score, rounded to 3 decimals.
345 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
346 sb.append( "</td>" );
// Standard deviation / min / max are written only when PRINT_MORE_INFO is on (currently false).
347 if ( PRINT_MORE_INFO ) {
348 if ( !isTreatAsBinaryComparison() ) {
351 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
353 sb.append( "</td>" );
356 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
358 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
360 sb.append( "</td>" );
364 sb.append( getMaximalDifference() );
365 sb.append( "</td>" );
// Binary comparisons print the maximal difference in counts as stored; the other visible append prints its absolute value.
367 if ( isTreatAsBinaryComparison() ) {
368 sb.append( getMaximalDifferenceInCounts() );
371 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
373 sb.append( "</td>" );
// The species count is written only when this is not a binary comparison.
374 if ( !isTreatAsBinaryComparison() ) {
377 sb.append( getSpeciesData().size() );
379 sb.append( "</td>" );
// Without a custom species order the species data is written in the map's key order.
381 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
383 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
384 sb.append( getDomainDataInAlphabeticalOrder() );
385 sb.append( "</td>" );
// Species data in the custom order; presumably the else branch of the check above -- confirm.
389 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
390 sb.append( getDomainDataInAlphabeticalOrder() );
391 sb.append( "</td>" );
393 sb.append( "</tr>" );
397 private StringBuffer toStringBufferSimpleTabDelimited() {
398 final StringBuffer sb = new StringBuffer();
399 sb.append( getDomainId() );
/**
 * Output formats accepted by toStringBuffer: SIMPLE_TAB_DELIMITED selects
 * toStringBufferSimpleTabDelimited(); HTML presumably selects the detailed
 * HTML row (its case label is not visible in this view) -- confirm.
 */
403 public static enum PRINT_OPTION {
404 SIMPLE_TAB_DELIMITED, HTML;