3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.awt.Color;
30 import java.util.List;
32 import java.util.SortedMap;
33 import java.util.SortedSet;
34 import java.util.TreeMap;
35 import java.util.TreeSet;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyNode;
39 import org.forester.species.Species;
40 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
41 import org.forester.util.ForesterUtil;
43 public class PrintableDomainSimilarity implements DomainSimilarity {
// Separator appended after the NO_SPECIES placeholder in getSpeciesDataInCustomOrder().
45 final public static String SPECIES_SEPARATOR = " ";
// Value 0; presumably the compareTo() result for identical objects -- no use is visible in this view.
46 final private static int EQUAL = 0;
// Placeholder appended in getSpeciesDataInCustomOrder(); presumably for ordered species without data -- TODO confirm.
47 final private static String NO_SPECIES = " ";
// Similarity score statistics (minimum, maximum, mean, standard deviation, judging by the names).
// NOTE(review): the assignments to these four fields are not visible in this view.
48 final private double _min;
49 final private double _max;
50 final private double _mean;
51 final private double _sd;
// NOTE(review): a field _n is referenced by the first constructor but its declaration is not visible here.
// Assigned from the constructor arguments of the same name; exposed through
// getMaximalDifferenceInCounts() and getMaximalDifference().
53 private final int _max_difference_in_counts;
54 private final int _max_difference;
// Source of the key domain id returned by getDomainId().
55 final private CombinableDomains _combinable_domains;
// Per-species data, keyed by species; must be non-null and non-empty (checked in the constructors).
56 final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
// Optional custom output order, set through setSpeciesOrder(); not assigned in the visible constructor lines.
57 private List<Species> _species_order;
// Level of detail used when printing species data; set in init() and setDetailedness().
58 private DomainSimilarityCalculator.Detailedness _detailedness;
// Switches between binary-comparison and multi-species output in toStringBufferDetailedHTML().
59 private final boolean _treat_as_binary_comparison;
// Creates a similarity record for one key domain, validating its arguments.
// Throws IllegalArgumentException for null combinable domains, null or empty
// species data, and the other conditions named in the messages below.
// NOTE(review): several lines of this constructor are not visible in this view
// (the declarations of the parameters n, sd, min and max that the checks below
// refer to, some "if" headers, closing braces) -- confirm against the full file.
61 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
68 final int max_difference_in_counts,
69 final int max_difference,
70 final SortedMap<Species, SpeciesSpecificDcData> species_data,
// NOTE(review): sort_by_species_count_first is not used in any visible line.
71 final boolean sort_by_species_count_first,
72 final boolean treat_as_binary_comparison ) {
73 if ( combinable_domains == null ) {
74 throw new IllegalArgumentException( "attempt to use null combinable domains" );
76 if ( species_data == null ) {
77 throw new IllegalArgumentException( "attempt to use null species data" );
79 if ( species_data.size() < 1 ) {
80 throw new IllegalArgumentException( "attempt to use empty species data" );
// The condition guarding the next throw is not visible; presumably n < 0.
83 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species, n must be at least 1.
85 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
86 throw new IllegalArgumentException( "attempt to use N less than 1" );
// The conditions guarding the next two throws are not visible either.
89 throw new IllegalArgumentException( "attempt to use negative SD" );
92 throw new IllegalArgumentException( "attempt to use max smaller than min" );
95 _combinable_domains = combinable_domains;
101 _max_difference_in_counts = max_difference_in_counts;
102 _max_difference = max_difference;
103 _species_data = species_data;
104 _treat_as_binary_comparison = treat_as_binary_comparison;
105 final int s = species_data.size();
// Consistency check: s * (s - 1) must equal 2 * N, i.e. N is the number of
// unordered pairs that can be formed from s species.
106 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
107 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
108 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages below mention "more than two species", but no
// species-count condition is visible here; it may be on a line missing from this view.
111 if ( getMaximalDifferenceInCounts() < 0 ) {
112 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
114 if ( getMaximalDifference() < 0 ) {
115 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Creates a similarity record for one key domain; the visible lines perform
// no checks on score statistics.
// Throws IllegalArgumentException for null combinable domains and for null or
// empty species data.
// NOTE(review): some lines of this constructor are not visible in this view
// (closing braces and the lines between the checks and the assignments);
// how the score fields are initialised here cannot be told from this view.
120 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
121 final int max_difference_in_counts,
122 final int max_difference,
123 final SortedMap<Species, SpeciesSpecificDcData> species_data,
// NOTE(review): sort_by_species_count_first is not used in any visible line.
124 final boolean sort_by_species_count_first,
125 final boolean treat_as_binary_comparison ) {
126 if ( combinable_domains == null ) {
127 throw new IllegalArgumentException( "attempt to use null combinable domains" );
129 if ( species_data == null ) {
130 throw new IllegalArgumentException( "attempt to use null species data" );
132 if ( species_data.size() < 1 ) {
133 throw new IllegalArgumentException( "attempt to use empty species data" );
136 _combinable_domains = combinable_domains;
142 _max_difference_in_counts = max_difference_in_counts;
143 _max_difference = max_difference;
144 _species_data = species_data;
145 _treat_as_binary_comparison = treat_as_binary_comparison;
// s is not used in any visible line; presumably it guards the checks below
// ("more than two species") on a line missing from this view -- TODO confirm.
146 final int s = species_data.size();
148 if ( getMaximalDifferenceInCounts() < 0 ) {
149 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
151 if ( getMaximalDifference() < 0 ) {
152 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Appends the output for one species to sb: the species id, followed (for
// detail levels other than BASIC) by that species' data rendered as a string buffer.
// NOTE(review): lines are missing from this view -- the declaration of the
// "html" parameter used below and the branch structure around the two ways
// of writing the species id.
157 private void addSpeciesSpecificDomainData( final StringBuffer sb,
158 final Species species,
160 final Map<String, Integer> tax_code_to_id_map,
161 final Phylogeny phy ) {
// Species id written as a (possibly linked) taxonomy code ...
163 addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
// ... or as plain text; presumably these are the html / non-html alternatives -- TODO confirm.
166 sb.append( species.getSpeciesId() );
168 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
175 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
// Appends tax_code to sb. When the code is non-empty and present in
// tax_code_to_id_map it is written as an HTML link whose target is
// SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK followed by the mapped id.
// NOTE(review): the declaration of "c" and the else/closing-brace lines are
// not visible in this view.
185 private void addTaxWithLink( final StringBuffer sb,
186 final String tax_code,
187 final Map<String, Integer> tax_code_to_id_map,
188 final Phylogeny phy ) {
// Colour taken from the phylogeny when one is supplied ...
190 if ( phy != null && !phy.isEmpty() ) {
191 c = getColorDependingOnTaxonomy( tax_code, phy );
// ... black otherwise (presumably the else branch -- TODO confirm).
194 c = new Color( 0, 0, 0 );
// Colour formatted as "#rrggbb" in lower-case hex.
196 final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
198 if ( !ForesterUtil.isEmpty( tax_code )
199 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
// NOTE(review): tax_code is concatenated into the markup without HTML escaping.
200 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK + tax_code_to_id_map.get( tax_code )
201 + "\" target=\"taxonomy_window\" color=\"" + hex + "\">" + tax_code + "</a>" );
// Plain-text output; presumably the fallback when no link can be built -- TODO confirm.
204 sb.append( tax_code );
// Looks up the single phylogeny node carrying tax_code and derives a colour
// from it via SurfacingUtil.getColorForTaxCode(), trying the taxonomy's
// scientific name first and the node name second.
// Throws RuntimeException when the code matches no node or more than one node.
// NOTE(review): the declaration of "c", the end of the loop and the return
// statement are not visible in this view; the loop condition suggests a walk
// from the node towards the root, but the step itself is not shown -- TODO confirm.
209 private Color getColorDependingOnTaxonomy( final String tax_code, final Phylogeny phy ) {
210 List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
212 if ( nodes == null || nodes.isEmpty() ) {
213 throw new RuntimeException( tax_code + " is not found" );
215 if ( nodes.size() != 1 ) {
216 throw new RuntimeException( tax_code + " is not unique" );
218 PhylogenyNode n = nodes.get( 0 );
219 while ( n != null ) {
// First choice: colour registered for the node's scientific name.
221 if ( n.getNodeData().isHasTaxonomy()
222 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
223 c = SurfacingUtil.getColorForTaxCode( n.getNodeData().getTaxonomy().getScientificName() );
// Second choice: colour registered for the node's own name.
225 if ( c == null && !ForesterUtil.isEmpty( n.getName() ) ) {
226 c = SurfacingUtil.getColorForTaxCode( n.getName() );
236 private int compareByDomainId( final DomainSimilarity other ) {
237 return getDomainId().compareToIgnoreCase( other.getDomainId() );
241 public int compareTo( final DomainSimilarity domain_similarity ) {
242 if ( this == domain_similarity ) {
245 else if ( domain_similarity == null ) {
246 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
248 else if ( domain_similarity.getClass() != this.getClass() ) {
249 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
250 + domain_similarity.getClass() );
252 return compareByDomainId( domain_similarity );
256 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
257 final SortedSet<String> sorted_ids = new TreeSet<String>();
258 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
259 for( final String id : getSpeciesData().get( species_of_combinable_domain )
260 .getCombinableDomainIdToCountsMap().keySet() ) {
261 sorted_ids.add( id );
267 private CombinableDomains getCombinableDomains() {
268 return _combinable_domains;
271 private DomainSimilarityCalculator.Detailedness getDetaildness() {
272 return _detailedness;
276 public String getDomainId() {
277 return getCombinableDomains().getKeyDomain();
281 public int getMaximalDifference() {
282 return _max_difference;
286 public int getMaximalDifferenceInCounts() {
287 return _max_difference_in_counts;
// Accessor for the maximal similarity score (per its name).
// NOTE(review): the body is not visible in this view; presumably returns _max -- TODO confirm.
291 public double getMaximalSimilarityScore() {
// Accessor for the mean similarity score (per its name).
// NOTE(review): the body is not visible in this view; presumably returns _mean -- TODO confirm.
296 public double getMeanSimilarityScore() {
// Accessor for the minimal similarity score (per its name).
// NOTE(review): the body is not visible in this view; presumably returns _min -- TODO confirm.
301 public double getMinimalSimilarityScore() {
311 public SortedSet<Species> getSpecies() {
312 final SortedSet<Species> species = new TreeSet<Species>();
313 for( final Species s : getSpeciesData().keySet() ) {
319 public List<Species> getSpeciesCustomOrder() {
320 return _species_order;
324 public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
325 return _species_data;
328 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
329 final Map<String, Integer> tax_code_to_id_map,
330 final Phylogeny phy ) {
331 final StringBuffer sb = new StringBuffer();
332 for( final Species species : getSpeciesData().keySet() ) {
333 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
// Builds HTML listing every combinable domain id (sorted) as a link built from
// SurfacingConstants.PFAM_FAMILY_ID_LINK, and iterates over the ids of the
// species in which each one occurs.
// NOTE(review): lines are missing from this view -- the body of the inner
// species-id loop, any separators appended around the link, and the return.
338 private StringBuffer getDomainDataInAlphabeticalOrder() {
// Maps combinable domain id -> sorted ids of the species that have it.
339 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
340 final StringBuffer sb = new StringBuffer();
341 for( final Species species : getSpeciesData().keySet() ) {
342 for( final String combable_dom : getCombinableDomainIds( species ) ) {
// First occurrence of this domain id: create its species set.
343 if ( !m.containsKey( combable_dom ) ) {
344 m.put( combable_dom, new TreeSet<String>() );
346 m.get( combable_dom ).add( species.getSpeciesId() );
349 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
350 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
352 for( final String s : e.getValue() ) {
361 private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
362 final Map<String, Integer> tax_code_to_id_map,
363 final Phylogeny phy ) {
364 final StringBuffer sb = new StringBuffer();
365 for( final Species order_species : getSpeciesCustomOrder() ) {
366 if ( getSpeciesData().keySet().contains( order_species ) ) {
367 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
370 sb.append( PrintableDomainSimilarity.NO_SPECIES );
371 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Accessor for the standard deviation of the similarity score (per its name).
// NOTE(review): the body is not visible in this view; presumably returns _sd -- TODO confirm.
378 public double getStandardDeviationOfSimilarityScore() {
382 private void init() {
383 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
386 private boolean isTreatAsBinaryComparison() {
387 return _treat_as_binary_comparison;
390 public void setDetailedness( final Detailedness detailedness ) {
391 _detailedness = detailedness;
394 public void setSpeciesOrder( final List<Species> species_order ) {
395 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
396 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
398 _species_order = species_order;
// Renders this similarity in the requested format by dispatching on
// print_option: SIMPLE_TAB_DELIMITED goes to toStringBufferSimpleTabDelimited(),
// the other visible return goes to toStringBufferDetailedHTML().
// Throws AssertionError for a print option that is not handled.
// NOTE(review): lines are missing from this view -- the declaration of the
// "phy" parameter used below and the case labels in front of the second
// return and the throw (presumably "case HTML:" and "default:" -- TODO confirm).
402 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
403 final Map<String, Integer> tax_code_to_id_map,
405 switch ( print_option ) {
406 case SIMPLE_TAB_DELIMITED:
407 return toStringBufferSimpleTabDelimited();
409 return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
411 throw new AssertionError( "Unknown print option: " + print_option );
// Renders this similarity as HTML table cells ending with "</tr>": links for
// the domain id, score statistics, difference values, and the per-species data.
// NOTE(review): many lines are missing from this view (presumably the cell
// openers, else branches, closing braces and the return), so the exact cell
// layout cannot be confirmed from here.
415 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, Phylogeny phy ) {
416 final StringBuffer sb = new StringBuffer();
// Domain id as a link built from SurfacingConstants.PFAM_FAMILY_ID_LINK, opened in the "pfam_window" target.
420 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
421 + getDomainId() + "</a>" );
// Named anchor for in-page navigation; NOTE(review): no closing </a> is visible for it.
423 sb.append( "<a name=\"" + getDomainId() + "\">" );
424 sb.append( "</td>" );
// "gs" link built from SurfacingConstants.GOOGLE_SCHOLAR_SEARCH plus the domain id.
426 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
427 + "\" target=\"gs_window\">gs</a>" );
428 sb.append( "</td>" );
// Score statistics appear under this positive-maximal-score check; the
// extent of the branch is not visible in this view.
429 if ( getMaximalSimilarityScore() > 0 ) {
431 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
432 sb.append( "</td>" );
433 if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
434 if ( !isTreatAsBinaryComparison() ) {
437 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
439 sb.append( "</td>" );
442 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
444 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
446 sb.append( "</td>" );
451 sb.append( getMaximalDifference() );
452 sb.append( "</td>" );
// Binary comparisons print the signed count difference ...
454 if ( isTreatAsBinaryComparison() ) {
455 sb.append( getMaximalDifferenceInCounts() );
// ... otherwise (presumably the else branch) its absolute value.
458 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
460 sb.append( "</td>" );
// The number of species is printed only for non-binary comparisons.
461 if ( !isTreatAsBinaryComparison() ) {
464 sb.append( getSpeciesData().size() );
466 sb.append( "</td>" );
// Without a custom species order the species data map's key order is used ...
468 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
470 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
471 sb.append( getDomainDataInAlphabeticalOrder() );
472 sb.append( "</td>" );
// ... otherwise (presumably the else branch) the custom order.
476 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
477 sb.append( getDomainDataInAlphabeticalOrder() );
478 sb.append( "</td>" );
480 sb.append( "</tr>" );
// Renders the domain id followed by the non-HTML species data (no taxonomy
// map, no phylogeny).
// NOTE(review): lines are missing from this view -- whatever is appended
// between the two parts (presumably a tab, given the method name) and the return.
484 private StringBuffer toStringBufferSimpleTabDelimited() {
485 final StringBuffer sb = new StringBuffer();
486 sb.append( getDomainId() );
488 sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
/**
 * Output formats accepted by toStringBuffer.
 */
public enum PRINT_OPTION {
    SIMPLE_TAB_DELIMITED,
    HTML
}