3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.awt.Color;
30 import java.util.HashMap;
31 import java.util.List;
33 import java.util.SortedMap;
34 import java.util.SortedSet;
35 import java.util.TreeMap;
36 import java.util.TreeSet;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.species.Species;
41 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
42 import org.forester.util.ForesterUtil;
// A DomainSimilarity implementation that can render itself as text: see
// toStringBuffer(), which dispatches to toStringBufferSimpleTabDelimited() or
// toStringBufferDetailedHTML(). Constructor arguments are validated and rejected
// with IllegalArgumentException.
44 public class PrintableDomainSimilarity implements DomainSimilarity {
// Appended right after NO_SPECIES by getSpeciesDataInCustomOrder().
46 final public static String SPECIES_SEPARATOR = " ";
// NOTE(review): not referenced in any visible line; presumably the compareTo()
// result for identical objects -- confirm.
47 final private static int EQUAL = 0;
// Placeholder text appended by getSpeciesDataInCustomOrder().
48 final private static String NO_SPECIES = " ";
// Similarity score statistics.
// NOTE(review): neither their assignments nor the bodies of their getters are
// visible in this listing -- confirm how they are populated.
49 final private double _min;
50 final private double _max;
51 final private double _mean;
52 final private double _sd;
// Maximal differences, taken unchanged from the constructor arguments and
// checked there to be non-negative.
54 private final int _max_difference_in_counts;
55 private final int _max_difference;
// Never null: the constructors reject a null argument. Supplies the key domain id.
56 final private CombinableDomains _combinable_domains;
// Per-species data, stored by reference; the constructors require it to be
// non-null and non-empty.
57 final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
// Optional custom species order installed by setSpeciesOrder();
// toStringBufferDetailedHTML() treats null or empty as "no custom order".
58 private List<Species> _species_order;
// Level of detail used when rendering species data; set by init() and
// setDetailedness().
59 private DomainSimilarityCalculator.Detailedness _detailedness;
// Switches some HTML cells between binary-comparison and multi-species output.
60 private final boolean _treat_as_binary_comparison;
// Static cache shared by all instances: taxonomy code -> "#rrggbb" color string,
// filled by obtainHexColorStringDependingOnTaxonomyGroup().
61 private final static Map<String, String> _TAXCODE_HEXCOLORSTRING_MAP = new HashMap<String, String>();
// Creates a similarity after validating its arguments; every failed check raises
// IllegalArgumentException.
// NOTE(review): several parameter lines, the conditions guarding some of the
// throws, and some field assignments are not visible in this listing.
// sort_by_species_count_first is not read in any visible line of this constructor.
63 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
70 final int max_difference_in_counts,
71 final int max_difference,
72 final SortedMap<Species, SpeciesSpecificDcData> species_data,
73 final boolean sort_by_species_count_first,
74 final boolean treat_as_binary_comparison ) {
// The key-domain container is mandatory.
75 if ( combinable_domains == null ) {
76 throw new IllegalArgumentException( "attempt to use null combinable domains" );
// Species data must be present and hold at least one species.
78 if ( species_data == null ) {
79 throw new IllegalArgumentException( "attempt to use null species data" );
81 if ( species_data.size() < 1 ) {
82 throw new IllegalArgumentException( "attempt to use empty species data" );
85 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species, n must be at least 1.
87 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
88 throw new IllegalArgumentException( "attempt to use N less than 1" );
91 throw new IllegalArgumentException( "attempt to use negative SD" );
94 throw new IllegalArgumentException( "attempt to use max smaller than min" );
97 _combinable_domains = combinable_domains;
103 _max_difference_in_counts = max_difference_in_counts;
104 _max_difference = max_difference;
// Stored by reference, not copied.
105 _species_data = species_data;
106 _treat_as_binary_comparison = treat_as_binary_comparison;
// s species form ( s * s - s ) / 2 unordered pairs; getN() must equal that count.
107 final int s = species_data.size();
108 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
109 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
110 + " for domain " + combinable_domains.getKeyDomain() );
// Both maximal differences must be non-negative.
// NOTE(review): the messages mention "more than two species", but no such guard
// is visible in this listing -- confirm.
113 if ( getMaximalDifferenceInCounts() < 0 ) {
114 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
116 if ( getMaximalDifference() < 0 ) {
117 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Creates a similarity from the combinable domains, the two maximal differences
// and the per-species data; every failed check raises IllegalArgumentException.
// NOTE(review): some statements of this constructor are not visible in this
// listing (for example any assignment of the final score fields).
// sort_by_species_count_first is not read in any visible line of this constructor.
122 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
123 final int max_difference_in_counts,
124 final int max_difference,
125 final SortedMap<Species, SpeciesSpecificDcData> species_data,
126 final boolean sort_by_species_count_first,
127 final boolean treat_as_binary_comparison ) {
// The key-domain container is mandatory.
128 if ( combinable_domains == null ) {
129 throw new IllegalArgumentException( "attempt to use null combinable domains" );
// Species data must be present and hold at least one species.
131 if ( species_data == null ) {
132 throw new IllegalArgumentException( "attempt to use null species data" );
134 if ( species_data.size() < 1 ) {
135 throw new IllegalArgumentException( "attempt to use empty species data" );
138 _combinable_domains = combinable_domains;
144 _max_difference_in_counts = max_difference_in_counts;
145 _max_difference = max_difference;
// Stored by reference, not copied.
146 _species_data = species_data;
147 _treat_as_binary_comparison = treat_as_binary_comparison;
// NOTE(review): s is not used in any visible line below.
148 final int s = species_data.size();
// Both maximal differences must be non-negative.
// NOTE(review): the messages mention "more than two species", but no such guard
// is visible in this listing -- confirm.
150 if ( getMaximalDifferenceInCounts() < 0 ) {
151 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
153 if ( getMaximalDifference() < 0 ) {
154 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Appends the output for one species to sb: its species id (either through
// addTaxWithLink() or as plain text) and, unless the detailedness is BASIC, the
// species-specific data rendered by SpeciesSpecificDcData.toStringBuffer().
// NOTE(review): the declaration of html (callers pass it as the third argument)
// and the conditions selecting between the two id appends are not visible in
// this listing -- presumably html selects the linked form; confirm.
159 private void addSpeciesSpecificDomainData( final StringBuffer sb,
160 final Species species,
162 final Map<String, Integer> tax_code_to_id_map,
163 final Phylogeny phy ) {
// Species id as a (possibly colored) taxonomy link.
165 addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
// Species id as plain text.
168 sb.append( species.getSpeciesId() );
// Detailed per-species data is skipped at BASIC detailedness.
170 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
177 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
// Appends tax_code to sb. When tax_code is non-empty and tax_code_to_id_map
// contains it, the code is wrapped in a link built from
// SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK plus the mapped id, and is
// additionally wrapped in a <font color> tag when a hex color is available.
// NOTE(review): the declaration of hex and the else branches are not visible in
// this listing.
187 private void addTaxWithLink( final StringBuffer sb,
188 final String tax_code,
189 final Map<String, Integer> tax_code_to_id_map,
190 final Phylogeny phy ) {
// A color is only looked up when a non-empty phylogeny is supplied.
192 if ( phy != null && !phy.isEmpty() ) {
193 hex = obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
// A link needs a non-empty code with an entry in the id map.
196 if ( !ForesterUtil.isEmpty( tax_code )
197 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
// Colored link.
198 if ( !ForesterUtil.isEmpty( hex ) ) {
199 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
200 + tax_code_to_id_map.get( tax_code ) + "\" target=\"t_w\"><font color=\"" + hex + "\">"
201 + tax_code + "</font></a>" );
// Same link without the <font> coloring.
204 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
205 + tax_code_to_id_map.get( tax_code ) + "\" target=\"t_w\">" + tax_code + "</a>" );
// Plain code without markup.
// NOTE(review): presumably the fallback when no link can be built -- confirm.
209 sb.append( tax_code );
// Returns the "#rrggbb" color string cached for tax_code in
// _TAXCODE_HEXCOLORSTRING_MAP. When phy is non-null and the code is not cached
// yet, the color is computed and cached first. With phy == null and no cached
// entry, the final map lookup yields null.
// Throws RuntimeException when the code matches no node or more than one node of
// the phylogeny.
214 private String obtainHexColorStringDependingOnTaxonomyGroup( final String tax_code, final Phylogeny phy ) {
215 if ( phy != null && !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) {
216 List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
// The taxonomy code must identify exactly one node.
218 if ( nodes == null || nodes.isEmpty() ) {
219 throw new RuntimeException( tax_code + " is not found" );
221 if ( nodes.size() != 1 ) {
222 throw new RuntimeException( tax_code + " is not unique" );
// Starting at that node, try the taxonomy's scientific name first, then the
// node name, as input to ForesterUtil.obtainColorDependingOnTaxonomyGroup().
// NOTE(review): the declaration of c and the statements that advance or leave
// the loop are not visible here -- presumably it walks towards the root until a
// color is found; confirm.
224 PhylogenyNode n = nodes.get( 0 );
225 while ( n != null ) {
226 if ( n.getNodeData().isHasTaxonomy()
227 && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
228 c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
229 .getScientificName() );
231 if ( c == null && !ForesterUtil.isEmpty( n.getName() ) ) {
232 c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName() );
// NOTE(review): the guard for this throw is not visible; presumably c == null.
240 throw new RuntimeException( "no color found for taxonomy code \"" + tax_code + "\"" );
// Two lower-case hex digits per RGB channel.
242 final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
243 _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex );
245 return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
248 private int compareByDomainId( final DomainSimilarity other ) {
249 return getDomainId().compareToIgnoreCase( other.getDomainId() );
// Compares this similarity with another one by domain id (case-insensitive, via
// compareByDomainId()).
// Throws IllegalArgumentException when the argument is null or is an instance of
// a different class.
253 public int compareTo( final DomainSimilarity domain_similarity ) {
// NOTE(review): the statement executed for the identical object is not visible
// in this listing; presumably it returns EQUAL -- confirm.
254 if ( this == domain_similarity ) {
257 else if ( domain_similarity == null ) {
258 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
// Only instances of exactly the same class are comparable.
260 else if ( domain_similarity.getClass() != this.getClass() ) {
261 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
262 + domain_similarity.getClass() );
264 return compareByDomainId( domain_similarity );
// Collects into a new sorted set the combinable domain ids recorded for the
// given species, i.e. the keys of its getCombinableDomainIdToCountsMap().
// A species without an entry in the species data contributes nothing.
// NOTE(review): the return statement is not visible in this listing; presumably
// sorted_ids is returned -- confirm.
268 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
269 final SortedSet<String> sorted_ids = new TreeSet<String>();
270 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
271 for( final String id : getSpeciesData().get( species_of_combinable_domain )
272 .getCombinableDomainIdToCountsMap().keySet() ) {
273 sorted_ids.add( id );
279 private CombinableDomains getCombinableDomains() {
280 return _combinable_domains;
283 private DomainSimilarityCalculator.Detailedness getDetaildness() {
284 return _detailedness;
288 public String getDomainId() {
289 return getCombinableDomains().getKeyDomain();
293 public int getMaximalDifference() {
294 return _max_difference;
298 public int getMaximalDifferenceInCounts() {
299 return _max_difference_in_counts;
303 public double getMaximalSimilarityScore() {
308 public double getMeanSimilarityScore() {
313 public double getMinimalSimilarityScore() {
// Builds a new sorted set from the species that are the keys of the species data.
// NOTE(review): the loop body and the return statement are not visible in this
// listing; presumably every key is added and the set is returned -- confirm.
323 public SortedSet<Species> getSpecies() {
324 final SortedSet<Species> species = new TreeSet<Species>();
325 for( final Species s : getSpeciesData().keySet() ) {
331 public List<Species> getSpeciesCustomOrder() {
332 return _species_order;
336 public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
337 return _species_data;
// Renders the data of every species into a new StringBuffer by calling
// addSpeciesSpecificDomainData() for each key of the species data, in the key
// order of that sorted map. html, tax_code_to_id_map and phy are passed through
// unchanged.
// NOTE(review): the return statement is not visible in this listing; presumably
// sb is returned -- confirm.
340 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
341 final Map<String, Integer> tax_code_to_id_map,
342 final Phylogeny phy ) {
343 final StringBuffer sb = new StringBuffer();
344 for( final Species species : getSpeciesData().keySet() ) {
345 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
// Inverts the species data into a sorted map from combinable domain id to the
// sorted ids of the species that have it, then renders each domain as a link
// (SurfacingConstants.PFAM_FAMILY_ID_LINK + id) together with its species ids.
// NOTE(review): separators, the uncolored branch and the return statement are
// not visible in this listing.
350 private StringBuffer getDomainDataInAlphabeticalOrder() {
351 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
352 final StringBuffer sb = new StringBuffer();
// Pass 1: group species ids under each combinable domain id.
353 for( final Species species : getSpeciesData().keySet() ) {
354 for( final String combable_dom : getCombinableDomainIds( species ) ) {
// First occurrence of this domain: start its species set.
355 if ( !m.containsKey( combable_dom ) ) {
356 m.put( combable_dom, new TreeSet<String>() );
358 m.get( combable_dom ).add( species.getSpeciesId() );
// Pass 2: emit one link per domain, in the sorted order of the map keys.
361 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
362 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
364 for( final String s : e.getValue() ) {
// phy is passed as null, so no color is computed here: only a color string that
// is already cached for this species id is returned, otherwise null.
365 final String hex = obtainHexColorStringDependingOnTaxonomyGroup( s, null );
366 if ( !ForesterUtil.isEmpty( hex ) ) {
367 sb.append( "<font color=\"" + hex + "\">" + s + "</font>" );
// Renders species data into a new StringBuffer following the list returned by
// getSpeciesCustomOrder(). Species that are keys of the species data are
// rendered through addSpeciesSpecificDomainData().
// getSpeciesCustomOrder() must not return null here; the visible caller checks
// for null/empty before calling this method.
// NOTE(review): the branch structure around the NO_SPECIES / SPECIES_SEPARATOR
// appends and the return statement are not visible in this listing; presumably
// the placeholder is written for listed species without data -- confirm.
379 private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
380 final Map<String, Integer> tax_code_to_id_map,
381 final Phylogeny phy ) {
382 final StringBuffer sb = new StringBuffer();
383 for( final Species order_species : getSpeciesCustomOrder() ) {
384 if ( getSpeciesData().keySet().contains( order_species ) ) {
385 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
388 sb.append( PrintableDomainSimilarity.NO_SPECIES );
389 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
396 public double getStandardDeviationOfSimilarityScore() {
400 private void init() {
401 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
404 private boolean isTreatAsBinaryComparison() {
405 return _treat_as_binary_comparison;
408 public void setDetailedness( final Detailedness detailedness ) {
409 _detailedness = detailedness;
412 public void setSpeciesOrder( final List<Species> species_order ) {
413 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
414 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
416 _species_order = species_order;
// Renders this similarity in the requested format: SIMPLE_TAB_DELIMITED goes to
// toStringBufferSimpleTabDelimited(), the HTML form is produced by
// toStringBufferDetailedHTML(), and any other option raises AssertionError.
// NOTE(review): the last parameter line (phy is used below) and the remaining
// case labels are not visible in this listing.
420 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
421 final Map<String, Integer> tax_code_to_id_map,
423 switch ( print_option ) {
424 case SIMPLE_TAB_DELIMITED:
425 return toStringBufferSimpleTabDelimited();
427 return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
429 throw new AssertionError( "Unknown print option: " + print_option );
// Builds the HTML for this similarity, ending with "</tr>": a Pfam link and a
// named anchor for the domain id, a Google Scholar link, score statistics
// rounded to 3 places, the maximal differences, the species count, and finally
// the species and domain data.
// NOTE(review): many statements (for example the opening cell tags and the else
// branches) and the return statement are not visible in this listing, so the
// exact cell layout cannot be confirmed from here.
433 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, Phylogeny phy ) {
434 final StringBuffer sb = new StringBuffer();
// Domain id as a link to its Pfam family page.
438 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
439 + getDomainId() + "</a>" );
// Named anchor for the domain id.
// NOTE(review): in the visible lines this <a name=...> element is opened but no
// closing </a> is appended before the following </td> -- confirm and fix.
441 sb.append( "<a name=\"" + getDomainId() + "\">" );
442 sb.append( "</td>" );
// "gs" link: Google Scholar search for the domain id.
444 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
445 + "\" target=\"gs_window\">gs</a>" );
446 sb.append( "</td>" );
// Score statistics are emitted only when the maximal score is positive.
447 if ( getMaximalSimilarityScore() > 0 ) {
449 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
450 sb.append( "</td>" );
// Extra statistics (SD, min, max) are behind a compile-time style switch.
451 if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
452 if ( !isTreatAsBinaryComparison() ) {
455 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
457 sb.append( "</td>" );
460 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
462 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
464 sb.append( "</td>" );
469 sb.append( getMaximalDifference() );
470 sb.append( "</td>" );
// Binary comparisons append the signed difference in counts; the other visible
// append uses its absolute value.
472 if ( isTreatAsBinaryComparison() ) {
473 sb.append( getMaximalDifferenceInCounts() );
476 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
478 sb.append( "</td>" );
// The species count is emitted only for non-binary comparisons.
479 if ( !isTreatAsBinaryComparison() ) {
482 sb.append( getSpeciesData().size() );
484 sb.append( "</td>" );
// Without a custom species order, fall back to the key order of the species data.
486 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
488 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
489 sb.append( getDomainDataInAlphabeticalOrder() );
490 sb.append( "</td>" );
// Custom species order; the domain data is appended the same way as above.
494 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
495 sb.append( getDomainDataInAlphabeticalOrder() );
496 sb.append( "</td>" );
498 sb.append( "</tr>" );
// Builds the plain-text form: the domain id followed by the species data in the
// key order of the species data, rendered with html == false and without a
// taxonomy id map or phylogeny.
// NOTE(review): a statement between the two visible appends (presumably the
// delimiter) and the return statement are not visible in this listing -- confirm.
502 private StringBuffer toStringBufferSimpleTabDelimited() {
503 final StringBuffer sb = new StringBuffer();
504 sb.append( getDomainId() );
506 sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
/**
 * Output formats understood by {@code toStringBuffer()}.
 * <p>
 * The {@code static} modifier is omitted because a nested enum is implicitly
 * static.
 */
public enum PRINT_OPTION {
    /** Plain text, produced by {@code toStringBufferSimpleTabDelimited()}. */
    SIMPLE_TAB_DELIMITED,
    /** Selectable HTML output format. */
    HTML
}