3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.HashMap;
30 import java.util.List;
32 import java.util.SortedMap;
33 import java.util.SortedSet;
34 import java.util.TreeMap;
35 import java.util.TreeSet;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.species.Species;
39 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
40 import org.forester.util.ForesterUtil;
// DomainSimilarity implementation that keeps per-species data for one set of
// combinable domains and can render itself as HTML or as simple tab-delimited
// text (see toStringBuffer and PRINT_OPTION).
// NOTE(review): this listing omits some lines (the embedded line numbers have
// gaps), e.g. the declaration of the _n field that one constructor reads.
42 public class PrintableDomainSimilarity implements DomainSimilarity {
// Appended right after NO_SPECIES by getSpeciesDataInCustomOrder.
44 final public static String SPECIES_SEPARATOR = " ";
// NOTE(review): no use of EQUAL is visible in this listing; presumably the
// compareTo result for identical objects -- confirm.
45 final private static int EQUAL = 0;
// Placeholder appended by getSpeciesDataInCustomOrder; presumably for species of
// the custom order that have no data here (the branch line is not visible).
46 final private static String NO_SPECIES = " ";
// Source of the key domain reported by getDomainId().
47 final private CombinableDomains _combinable_domains;
// Level of detail for output; set to PUNCTILIOUS by init() and changeable via
// setDetailedness().
48 private DomainSimilarityCalculator.Detailedness _detailedness;
// _max, _mean, _min and _sd: presumably the similarity score statistics behind the
// get...SimilarityScore() accessors, whose bodies are not visible here -- confirm.
49 final private double _max;
// Returned by getMaximalDifference().
50 private final int _max_difference;
// Returned by getMaximalDifferenceInCounts().
51 private final int _max_difference_in_counts;
52 final private double _mean;
53 final private double _min;
55 final private double _sd;
// Per-species data; the key set defines which species this similarity covers.
56 final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
// Optional custom output order; assigned only by setSpeciesOrder().
57 private List<Species> _species_order;
// Switches several output details in toStringBufferDetailedHTML().
58 private final boolean _treat_as_binary_comparison;
// Constructor that validates its arguments and stores them in the fields.
// Every failed check throws IllegalArgumentException.
// NOTE(review): the listing jumps from embedded line 60 to 67, so the parameters
// declared between combinable_domains and max_difference_in_counts are not
// visible; the body refers to one of them as n, and the messages below mention
// N, SD, min and max.
// NOTE(review): sort_by_species_count_first is not referenced in any visible line
// of this constructor.
60 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
67 final int max_difference_in_counts,
68 final int max_difference,
69 final SortedMap<Species, SpeciesSpecificDcData> species_data,
70 final boolean sort_by_species_count_first,
71 final boolean treat_as_binary_comparison ) {
72 if ( combinable_domains == null ) {
73 throw new IllegalArgumentException( "attempt to use null combinable domains" );
75 if ( species_data == null ) {
76 throw new IllegalArgumentException( "attempt to use null species data" );
78 if ( species_data.size() < 1 ) {
79 throw new IllegalArgumentException( "attempt to use empty species data" );
// The condition guarding the next throw is not visible in this listing.
82 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species, n must be at least 1.
84 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
85 throw new IllegalArgumentException( "attempt to use N less than 1" );
// The conditions guarding the next two throws are not visible in this listing.
88 throw new IllegalArgumentException( "attempt to use negative SD" );
91 throw new IllegalArgumentException( "attempt to use max smaller than min" );
// Field assignments; the listing skips embedded lines 95-99 between the first
// and the second visible assignment.
94 _combinable_domains = combinable_domains;
100 _max_difference_in_counts = max_difference_in_counts;
101 _max_difference = max_difference;
102 _species_data = species_data;
103 _treat_as_binary_comparison = treat_as_binary_comparison;
104 final int s = species_data.size();
// Consistency check: s * (s - 1) must equal 2 * getN(), i.e. getN() has to be the
// number of unordered pairs that can be formed from s species.
105 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
106 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
107 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages below say "with more than two species", but any
// enclosing species-count condition is not visible here -- confirm.
110 if ( getMaximalDifferenceInCounts() < 0 ) {
111 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
113 if ( getMaximalDifference() < 0 ) {
114 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Constructor whose visible argument checks cover only combinable_domains,
// species_data and the two max-difference values; none of the N/SD/min/max
// checks appear in its visible lines.
// Every failed check throws IllegalArgumentException.
// NOTE(review): sort_by_species_count_first is not referenced in any visible line
// of this constructor.
119 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
120 final int max_difference_in_counts,
121 final int max_difference,
122 final SortedMap<Species, SpeciesSpecificDcData> species_data,
123 final boolean sort_by_species_count_first,
124 final boolean treat_as_binary_comparison ) {
125 if ( combinable_domains == null ) {
126 throw new IllegalArgumentException( "attempt to use null combinable domains" );
128 if ( species_data == null ) {
129 throw new IllegalArgumentException( "attempt to use null species data" );
131 if ( species_data.size() < 1 ) {
132 throw new IllegalArgumentException( "attempt to use empty species data" );
// Field assignments; the listing skips embedded lines 136-140 here, presumably
// the assignments of the remaining final fields -- confirm.
135 _combinable_domains = combinable_domains;
141 _max_difference_in_counts = max_difference_in_counts;
142 _max_difference = max_difference;
143 _species_data = species_data;
144 _treat_as_binary_comparison = treat_as_binary_comparison;
// NOTE(review): s is not used in any visible line of this constructor.
145 final int s = species_data.size();
// NOTE(review): the messages below say "with more than two species", but any
// enclosing species-count condition is not visible here -- confirm.
147 if ( getMaximalDifferenceInCounts() < 0 ) {
148 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
150 if ( getMaximalDifference() < 0 ) {
151 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Orders similarities by domain ID (case-insensitively, via compareByDomainId).
// Throws IllegalArgumentException when the argument is null or is not of exactly
// this class.
157 public int compareTo( final DomainSimilarity domain_similarity ) {
// Identity shortcut; the value returned for it is on a line not visible here.
158 if ( this == domain_similarity ) {
161 else if ( domain_similarity == null ) {
162 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
// Exact class match is required; subclasses are rejected as well.
164 else if ( domain_similarity.getClass() != this.getClass() ) {
165 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
166 + domain_similarity.getClass() );
168 return compareByDomainId( domain_similarity );
// Collects, in sorted order, the keys of the combinable-domain-ID-to-counts map of
// the given species. Nothing is added when the species has no entry in the
// species data.
// The return statement is not visible in this listing; presumably sorted_ids.
172 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
173 final SortedSet<String> sorted_ids = new TreeSet<String>();
174 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
175 for( final String id : getSpeciesData().get( species_of_combinable_domain )
176 .getCombinableDomainIdToCountsMap().keySet() ) {
177 sorted_ids.add( id );
// Returns the key domain of the underlying combinable domains.
184 public String getDomainId() {
185 return getCombinableDomains().getKeyDomain();
// Returns the max_difference value passed to the constructor.
189 public int getMaximalDifference() {
190 return _max_difference;
// Returns the max_difference_in_counts value passed to the constructor.
194 public int getMaximalDifferenceInCounts() {
195 return _max_difference_in_counts;
// Accessor for the maximal similarity score.
// NOTE(review): the body is not visible in this listing; presumably returns _max.
199 public double getMaximalSimilarityScore() {
// Accessor for the mean similarity score.
// NOTE(review): the body is not visible in this listing; presumably returns _mean.
204 public double getMeanSimilarityScore() {
// Accessor for the minimal similarity score.
// NOTE(review): the body is not visible in this listing; presumably returns _min.
209 public double getMinimalSimilarityScore() {
// Builds a new sorted set while iterating over the keys of the species data.
// NOTE(review): the loop body and the return statement are not visible in this
// listing; presumably each key is added and the set is returned -- confirm.
219 public SortedSet<Species> getSpecies() {
220 final SortedSet<Species> species = new TreeSet<Species>();
221 for( final Species s : getSpeciesData().keySet() ) {
// Returns the list stored by setSpeciesOrder(); no other assignment to
// _species_order is visible, so this is null until that setter has been called.
227 public List<Species> getSpeciesCustomOrder() {
228 return _species_order;
// Returns the internal species-to-data map itself (no copy is made).
232 public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
233 return _species_data;
// Accessor for the standard deviation of the similarity score.
// NOTE(review): the body is not visible in this listing; presumably returns _sd.
237 public double getStandardDeviationOfSimilarityScore() {
// Sets the level of detail used when species data is rendered.
241 public void setDetailedness( final Detailedness detailedness ) {
242 _detailedness = detailedness;
// Sets a custom order for species output. The list must contain every species
// present in the species data, otherwise IllegalArgumentException is thrown.
// The list reference is stored as is (no copy).
// NOTE(review): species_order is dereferenced without a null check, so passing
// null raises NullPointerException.
245 public void setSpeciesOrder( final List<Species> species_order ) {
246 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
247 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
249 _species_order = species_order;
// Renders this similarity in the requested format.
// tax_code_to_id_map and phy are only forwarded to the detailed HTML renderer;
// the tab-delimited renderer takes no arguments.
253 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
254 final Map<String, Integer> tax_code_to_id_map,
255 final Phylogeny phy ) {
256 switch ( print_option ) {
257 case SIMPLE_TAB_DELIMITED:
258 return toStringBufferSimpleTabDelimited();
// The case label for the next return is not visible here; presumably HTML.
260 return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
// Presumably the default branch (label not visible): unknown options are
// treated as a programming error.
262 throw new AssertionError( "Unknown print option: " + print_option );
// Appends the output for one species to sb: the species ID (through addTaxWithLink
// or as plain text) and the species-specific data.
// NOTE(review): the parameter declared on embedded line 268 is not visible here;
// callers in this file pass a boolean named html in that position, and the body
// uses html below.
266 private void addSpeciesSpecificDomainData( final StringBuffer sb,
267 final Species species,
269 final Map<String, Integer> tax_code_to_id_map,
270 final Phylogeny phy ) {
// The condition choosing between the next two appends is not visible;
// presumably the html flag -- confirm.
272 addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
275 sb.append( species.getSpeciesId() );
// The listing skips embedded lines 278-283, so it is not visible which
// statements belong to this BASIC check.
277 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
284 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
// Appends tax_code to sb. When tax_code is non-empty and has an entry in
// tax_code_to_id_map, it is wrapped in a link built from
// SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK plus the mapped ID.
// The declaration of hex (embedded line 298) and the else lines are not visible
// in this listing.
294 private void addTaxWithLink( final StringBuffer sb,
295 final String tax_code,
296 final Map<String, Integer> tax_code_to_id_map,
297 final Phylogeny phy ) {
// A color is looked up only when a non-empty phylogeny is available.
299 if ( ( phy != null ) && !phy.isEmpty() ) {
300 hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
303 if ( !ForesterUtil.isEmpty( tax_code )
304 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
// With a color: link that wraps the tax code in a colored span. The lines
// appending the color value itself (embedded 310-311) are not visible.
305 if ( !ForesterUtil.isEmpty( hex ) ) {
306 sb.append( "<a href=\"" );
307 sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
308 sb.append( tax_code_to_id_map.get( tax_code ) );
309 sb.append( "\" target=\"tw\"><span style=\"color:" );
312 sb.append( tax_code );
313 sb.append( "</span></a>" );
// Presumably the branch without a color: plain link. The closing tag is on
// a line not visible here.
316 sb.append( "<a href=\"" );
317 sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
318 sb.append( tax_code_to_id_map.get( tax_code ) );
319 sb.append( "\" target=\"tw\">" );
320 sb.append( tax_code );
// Presumably the fallback when no link can be built: tax code as plain text.
325 sb.append( tax_code );
// Compares the two domain IDs as strings, ignoring case.
330 private int compareByDomainId( final DomainSimilarity other ) {
331 return getDomainId().compareToIgnoreCase( other.getDomainId() );
// Returns the combinable domains this similarity was created with.
334 private CombinableDomains getCombinableDomains() {
335 return _combinable_domains;
// Returns the current level of detail.
// NOTE(review): the method name is misspelled ("Detaildness"); it is referenced
// under this spelling elsewhere in the class.
338 private DomainSimilarityCalculator.Detailedness getDetaildness() {
339 return _detailedness;
// Builds HTML that lists every combinable domain ID in sorted order, each as a
// link (SurfacingConstants.PFAM_FAMILY_ID_LINK + ID), followed by a small-font
// span for the species in which it occurs.
// The return statement is not visible in this listing; presumably sb.
342 private StringBuffer getDomainDataInAlphabeticalOrder() {
// m: combinable domain ID -> sorted IDs of the species that have it.
343 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
344 final StringBuffer sb = new StringBuffer();
345 for( final Species species : getSpeciesData().keySet() ) {
346 for( final String combable_dom : getCombinableDomainIds( species ) ) {
347 if ( !m.containsKey( combable_dom ) ) {
348 m.put( combable_dom, new TreeSet<String>() );
350 m.get( combable_dom ).add( species.getSpeciesId() );
353 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
354 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
356 sb.append( "<span style=\"font-size:7px\">" );
357 for( final String tax : e.getValue() ) {
// The color lookup is done with null in place of a phylogeny.
358 final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
// The lines that append the color value and the species ID (embedded
// 361-363 and 365-370) are not visible in this listing.
359 if ( !ForesterUtil.isEmpty( hex ) ) {
360 sb.append( "<span style=\"color:" );
364 sb.append( "</span>" );
371 sb.append( "</span>" );
372 sb.append( "<br>\n" );
// Counts taxonomy groups over all (combinable domain, species) pairs and builds
// an HTML listing of the combinable domains.
// NOTE(review): countz is filled but not read in any visible line, and the output
// loop is marked as a template by the existing TODO comment, so this method looks
// unfinished. No caller is visible in this listing.
379 private StringBuffer getTaxonomyGroupDistribution( Phylogeny tol ) {
// m: combinable domain ID -> sorted IDs of the species that have it.
382 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
383 for( final Species species : getSpeciesData().keySet() ) {
384 for( final String combable_dom : getCombinableDomainIds( species ) ) {
385 if ( !m.containsKey( combable_dom ) ) {
386 m.put( combable_dom, new TreeSet<String>() );
388 m.get( combable_dom ).add( species.getSpeciesId() );
// countz: taxonomy group -> number of occurrences. A species contributes once
// for every combinable domain it is listed under, not once overall.
391 Map<String,Integer> countz = new HashMap<String,Integer>();
392 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
393 for( final String tax_code : e.getValue() ) {
394 final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
// Species for which no group is found are left out of the counts.
395 if ( !ForesterUtil.isEmpty( group ) ) {
396 if ( !countz.containsKey( group ) ) {
397 countz.put( group, 1 );
400 countz.put( group, countz.get( group) + 1 );
411 final StringBuffer sb = new StringBuffer();
413 // i am just a template and need to be modified for "printout" TODO
414 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
415 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
417 sb.append( "<span style=\"font-size:8px\">" );
418 for( final String tax : e.getValue() ) {
// The color lookup is done with null, not with the tol parameter.
419 final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
420 if ( !ForesterUtil.isEmpty( hex ) ) {
421 sb.append( "<span style=\"color:" );
425 sb.append( "</span>" );
432 sb.append( "</span>" );
433 sb.append( "<br>\n" );
// Renders the data of all species in the iteration order of the sorted species
// data map, delegating each species to addSpeciesSpecificDomainData.
// The return statement is not visible in this listing; presumably sb.
439 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
440 final Map<String, Integer> tax_code_to_id_map,
441 final Phylogeny phy ) {
442 final StringBuffer sb = new StringBuffer();
443 for( final Species species : getSpeciesData().keySet() ) {
444 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
// Renders species data in the order given by getSpeciesCustomOrder().
// NOTE(review): the custom order is iterated without a null check; the visible
// caller only gets here after checking that it is neither null nor empty.
// The return statement is not visible in this listing; presumably sb.
449 private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
450 final Map<String, Integer> tax_code_to_id_map,
451 final Phylogeny phy ) {
452 final StringBuffer sb = new StringBuffer();
453 for( final Species order_species : getSpeciesCustomOrder() ) {
454 if ( getSpeciesData().keySet().contains( order_species ) ) {
455 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
// Presumably the else branch (line not visible): a placeholder plus separator
// for a species of the custom order that has no data here.
458 sb.append( PrintableDomainSimilarity.NO_SPECIES );
459 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Sets the default level of detail (PUNCTILIOUS).
// NOTE(review): no call to init() is visible in this listing; presumably invoked
// from the constructors on lines that are not shown -- confirm.
465 private void init() {
466 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
// Returns the treat_as_binary_comparison flag passed to the constructor.
469 private boolean isTreatAsBinaryComparison() {
470 return _treat_as_binary_comparison;
// Renders this similarity as one HTML table row: domain links, score statistics,
// difference values, species count and per-species data.
// Many lines are not visible in this listing (the statements that open the
// cells, several else lines and the return statement); the comments below
// describe only what the visible lines do.
473 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, final Phylogeny phy ) {
474 final StringBuffer sb = new StringBuffer();
// Domain ID as a link (PFAM_FAMILY_ID_LINK + ID) opening in "pfam_window".
478 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
479 + getDomainId() + "</a>" );
// Named anchor for in-page links to this domain.
// NOTE(review): this <a name=...> element is not closed before the </td> below.
481 sb.append( "<a name=\"" + getDomainId() + "\">" );
482 sb.append( "</td>" );
// "gs" link built from GOOGLE_SCHOLAR_SEARCH + domain ID.
484 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
485 + "\" target=\"gs_window\">gs</a>" );
486 sb.append( "</td>" );
// The condition is on a maximal score above zero; the lines skipped throughout
// this section hide which of the statements below it controls.
487 if ( getMaximalSimilarityScore() > 0 ) {
// Scores are rounded to 3 decimal places.
489 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
490 sb.append( "</td>" );
491 if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
492 if ( !isTreatAsBinaryComparison() ) {
495 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
497 sb.append( "</td>" );
500 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
502 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
504 sb.append( "</td>" );
509 sb.append( getMaximalDifference() );
510 sb.append( "</td>" );
// Binary comparison: the difference in counts is printed as stored;
// the other append (presumably the else branch) prints its absolute value.
512 if ( isTreatAsBinaryComparison() ) {
513 sb.append( getMaximalDifferenceInCounts() );
516 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
518 sb.append( "</td>" );
519 if ( !isTreatAsBinaryComparison() ) {
// Number of species that have data for this domain.
522 sb.append( getSpeciesData().size() );
524 sb.append( "</td>" );
// Without a custom species order the data is printed in alphabetical order;
// the second pair of appends (presumably the else branch) uses the custom
// order. Both are followed by the alphabetical domain listing.
526 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
528 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
529 sb.append( getDomainDataInAlphabeticalOrder() );
530 sb.append( "</td>" );
534 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
535 sb.append( getDomainDataInAlphabeticalOrder() );
536 sb.append( "</td>" );
538 sb.append( "</tr>" );
// Renders the domain ID followed by the non-HTML species data in alphabetical
// order; no taxonomy map or phylogeny is used.
// The separator between the two parts (embedded line 545) and the return
// statement are not visible in this listing.
542 private StringBuffer toStringBufferSimpleTabDelimited() {
543 final StringBuffer sb = new StringBuffer();
544 sb.append( getDomainId() );
546 sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
// Output formats accepted by toStringBuffer(): detailed HTML or simple
// tab-delimited text.
551 public static enum PRINT_OPTION {
552 HTML, SIMPLE_TAB_DELIMITED;