3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.awt.Color;
30 import java.util.Comparator;
31 import java.util.HashMap;
32 import java.util.HashSet;
33 import java.util.List;
36 import java.util.SortedMap;
37 import java.util.SortedSet;
38 import java.util.TreeMap;
39 import java.util.TreeSet;
41 import org.forester.phylogeny.Phylogeny;
42 import org.forester.species.Species;
43 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
44 import org.forester.util.ForesterUtil;
// Class header together with the constants and instance fields visible in this view.
// NOTE(review): the numbers embedded at the start of each line skip values (e.g. 57 -> 59),
// so some declarations are not visible here; _n, which the first constructor's error
// message reads, is not declared in the visible lines.
46 public class PrintableDomainSimilarity implements DomainSimilarity {
// Appended after NO_SPECIES by getSpeciesDataInCustomOrder().
48 final public static String SPECIES_SEPARATOR = " ";
// Not referenced in the visible lines.
49 final private static int EQUAL = 0;
// Placeholder appended by getSpeciesDataInCustomOrder().
50 final private static String NO_SPECIES = " ";
51 final private CombinableDomains _combinable_domains;
// Mutable: assigned by setDetailedness() and init().
52 private DomainSimilarityCalculator.Detailedness _detailedness;
53 final private double _max;
54 private final int _max_difference;
55 private final int _max_difference_in_counts;
56 final private double _mean;
57 final private double _min;
59 final private double _sd;
// Returned by reference (no copy) from getSpeciesData().
60 final private SortedMap<Species, SpeciesSpecificDcData> _species_data;
// In the visible lines this is assigned only by setSpeciesOrder().
61 private List<Species> _species_order;
62 private final boolean _treat_as_binary_comparison;
/**
 * Constructor that validates its arguments and stores them in the instance fields.
 * NOTE(review): several parameter lines, field assignments and guard conditions of this
 * constructor are not visible in this view (the embedded line numbers skip), so only the
 * visible checks are described here.
 *
 * @param combinable_domains must not be null
 * @param max_difference_in_counts stored in _max_difference_in_counts
 * @param max_difference stored in _max_difference
 * @param species_data must be neither null nor empty; stored by reference
 * @param sort_by_species_count_first not referenced in the visible lines
 * @param treat_as_binary_comparison stored in _treat_as_binary_comparison
 * @throws IllegalArgumentException if one of the checks fails
 */
64 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
71 final int max_difference_in_counts,
72 final int max_difference,
73 final SortedMap<Species, SpeciesSpecificDcData> species_data,
74 final boolean sort_by_species_count_first,
75 final boolean treat_as_binary_comparison ) {
76 if ( combinable_domains == null ) {
77 throw new IllegalArgumentException( "attempt to use null combinable domains" );
79 if ( species_data == null ) {
80 throw new IllegalArgumentException( "attempt to use null species data" );
82 if ( species_data.size() < 1 ) {
83 throw new IllegalArgumentException( "attempt to use empty species data" );
// NOTE(review): the conditions guarding the "N less than 0", "negative SD" and
// "max smaller than min" errors are not visible in this view.
86 throw new IllegalArgumentException( "attempt to use N less than 0" );
88 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
89 throw new IllegalArgumentException( "attempt to use N less than 1" );
92 throw new IllegalArgumentException( "attempt to use negative SD" );
95 throw new IllegalArgumentException( "attempt to use max smaller than min" );
98 _combinable_domains = combinable_domains;
104 _max_difference_in_counts = max_difference_in_counts;
105 _max_difference = max_difference;
106 _species_data = species_data;
107 _treat_as_binary_comparison = treat_as_binary_comparison;
108 final int s = species_data.size();
// Passes only when getN() equals s * ( s - 1 ) / 2, the number of unordered pairs among s species.
109 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
110 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
111 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages below mention "more than two species", but the enclosing
// condition (if any) is not visible in this view.
114 if ( getMaximalDifferenceInCounts() < 0 ) {
115 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
117 if ( getMaximalDifference() < 0 ) {
118 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
/**
 * Constructor whose visible lines perform only the null/empty checks and the
 * max-difference checks (no checks on N, SD, min or max are visible here).
 * NOTE(review): some lines of this constructor are not visible in this view (the embedded
 * line numbers skip, e.g. 139 -> 145), so further assignments may exist.
 *
 * @param combinable_domains must not be null
 * @param max_difference_in_counts stored in _max_difference_in_counts
 * @param max_difference stored in _max_difference
 * @param species_data must be neither null nor empty; stored by reference
 * @param sort_by_species_count_first not referenced in the visible lines
 * @param treat_as_binary_comparison stored in _treat_as_binary_comparison
 * @throws IllegalArgumentException if one of the checks fails
 */
123 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
124 final int max_difference_in_counts,
125 final int max_difference,
126 final SortedMap<Species, SpeciesSpecificDcData> species_data,
127 final boolean sort_by_species_count_first,
128 final boolean treat_as_binary_comparison ) {
129 if ( combinable_domains == null ) {
130 throw new IllegalArgumentException( "attempt to use null combinable domains" );
132 if ( species_data == null ) {
133 throw new IllegalArgumentException( "attempt to use null species data" );
135 if ( species_data.size() < 1 ) {
136 throw new IllegalArgumentException( "attempt to use empty species data" );
139 _combinable_domains = combinable_domains;
145 _max_difference_in_counts = max_difference_in_counts;
146 _max_difference = max_difference;
147 _species_data = species_data;
148 _treat_as_binary_comparison = treat_as_binary_comparison;
// NOTE(review): s is not used in the visible lines of this constructor.
149 final int s = species_data.size();
// NOTE(review): the messages below mention "more than two species", but the enclosing
// condition (if any) is not visible in this view.
151 if ( getMaximalDifferenceInCounts() < 0 ) {
152 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
154 if ( getMaximalDifference() < 0 ) {
155 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
/**
 * Compares this similarity with another one by domain id.
 *
 * @param domain_similarity the object to compare with
 * @return for a distinct object of the same class, the result of compareByDomainId; the
 *         value returned for the identity case is not visible in this view
 * @throws IllegalArgumentException if domain_similarity is null or of a different class
 */
161 public int compareTo( final DomainSimilarity domain_similarity ) {
162 if ( this == domain_similarity ) {
165 else if ( domain_similarity == null ) {
166 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
// Exact class match is required; a subclass instance is rejected as well.
168 else if ( domain_similarity.getClass() != this.getClass() ) {
169 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
170 + domain_similarity.getClass() );
172 return compareByDomainId( domain_similarity );
/**
 * Collects, into a new sorted set, the keys of getCombinableDomainIdToCountsMap() of the
 * data stored for the given species. Nothing is added when the species has no entry in
 * the species data.
 * NOTE(review): the return statement is not visible in this view; presumably sorted_ids
 * is returned.
 *
 * @param species_of_combinable_domain the species whose combinable domain ids are wanted
 */
176 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
177 final SortedSet<String> sorted_ids = new TreeSet<String>();
178 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
179 for( final String id : getSpeciesData().get( species_of_combinable_domain )
180 .getCombinableDomainIdToCountsMap().keySet() ) {
181 sorted_ids.add( id );
/**
 * @return the key domain of the combinable domains held by this object
 */
188 public String getDomainId() {
189 return getCombinableDomains().getKeyDomain();
/**
 * @return the max_difference value passed to the constructor
 */
193 public int getMaximalDifference() {
194 return _max_difference;
/**
 * @return the max_difference_in_counts value passed to the constructor
 */
198 public int getMaximalDifferenceInCounts() {
199 return _max_difference_in_counts;
// NOTE(review): the body is not visible in this view; presumably returns _max - confirm.
203 public double getMaximalSimilarityScore() {
// NOTE(review): the body is not visible in this view; presumably returns _mean - confirm.
208 public double getMeanSimilarityScore() {
// NOTE(review): the body is not visible in this view; presumably returns _min - confirm.
213 public double getMinimalSimilarityScore() {
/**
 * Creates a new sorted set and iterates over the keys of the species data.
 * NOTE(review): the loop body and the return statement are not visible in this view;
 * presumably every key is added to the set, which is then returned.
 */
223 public SortedSet<Species> getSpecies() {
224 final SortedSet<Species> species = new TreeSet<Species>();
225 for( final Species s : getSpeciesData().keySet() ) {
/**
 * @return the list last passed to setSpeciesOrder(), by reference; toStringBufferDetailedHTML()
 *         checks this value for null and for emptiness before using it
 */
231 public List<Species> getSpeciesCustomOrder() {
232 return _species_order;
/**
 * @return the species-to-data map passed to the constructor, by reference (no copy is made)
 */
236 public SortedMap<Species, SpeciesSpecificDcData> getSpeciesData() {
237 return _species_data;
// NOTE(review): the body is not visible in this view; presumably returns _sd - confirm.
241 public double getStandardDeviationOfSimilarityScore() {
/**
 * Sets the detailedness that the private printing helpers read through getDetaildness().
 *
 * @param detailedness the new value; stored without validation
 */
245 public void setDetailedness( final Detailedness detailedness ) {
246 _detailedness = detailedness;
/**
 * Stores a custom species order, by reference.
 *
 * @param species_order must contain every species that is a key of the species data; it is
 *        dereferenced without a null check
 * @throws IllegalArgumentException if a species of the species data is missing from the list
 */
249 public void setSpeciesOrder( final List<Species> species_order ) {
250 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
251 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
253 _species_order = species_order;
/**
 * Renders this similarity in the requested format.
 * NOTE(review): the case label in front of the detailed-HTML branch and the label in front
 * of the AssertionError are not visible in this view; presumably "case HTML:" and "default:".
 *
 * @param print_option selects the output format
 * @param tax_code_to_id_map passed on to toStringBufferDetailedHTML(); not used for the
 *        tab-delimited format
 * @param phy passed on to toStringBufferDetailedHTML(); not used for the tab-delimited format
 */
257 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
258 final Map<String, Integer> tax_code_to_id_map,
259 final Phylogeny phy ) {
260 switch ( print_option ) {
261 case SIMPLE_TAB_DELIMITED:
262 return toStringBufferSimpleTabDelimited();
264 return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
266 throw new AssertionError( "Unknown print option: " + print_option );
/**
 * Appends the entry for one species to sb: the species id (through addTaxWithLink() or as
 * plain text) and, when the detailedness is not BASIC, the species-specific data.
 * NOTE(review): one parameter line (the "html" value used below and passed as a boolean by
 * the callers in this file) and the conditions choosing between the branches are not
 * visible in this view.
 */
270 private void addSpeciesSpecificDomainData( final StringBuffer sb,
271 final Species species,
273 final Map<String, Integer> tax_code_to_id_map,
274 final Phylogeny phy ) {
276 addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
279 sb.append( species.getSpeciesId() );
281 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
288 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
/**
 * Appends a taxonomy code to sb. When the code is non-empty and present in
 * tax_code_to_id_map, it is wrapped in a link built from
 * SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK and the mapped id, with a colored span when a
 * hex color is available.
 * NOTE(review): the declaration of hex, the else lines and some appends are not visible
 * in this view; the last append is presumably the fallback for codes without a mapping.
 */
298 private void addTaxWithLink( final StringBuffer sb,
299 final String tax_code,
300 final Map<String, Integer> tax_code_to_id_map,
301 final Phylogeny phy ) {
// A color is looked up only when a non-empty phylogeny is supplied.
303 if ( ( phy != null ) && !phy.isEmpty() ) {
304 hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax_code, phy );
307 if ( !ForesterUtil.isEmpty( tax_code )
308 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
// Colored link.
309 if ( !ForesterUtil.isEmpty( hex ) ) {
310 sb.append( "<a href=\"" );
311 sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
312 sb.append( tax_code_to_id_map.get( tax_code ) );
313 sb.append( "\" target=\"tw\"><span style=\"color:" );
316 sb.append( tax_code );
317 sb.append( "</span></a>" );
// Link without a color span.
320 sb.append( "<a href=\"" );
321 sb.append( SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK );
322 sb.append( tax_code_to_id_map.get( tax_code ) );
323 sb.append( "\" target=\"tw\">" );
324 sb.append( tax_code );
329 sb.append( tax_code );
/**
 * Compares the domain id of this object with that of another one, ignoring case.
 *
 * @param other the similarity to compare with; dereferenced without a null check
 * @return the result of String.compareToIgnoreCase on the two domain ids
 */
334 private int compareByDomainId( final DomainSimilarity other ) {
335 return getDomainId().compareToIgnoreCase( other.getDomainId() );
/**
 * @return the combinable domains passed to the constructor
 */
338 private CombinableDomains getCombinableDomains() {
339 return _combinable_domains;
/**
 * @return the current detailedness, as assigned by setDetailedness() or init()
 */
342 private DomainSimilarityCalculator.Detailedness getDetaildness() {
343 return _detailedness;
/**
 * Builds an HTML fragment that lists every combinable domain id, in sorted order, as a link
 * based on SurfacingConstants.PFAM_FAMILY_ID_LINK, followed by the sorted ids of the species
 * that have this combinable domain.
 * NOTE(review): several appends and the return statement are not visible in this view;
 * presumably sb is returned.
 */
346 private StringBuffer getDomainDataInAlphabeticalOrder() {
// Combinable domain id -> sorted ids of the species having it.
347 final SortedMap<String, SortedSet<String>> m = new TreeMap<String, SortedSet<String>>();
348 final StringBuffer sb = new StringBuffer();
349 for( final Species species : getSpeciesData().keySet() ) {
350 for( final String combable_dom : getCombinableDomainIds( species ) ) {
351 if ( !m.containsKey( combable_dom ) ) {
352 m.put( combable_dom, new TreeSet<String>() );
354 m.get( combable_dom ).add( species.getSpeciesId() );
357 for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
358 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
360 sb.append( "<span style=\"font-size:7px\">" );
361 for( final String tax : e.getValue() ) {
// The color lookup is called with a null phylogeny here.
362 final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
363 if ( !ForesterUtil.isEmpty( hex ) ) {
364 sb.append( "<span style=\"color:" );
368 sb.append( "</span>" );
375 sb.append( "</span>" );
376 sb.append( "<br>\n" );
/**
 * Builds an HTML fragment that, for every combinable domain id (in sorted order), emits a
 * link based on SurfacingConstants.PFAM_FAMILY_ID_LINK followed by the number of species per
 * taxonomy group, with the groups ordered by ValueComparator.
 * NOTE(review): several appends, the guard in front of the IllegalArgumentException and the
 * return statement are not visible in this view.
 *
 * @param tol the phylogeny handed to SurfacingUtil.obtainTaxonomyGroup()
 */
381 private StringBuffer getTaxonomyGroupDistribution( Phylogeny tol ) {
// Combinable domain id -> ids of the species having it.
383 final SortedMap<String, Set<String>> domain_to_species_set_map = new TreeMap<String, Set<String>>();
384 for( final Species species : getSpeciesData().keySet() ) {
385 for( final String combable_dom : getCombinableDomainIds( species ) ) {
386 if ( !domain_to_species_set_map.containsKey( combable_dom ) ) {
387 domain_to_species_set_map.put( combable_dom, new HashSet<String>() );
389 domain_to_species_set_map.get( combable_dom ).add( species.getSpeciesId() );
392 final StringBuffer sb = new StringBuffer();
393 for( final Map.Entry<String, Set<String>> domain_to_species_set : domain_to_species_set_map.entrySet() ) {
// Taxonomy group -> number of species of that group having the current domain.
394 final Map<String, Integer> countz = new HashMap<String, Integer>();
395 final ValueComparator bvc = new ValueComparator( countz );
396 final SortedMap<String, Integer> sorted_countz = new TreeMap<String, Integer>( bvc );
397 for( final String tax_code : domain_to_species_set.getValue() ) {
398 final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol );
// Species for which no group is found are not counted.
399 if ( !ForesterUtil.isEmpty( group ) ) {
400 if ( !countz.containsKey( group ) ) {
401 countz.put( group, 1 );
404 countz.put( group, countz.get( group ) + 1 );
// The sorted map is filled only after counting has finished: its comparator was constructed
// with countz as its backing map.
411 sorted_countz.putAll( countz );
412 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_to_species_set.getKey() + "\">" + domain_to_species_set.getKey() + "</a>" );
414 sb.append( "<span style=\"font-size:8px\">" );
415 for( final Map.Entry<String, Integer> group_to_counts : sorted_countz.entrySet() ) {
416 final String group = group_to_counts.getKey();
417 final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
// NOTE(review): the guard is not visible in this view; presumably thrown when c is null.
419 throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" );
// Format the color as a lower-case #rrggbb string.
421 final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
423 sb.append( "<span style=\"color:" );
428 sb.append( group_to_counts.getValue() );
429 sb.append( "</span>" );
431 sb.append( "<br>\n" );
433 sb.append( "</span>" );
// The lines below are a commented-out template left in the file by the original author.
436 // i am just a template and need to be modified for "printout" TODO
437 // for( final Map.Entry<String, SortedSet<String>> e : m.entrySet() ) {
438 // sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + e.getKey() + "\">" + e.getKey() + "</a>" );
439 // sb.append( ": " );
440 // sb.append( "<span style=\"font-size:8px\">" );
441 // for( final String tax : e.getValue() ) {
442 // final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null );
443 // if ( !ForesterUtil.isEmpty( hex ) ) {
444 // sb.append( "<span style=\"color:" );
446 // sb.append( "\">" );
448 // sb.append( "</span>" );
455 // sb.append( "</span>" );
456 // sb.append( "<br>\n" );
// Small demo that fills a map, copies it into a TreeMap ordered by a ValueComparator and
// prints both maps.
// NOTE(review): this demo passes a Map<String,Double>, while the ValueComparator constructor
// visible in this file takes a Map<String,Integer>; presumably this snippet is commented out
// in the original file or was written against a different ValueComparator - confirm.
462 public class Testing {
464 public static void main(String[] args) {
466 HashMap<String,Double> map = new HashMap<String,Double>();
467 ValueComparator bvc = new ValueComparator(map);
468 TreeMap<String,Double> sorted_map = new TreeMap<String,Double>(bvc);
475 System.out.println("unsorted map: "+map);
477 sorted_map.putAll(map);
479 System.out.println("results: "+sorted_map);
/**
 * Appends the entry of every species to a new buffer, visiting the species in the key order
 * of the sorted species-data map.
 * NOTE(review): the return statement is not visible in this view; presumably sb is returned.
 *
 * @param html passed on to addSpeciesSpecificDomainData()
 * @param tax_code_to_id_map passed on to addSpeciesSpecificDomainData()
 * @param phy passed on to addSpeciesSpecificDomainData()
 */
486 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
487 final Map<String, Integer> tax_code_to_id_map,
488 final Phylogeny phy ) {
489 final StringBuffer sb = new StringBuffer();
490 for( final Species species : getSpeciesData().keySet() ) {
491 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
/**
 * Appends the entries of the species to a new buffer, visiting them in the order given by
 * getSpeciesCustomOrder(), which is dereferenced without a null check.
 * NOTE(review): the else line and the return statement are not visible in this view;
 * presumably the placeholder and separator are appended for species without data, and sb
 * is returned.
 *
 * @param html passed on to addSpeciesSpecificDomainData()
 * @param tax_code_to_id_map passed on to addSpeciesSpecificDomainData()
 * @param phy passed on to addSpeciesSpecificDomainData()
 */
496 private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
497 final Map<String, Integer> tax_code_to_id_map,
498 final Phylogeny phy ) {
499 final StringBuffer sb = new StringBuffer();
500 for( final Species order_species : getSpeciesCustomOrder() ) {
501 if ( getSpeciesData().keySet().contains( order_species ) ) {
502 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
505 sb.append( PrintableDomainSimilarity.NO_SPECIES );
506 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
/**
 * Sets the detailedness to PUNCTILIOUS.
 * NOTE(review): no call to init() is visible in this view; presumably the constructors call
 * it on lines that are not shown - confirm.
 */
512 private void init() {
513 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
/**
 * @return the treat_as_binary_comparison flag passed to the constructor
 */
516 private boolean isTreatAsBinaryComparison() {
517 return _treat_as_binary_comparison;
/**
 * Renders this similarity as HTML table markup: links for the domain id, the similarity
 * statistics, the maximal differences, the species count and the per-species data.
 * NOTE(review): many lines (opening tags, else lines and the return statement) are not
 * visible in this view, so the exact cell layout cannot be described from here.
 *
 * @param tax_code_to_id_map passed on to the species-data helpers
 * @param phy passed on to the species-data helpers and to getTaxonomyGroupDistribution()
 */
520 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, final Phylogeny phy ) {
521 final StringBuffer sb = new StringBuffer();
// Link for the domain id plus a named anchor carrying the same id.
525 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
526 + getDomainId() + "</a>" );
528 sb.append( "<a name=\"" + getDomainId() + "\">" );
529 sb.append( "</td>" );
531 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
532 + "\" target=\"gs_window\">gs</a>" );
533 sb.append( "</td>" );
// Statistics are rounded to three decimal places.
534 if ( getMaximalSimilarityScore() > 0 ) {
536 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
537 sb.append( "</td>" );
538 if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
539 if ( !isTreatAsBinaryComparison() ) {
542 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
544 sb.append( "</td>" );
547 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
549 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
551 sb.append( "</td>" );
556 sb.append( getMaximalDifference() );
557 sb.append( "</td>" );
// The signed value is printed for a binary comparison; the other visible append prints the
// absolute value (presumably the else branch).
559 if ( isTreatAsBinaryComparison() ) {
560 sb.append( getMaximalDifferenceInCounts() );
563 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
565 sb.append( "</td>" );
566 if ( !isTreatAsBinaryComparison() ) {
569 sb.append( getSpeciesData().size() );
571 sb.append( "</td>" );
// Without a custom species order the species are listed in the key order of the species data.
573 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
575 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
576 sb.append( getDomainDataInAlphabeticalOrder() );
577 sb.append( getTaxonomyGroupDistribution( phy ) );
578 sb.append( "</td>" );
582 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
583 sb.append( getDomainDataInAlphabeticalOrder() );
584 sb.append( getTaxonomyGroupDistribution( phy ) );
586 sb.append( "</td>" );
588 sb.append( "</tr>" );
/**
 * Renders this similarity as plain text: the domain id followed by the species data in
 * the key order of the species data, produced with html set to false and without a
 * taxonomy map or phylogeny.
 * NOTE(review): the append between the two visible ones and the return statement are not
 * visible in this view.
 */
592 private StringBuffer toStringBufferSimpleTabDelimited() {
593 final StringBuffer sb = new StringBuffer();
594 sb.append( getDomainId() );
596 sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
// Output formats accepted by toStringBuffer().
601 public static enum PRINT_OPTION {
602 HTML, SIMPLE_TAB_DELIMITED;
// Comparator over String keys that compares the Integer values stored for them in _base.
// NOTE(review): the return statements and the constructor body are not visible in this view.
// Going by the trailing comment, compare() deliberately never returns 0, so that keys with
// equal values are not merged by a TreeMap. A TreeMap using such a comparator cannot find
// keys through get() or containsKey(); only insertion and iteration work - confirm that
// callers rely on nothing else.
605 class ValueComparator implements Comparator<String> {
607 final private Map<String, Integer> _base;
609 public ValueComparator( final Map<String, Integer> base ) {
613 public int compare( final String a, final String b ) {
// Both lookups are unboxed for the comparison, so a and b must both be keys of _base.
614 if ( _base.get( a ) >= _base.get( b ) ) {
619 } // returning 0 would merge keys