3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.species.Species;
36 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
37 import org.forester.util.ForesterUtil;
39 public class PrintableDomainSimilarity implements DomainSimilarity {
// Separator appended after every species entry when species data is written to a buffer.
41 final public static String SPECIES_SEPARATOR = " ";
// NOTE(review): in the visible part of this file TAB is only referenced from commented-out code.
42 final private static char TAB = '\t';
// Result codes returned by the comparison methods of this class.
43 final private static int BEFORE = -1;
44 final private static int EQUAL = 0;
45 final private static int AFTER = 1;
// Placeholder written in custom-order output for a listed species that has no entry in the species data.
46 final private static String NO_SPECIES = " ";
// Similarity score statistics; presumably minimum, maximum, mean and standard deviation
// (their assignments are not visible in this listing -- confirm against the full constructor).
47 final private double _min;
48 final private double _max;
49 final private double _mean;
50 final private double _sd;
// Difference values supplied to the constructor and exposed through
// getMaximalDifferenceInCounts() and getMaximalDifference().
52 private final int _max_difference_in_counts;
53 private final int _max_difference;
// Source of the key domain id returned by getDomainId().
54 final private CombinableDomains _combinable_domains;
// Per-species data, keyed (and therefore iterated) in Species sort order.
55 final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
// Field that compareTo() sorts by and that the output methods highlight.
56 final private DomainSimilaritySortField _sort_field;
// Optional custom species order; stays null until setSpeciesOrder() is called.
57 private List<Species> _species_order;
// When true, compareTo() compares species counts before the selected sort field.
58 private final boolean _sort_by_species_count_first;
// Level of detail used when printing; set by init() and setDetailedness().
59 private DomainSimilarityCalculator.Detailedness _detailedness;
// Selects signed versus absolute count differences and hides some columns in the output methods.
60 private final boolean _treat_as_binary_comparison;
/**
 * If go_id_to_term_map is not null, detailed GO information is written;
 * otherwise only GO ids are written.
 */
// Creates a printable domain similarity after validating its arguments.
// Every validation failure visible here is reported as an IllegalArgumentException.
// NOTE(review): this listing appears to omit several lines of the constructor
// (some parameter declarations, some guard conditions, some field assignments and
// closing braces); the names n, _n and getN() used below are not declared in the
// visible part of the file -- confirm against the full source.
68 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
75 final int max_difference_in_counts,
76 final int max_difference,
77 final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
78 final DomainSimilaritySortField sort_field,
79 final boolean sort_by_species_count_first,
80 final boolean treat_as_binary_comparison ) {
// Reject null collaborators and empty species data up front.
81 if ( combinable_domains == null ) {
82 throw new IllegalArgumentException( "attempt to use null combinable domains" );
84 if ( sort_field == null ) {
85 throw new IllegalArgumentException( "attempt to use null sorting" );
87 if ( species_data == null ) {
88 throw new IllegalArgumentException( "attempt to use null species data" );
90 if ( species_data.size() < 1 ) {
91 throw new IllegalArgumentException( "attempt to use empty species data" );
// NOTE(review): the condition guarding the next throw is not visible in this listing.
94 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species, n must be at least 1.
96 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
97 throw new IllegalArgumentException( "attempt to use N less than 1" );
// NOTE(review): the conditions guarding the next two throws are not visible in this listing.
100 throw new IllegalArgumentException( "attempt to use negative SD" );
103 throw new IllegalArgumentException( "attempt to use max smaller than min" );
// Store the validated arguments.
106 _combinable_domains = combinable_domains;
112 _max_difference_in_counts = max_difference_in_counts;
113 _max_difference = max_difference;
114 _species_data = species_data;
115 _sort_field = sort_field;
116 _sort_by_species_count_first = sort_by_species_count_first;
117 _treat_as_binary_comparison = treat_as_binary_comparison;
// Consistency check: s*s - s must equal 2*getN(), i.e. getN() must equal s*(s-1)/2,
// the number of unordered pairs among s species.
118 final int s = species_data.size();
119 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
120 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
121 + " for domain " + combinable_domains.getKeyDomain() );
// Negative differences are rejected here; judging by the messages this presumably only
// applies when there are more than two species (the enclosing condition is not visible).
124 if ( getMaximalDifferenceInCounts() < 0 ) {
125 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
127 if ( getMaximalDifference() < 0 ) {
128 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Appends the entry for one species to sb: the species id (as a taxonomy hyperlink when
// tax_code_to_id_map has an id for it), optionally the species-specific data, and finally
// SPECIES_SEPARATOR.
// NOTE(review): this listing omits lines of the method; in particular the declaration of
// the html parameter (used below and passed by both callers) and some branch structure
// are not visible -- confirm against the full source.
133 private void addSpeciesSpecificDomainData( final StringBuffer sb,
134 final Species species,
136 final Map<String, Integer> tax_code_to_id_map ) {
137 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
142 final String tax_code = species.getSpeciesId();
// Link the tax code only when it is non-empty and the map provides an id for it.
143 if ( !ForesterUtil.isEmpty( tax_code )
144 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
145 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
146 + tax_code_to_id_map.get( tax_code ) + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
// Fallback: plain tax code without a link.
149 sb.append( tax_code );
154 sb.append( species.getSpeciesId() );
// The per-species details are only written for detail levels other than BASIC.
156 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
158 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
164 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Acts on sb only when sort_field is the field this similarity is sorted by.
// NOTE(review): the statement inside the if is not visible in this listing; judging by the
// method name it presumably appends a closing bold tag -- confirm against the full source.
167 private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
168 if ( getSortField() == sort_field ) {
// Acts on sb only when sort_field is the field this similarity is sorted by.
// NOTE(review): the statement inside the if is not visible in this listing; judging by the
// method name it presumably appends an opening bold tag -- confirm against the full source.
173 private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
174 if ( getSortField() == sort_field ) {
179 private int compareByDomainId( final DomainSimilarity other ) {
180 return getDomainId().compareTo( other.getDomainId() );
183 private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
184 final int s_this = getSpeciesData().size();
185 final int s_other = domain_similarity.getSpeciesData().size();
186 if ( s_this < s_other ) {
187 return PrintableDomainSimilarity.BEFORE;
189 else if ( s_this > s_other ) {
190 return PrintableDomainSimilarity.AFTER;
193 return PrintableDomainSimilarity.EQUAL;
// Orders this similarity relative to domain_similarity according to the configured sort field.
// Returns EQUAL for the identical object; throws IllegalArgumentException for null or for an
// argument of a different class. In every visible branch ties fall back to compareByDomainId().
// When isSortBySpeciesCountFirst() is true, most branches compare species counts first.
// NOTE(review): this listing omits lines of the method: several case labels, the statement
// inside each "if ( i != EQUAL )" (presumably "return i;"), else lines and closing braces
// are not visible -- confirm against the full source.
198 public int compareTo( final DomainSimilarity domain_similarity ) {
199 if ( this == domain_similarity ) {
200 return PrintableDomainSimilarity.EQUAL;
202 else if ( domain_similarity == null ) {
203 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
205 else if ( domain_similarity.getClass() != this.getClass() ) {
206 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
207 + domain_similarity.getClass() );
209 switch ( getSortField() ) {
// Branch comparing minimal similarity scores (case label not visible): smaller score sorts first.
211 if ( isSortBySpeciesCountFirst() ) {
212 final int i = compareBySpeciesCount( domain_similarity );
213 if ( i != PrintableDomainSimilarity.EQUAL ) {
217 if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
218 return PrintableDomainSimilarity.BEFORE;
220 else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
221 return PrintableDomainSimilarity.AFTER;
224 return compareByDomainId( domain_similarity );
// Branch comparing maximal similarity scores (case label not visible): smaller score sorts first.
227 if ( isSortBySpeciesCountFirst() ) {
228 final int i = compareBySpeciesCount( domain_similarity );
229 if ( i != PrintableDomainSimilarity.EQUAL ) {
233 if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
234 return PrintableDomainSimilarity.BEFORE;
236 else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
237 return PrintableDomainSimilarity.AFTER;
240 return compareByDomainId( domain_similarity );
// Branch comparing mean similarity scores (case label not visible): smaller score sorts first.
243 if ( isSortBySpeciesCountFirst() ) {
244 final int i = compareBySpeciesCount( domain_similarity );
245 if ( i != PrintableDomainSimilarity.EQUAL ) {
249 if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
250 return PrintableDomainSimilarity.BEFORE;
252 else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
253 return PrintableDomainSimilarity.AFTER;
256 return compareByDomainId( domain_similarity );
// Branch comparing standard deviations (case label not visible): smaller value sorts first.
259 if ( isSortBySpeciesCountFirst() ) {
260 final int i = compareBySpeciesCount( domain_similarity );
261 if ( i != PrintableDomainSimilarity.EQUAL ) {
265 if ( getStandardDeviationOfSimilarityScore() < domain_similarity
266 .getStandardDeviationOfSimilarityScore() ) {
267 return PrintableDomainSimilarity.BEFORE;
269 else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
270 .getStandardDeviationOfSimilarityScore() ) {
271 return PrintableDomainSimilarity.AFTER;
274 return compareByDomainId( domain_similarity );
// Branch comparing maximal differences (case label not visible): note the reversed
// direction -- the larger difference sorts first.
277 if ( isSortBySpeciesCountFirst() ) {
278 final int i = compareBySpeciesCount( domain_similarity );
279 if ( i != PrintableDomainSimilarity.EQUAL ) {
283 if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
284 return PrintableDomainSimilarity.BEFORE;
286 else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
287 return PrintableDomainSimilarity.AFTER;
290 return compareByDomainId( domain_similarity );
// Larger absolute count difference sorts first.
292 case ABS_MAX_COUNTS_DIFFERENCE:
293 if ( isSortBySpeciesCountFirst() ) {
294 final int i = compareBySpeciesCount( domain_similarity );
295 if ( i != PrintableDomainSimilarity.EQUAL ) {
299 if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
300 .getMaximalDifferenceInCounts() ) ) {
301 return PrintableDomainSimilarity.BEFORE;
303 else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
304 .getMaximalDifferenceInCounts() ) ) {
305 return PrintableDomainSimilarity.AFTER;
308 return compareByDomainId( domain_similarity );
// Signed count difference, larger first; only permitted when this object has exactly two species.
310 case MAX_COUNTS_DIFFERENCE:
311 if ( getSpeciesData().size() != 2 ) {
312 throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
314 if ( isSortBySpeciesCountFirst() ) {
315 final int i = compareBySpeciesCount( domain_similarity );
316 if ( i != PrintableDomainSimilarity.EQUAL ) {
320 if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
321 return PrintableDomainSimilarity.BEFORE;
323 else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
324 return PrintableDomainSimilarity.AFTER;
327 return compareByDomainId( domain_similarity );
// Branch comparing species counts unconditionally, then domain id (case label not visible).
330 final int i = compareBySpeciesCount( domain_similarity );
331 if ( i != PrintableDomainSimilarity.EQUAL ) {
335 return compareByDomainId( domain_similarity );
// Branch comparing by domain id only (case label not visible).
338 return compareByDomainId( domain_similarity );
// Reached only for a sort field that no branch handles.
340 throw new AssertionError( "Unknown sort method: " + getSortField() );
344 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
345 final SortedSet<String> sorted_ids = new TreeSet<String>();
346 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
347 for( final String id : getSpeciesData().get( species_of_combinable_domain )
348 .getCombinableDomainIdToCountsMap().keySet() ) {
349 sorted_ids.add( id );
355 private CombinableDomains getCombinableDomains() {
356 return _combinable_domains;
359 private DomainSimilarityCalculator.Detailedness getDetaildness() {
360 return _detailedness;
364 public String getDomainId() {
365 return getCombinableDomains().getKeyDomain();
369 public int getMaximalDifference() {
370 return _max_difference;
374 public int getMaximalDifferenceInCounts() {
375 return _max_difference_in_counts;
// Accessor for the maximal similarity score.
// NOTE(review): the method body is not visible in this listing; presumably it returns _max -- confirm.
379 public double getMaximalSimilarityScore() {
// Accessor for the mean similarity score.
// NOTE(review): the method body is not visible in this listing; presumably it returns _mean -- confirm.
384 public double getMeanSimilarityScore() {
// Accessor for the minimal similarity score.
// NOTE(review): the method body is not visible in this listing; presumably it returns _min -- confirm.
389 public double getMinimalSimilarityScore() {
// Accessor for the field this similarity is sorted by.
// NOTE(review): the method body is not visible in this listing; presumably it returns _sort_field -- confirm.
398 private DomainSimilaritySortField getSortField() {
// Creates a new sorted set and iterates over the keys (species) of the species data.
// NOTE(review): the loop body and the return statement are not visible in this listing;
// presumably each species is added to the set, which is then returned -- confirm.
403 public SortedSet<Species> getSpecies() {
404 final SortedSet<Species> species = new TreeSet<Species>();
405 for( final Species s : getSpeciesData().keySet() ) {
411 public List<Species> getSpeciesCustomOrder() {
412 return _species_order;
416 public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
417 return _species_data;
420 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
421 final Map<String, Integer> tax_code_to_id_map ) {
422 final StringBuffer sb = new StringBuffer();
423 for( final Species species : getSpeciesData().keySet() ) {
424 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map );
429 private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
430 final StringBuffer sb = new StringBuffer();
431 for( final Species order_species : getSpeciesCustomOrder() ) {
432 if ( getSpeciesData().keySet().contains( order_species ) ) {
433 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map );
436 sb.append( PrintableDomainSimilarity.NO_SPECIES );
437 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Accessor for the standard deviation of the similarity score.
// NOTE(review): the method body is not visible in this listing; presumably it returns _sd -- confirm.
444 public double getStandardDeviationOfSimilarityScore() {
448 private void init() {
449 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
452 private boolean isSortBySpeciesCountFirst() {
453 return _sort_by_species_count_first;
456 private boolean isTreatAsBinaryComparison() {
457 return _treat_as_binary_comparison;
460 public void setDetailedness( final Detailedness detailedness ) {
461 _detailedness = detailedness;
464 public void setSpeciesOrder( final List<Species> species_order ) {
465 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
466 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
468 _species_order = species_order;
472 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
473 final Map<String, Integer> tax_code_to_id_map ) {
474 switch ( print_option ) {
475 case SIMPLE_TAB_DELIMITED:
476 return toStringBufferSimpleTabDelimited();
478 return toStringBufferDetailedHTML( tax_code_to_id_map );
480 throw new AssertionError( "Unknown print option: " + print_option );
// Renders this similarity as HTML table cells, closed by "</tr>": domain id with a Pfam
// link, a Google Scholar link, score statistics, difference values, species count and the
// per-species data. The value of the active sort field is wrapped by the
// boldStartIfSortedBy()/boldEndIfSortedBy() helpers.
// NOTE(review): this listing omits lines of the method (the statements opening the row and
// the cells, else lines, closing braces and the return statement are not visible) --
// confirm against the full source.
484 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map ) {
485 final StringBuffer sb = new StringBuffer();
// Domain id cell: link built from PFAM_FAMILY_ID_LINK, followed by a named anchor.
488 boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
489 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
490 + getDomainId() + "</a>" );
491 boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
// NOTE(review): no closing "</a>" for this named anchor is visible before the cell ends.
492 sb.append( "<a name=\"" + getDomainId() + "\">" );
493 sb.append( "</td>" );
// Google Scholar search link for the domain id.
495 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
496 + "\" target=\"gs_window\">gs</a>" );
497 sb.append( "</td>" );
// Mean score, passed through ForesterUtil.round( value, 3 ).
499 boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
500 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
501 boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
502 sb.append( "</td>" );
// Standard deviation, minimum and maximum follow a check for non-binary comparisons
// (the exact extent of that branch is not visible in this listing).
503 if ( !isTreatAsBinaryComparison() ) {
506 boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
507 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
508 boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
510 sb.append( "</td>" );
513 boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
514 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
515 boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
517 boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
518 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
519 boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
521 sb.append( "</td>" );
// Maximal difference.
524 boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
525 sb.append( getMaximalDifference() );
526 boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
527 sb.append( "</td>" );
// Maximal difference in counts: written signed for binary comparisons ...
529 if ( isTreatAsBinaryComparison() ) {
530 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
531 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
532 sb.append( getMaximalDifferenceInCounts() );
533 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): likely bug -- this second boldStartIfSortedBy( MAX_COUNTS_DIFFERENCE ) looks
// like it should be boldEndIfSortedBy; as written, no matching end call for
// MAX_COUNTS_DIFFERENCE is visible. Confirm and fix.
534 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
// ... and as an absolute value in what is presumably the else branch (the else line is not visible).
537 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
538 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
539 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
540 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): same likely bug as above -- probably meant to be boldEndIfSortedBy.
541 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
543 sb.append( "</td>" );
// Species count, under a check for non-binary comparisons; the statements inside the two
// sort-field checks are not visible (presumably bold markup).
544 if ( !isTreatAsBinaryComparison() ) {
546 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
549 sb.append( getSpeciesData().size() );
550 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
553 sb.append( "</td>" );
// Per-species data: map key order when no custom order is set, custom order otherwise.
555 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
557 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
558 sb.append( "</td>" );
562 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
563 sb.append( "</td>" );
565 sb.append( "</tr>" );
// Renders this similarity as plain text: the domain id followed by the value of the
// active sort field.
// NOTE(review): this listing omits lines of the method (most case labels, break statements,
// any separator appends, closing braces and the return statement are not visible) --
// confirm against the full source.
569 private StringBuffer toStringBufferSimpleTabDelimited() {
570 final StringBuffer sb = new StringBuffer();
571 sb.append( getDomainId() );
572 switch ( getSortField() ) {
// Score statistics are passed through ForesterUtil.round( value, 3 ).
575 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
579 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
583 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
587 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
591 sb.append( getMaximalDifference() );
// Both count-difference sort fields share one branch: signed value for binary
// comparisons, absolute value otherwise.
592 case ABS_MAX_COUNTS_DIFFERENCE:
593 case MAX_COUNTS_DIFFERENCE:
595 if ( isTreatAsBinaryComparison() ) {
596 sb.append( getMaximalDifferenceInCounts() );
599 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
604 sb.append( getSpeciesData().size() );
// Reached only for a sort field that no branch handles.
609 throw new AssertionError( "Unknown sort method: " + getSortField() );
// Disabled GO annotation output, kept as found.
611 // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
612 // ^^ sb.append( TAB );
613 // ^^ addGoInformation( sb, true, false );
/**
 * Output formats understood by {@code toStringBuffer}.
 */
public enum PRINT_OPTION {
    SIMPLE_TAB_DELIMITED,
    HTML
}