3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.surfacing;
29 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.go.GoId;
36 import org.forester.go.GoNameSpace;
37 import org.forester.go.GoTerm;
38 import org.forester.go.GoXRef;
39 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
40 import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
41 import org.forester.util.ForesterUtil;
43 public class PrintableDomainSimilarity implements DomainSimilarity {
// Separator appended after each species entry when species data is rendered.
45 final public static String SPECIES_SEPARATOR = " ";
// NOTE(review): TAB is not referenced by any line visible in this view; presumably the
// column separator of the tab-delimited output -- confirm.
46 final private static char TAB = '\t';
// Result codes returned by the comparison methods of this class.
47 final private static int BEFORE = -1;
48 final private static int EQUAL = 0;
49 final private static int AFTER = 1;
// Placeholder written for a species of the custom order that has no data for this domain.
50 final private static String NO_SPECIES = " ";
// Similarity score statistics; presumably backing the *SimilarityScore getters, whose
// bodies are not visible in this view -- confirm.
51 final private double _min;
52 final private double _max;
53 final private double _mean;
54 final private double _sd;
// Returned by getMaximalDifferenceInCounts() and getMaximalDifference().
56 private final int _max_difference_in_counts;
57 private final int _max_difference;
// GO output mode; set through setGoAnnotationOutput() and reset to null by init().
58 private DomainSimilarityCalculator.GoAnnotationOutput _go_annotation_output;
// Key domain together with its combinable domains; never null (checked in the constructor).
59 final private CombinableDomains _combinable_domains;
// Per-species data; never null and never empty (checked in the constructor).
60 final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
// Field that compareTo() and the output methods treat as the active sort criterion.
61 final private DomainSimilaritySortField _sort_field;
// Optional custom output order of species; set through setSpeciesOrder().
62 private List<Species> _species_order;
// When true, compareTo() compares species counts before the selected sort field.
63 private final boolean _sort_by_species_count_first;
// Level of detail for species data output; init() sets it to PUNCTILIOUS.
64 private DomainSimilarityCalculator.Detailedness _detailedness;
// Lookup from GO id to GO term; when null only GO ids are written.
65 private Map<GoId, GoTerm> _go_id_to_term_map;
// When non-null, appendGoTerm() only writes terms of this namespace.
66 private GoNameSpace _go_namespace_limit;
// Selects signed (binary comparison) versus absolute output of the counts difference.
67 private final boolean _treat_as_binary_comparison;
70 * If go_id_to_term_map not null, detailed GO information is written,
71 * only GO ids otherwise.
// Creates a printable similarity record for one key domain.
// The visible checks reject null combinable_domains, null sort_field, and null or empty
// species_data with an IllegalArgumentException.
// NOTE(review): several parameter declarations, guard conditions and field assignments
// of this constructor are not visible in this view.
75 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
82 final int max_difference_in_counts,
83 final int max_difference,
84 final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
85 final DomainSimilaritySortField sort_field,
86 final boolean sort_by_species_count_first,
87 final boolean treat_as_binary_comparison ) {
88 if ( combinable_domains == null ) {
89 throw new IllegalArgumentException( "attempt to use null combinable domains" );
91 if ( sort_field == null ) {
92 throw new IllegalArgumentException( "attempt to use null sorting" );
94 if ( species_data == null ) {
95 throw new IllegalArgumentException( "attempt to use null species data" );
97 if ( species_data.size() < 1 ) {
98 throw new IllegalArgumentException( "attempt to use empty species data" );
// The guard conditions for some of the following throws are not visible in this view.
101 throw new IllegalArgumentException( "attempt to use N less than 0" );
103 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
104 throw new IllegalArgumentException( "attempt to use N less than 1" );
107 throw new IllegalArgumentException( "attempt to use negative SD" );
110 throw new IllegalArgumentException( "attempt to use max smaller than min" );
113 _combinable_domains = combinable_domains;
119 _max_difference_in_counts = max_difference_in_counts;
120 _max_difference = max_difference;
121 _species_data = species_data;
122 _sort_field = sort_field;
123 _sort_by_species_count_first = sort_by_species_count_first;
124 _treat_as_binary_comparison = treat_as_binary_comparison;
// s species form s * (s - 1) / 2 unordered pairs; getN() must equal that number.
125 final int s = species_data.size();
126 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
127 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
128 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages below mention "more than two species", but no species-count
// guard is visible in this view -- presumably an enclosing condition exists; confirm.
131 if ( getMaximalDifferenceInCounts() < 0 ) {
132 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
134 if ( getMaximalDifference() < 0 ) {
135 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Appends GO annotation for the key domain to sb, dispatching on getGoAnnotationOutput().
// An unrecognized output mode ends in a RuntimeException.
// NOTE(review): the case labels and several append statements are not visible in this view.
140 private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) {
144 switch ( getGoAnnotationOutput() ) {
// Walk over all GO ids attached to the key domain.
146 final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds();
147 boolean first = true;
148 for( int i = 0; i < go_ids; ++i ) {
149 final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i );
// With a GO id to term map available, write the full term; ids missing from the map are reported inline.
150 if ( getGoIdToTermMap() != null ) {
151 if ( getGoIdToTermMap().containsKey( go_id ) ) {
152 first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html );
155 sb.append( "go id \"" + go_id + "\" not found ["
156 + getCombinableDomains().getKeyDomain().getId() + "]" );
// Link the bare GO id to AmiGO (presumably the branch taken for HTML output without a term map -- confirm).
164 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id
165 + "\" target=\"amigo_window\">" + go_id + "</a>" );
179 throw new RuntimeException( "unknown " + getGoAnnotationOutput() );
// Appends the entry for one species to sb: the species id (as a taxonomy link in one
// branch), the species-specific data unless detailedness is BASIC, and finally
// SPECIES_SEPARATOR.
// NOTE(review): some conditions and append statements are not visible in this view.
186 private void addSpeciesSpecificDomainData( final StringBuffer sb, final Species species, final boolean html ) {
187 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
// A link is only written when a taxonomy link base is configured and the species id is 3 to 5 characters long.
192 if ( ( SurfacingConstants.TAXONOMY_LINK != null ) && ( species.getSpeciesId().length() > 2 )
193 && ( species.getSpeciesId().length() < 6 ) ) {
194 sb.append( "<a href=\"" + SurfacingConstants.TAXONOMY_LINK + species.getSpeciesId()
195 + "\" target=\"taxonomy_window\">" + species.getSpeciesId() + "</a>" );
198 sb.append( species.getSpeciesId() );
203 sb.append( species.getSpeciesId() );
205 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
207 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
213 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Appends one GO term (id, name, namespace, cross references) to sb, provided no
// namespace limit is set or the term belongs to the limiting namespace.
// NOTE(review): the returned value and the separators written between the parts are not
// visible in this view; addGoInformation() stores the result back into its "first" flag.
216 private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) {
217 if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) {
221 final GoId go_id = go_term.getGoId();
223 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
230 sb.append( go_term.getName() );
// The namespace is only spelled out when output is not already limited to a single namespace.
232 if ( getGoNamespaceLimit() == null ) {
234 sb.append( go_term.getGoNameSpace().toString() );
236 for( final GoXRef xref : go_term.getGoXRefs() ) {
238 sb.append( xref.toString() );
// Acts only when sort_field is the field this instance is sorted by.
// NOTE(review): the statement executed in that case is not visible in this view;
// presumably it appends the closing bold tag to sb -- confirm.
246 private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
247 if ( getSortField() == sort_field ) {
// Acts only when sort_field is the field this instance is sorted by.
// NOTE(review): the statement executed in that case is not visible in this view;
// presumably it appends the opening bold tag to sb -- confirm.
252 private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
253 if ( getSortField() == sort_field ) {
258 private int compareByDomainId( final DomainSimilarity other ) {
259 return getDomainId().compareTo( other.getDomainId() );
262 private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
263 final int s_this = getSpeciesData().size();
264 final int s_other = domain_similarity.getSpeciesData().size();
265 if ( s_this < s_other ) {
266 return PrintableDomainSimilarity.BEFORE;
268 else if ( s_this > s_other ) {
269 return PrintableDomainSimilarity.AFTER;
272 return PrintableDomainSimilarity.EQUAL;
277 public int compareTo( final DomainSimilarity domain_similarity ) {
278 if ( this == domain_similarity ) {
279 return PrintableDomainSimilarity.EQUAL;
281 else if ( domain_similarity == null ) {
282 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
284 else if ( domain_similarity.getClass() != this.getClass() ) {
285 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
286 + domain_similarity.getClass() );
288 switch ( getSortField() ) {
290 if ( isSortBySpeciesCountFirst() ) {
291 final int i = compareBySpeciesCount( domain_similarity );
292 if ( i != PrintableDomainSimilarity.EQUAL ) {
296 if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
297 return PrintableDomainSimilarity.BEFORE;
299 else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
300 return PrintableDomainSimilarity.AFTER;
303 return compareByDomainId( domain_similarity );
306 if ( isSortBySpeciesCountFirst() ) {
307 final int i = compareBySpeciesCount( domain_similarity );
308 if ( i != PrintableDomainSimilarity.EQUAL ) {
312 if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
313 return PrintableDomainSimilarity.BEFORE;
315 else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
316 return PrintableDomainSimilarity.AFTER;
319 return compareByDomainId( domain_similarity );
322 if ( isSortBySpeciesCountFirst() ) {
323 final int i = compareBySpeciesCount( domain_similarity );
324 if ( i != PrintableDomainSimilarity.EQUAL ) {
328 if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
329 return PrintableDomainSimilarity.BEFORE;
331 else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
332 return PrintableDomainSimilarity.AFTER;
335 return compareByDomainId( domain_similarity );
338 if ( isSortBySpeciesCountFirst() ) {
339 final int i = compareBySpeciesCount( domain_similarity );
340 if ( i != PrintableDomainSimilarity.EQUAL ) {
344 if ( getStandardDeviationOfSimilarityScore() < domain_similarity
345 .getStandardDeviationOfSimilarityScore() ) {
346 return PrintableDomainSimilarity.BEFORE;
348 else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
349 .getStandardDeviationOfSimilarityScore() ) {
350 return PrintableDomainSimilarity.AFTER;
353 return compareByDomainId( domain_similarity );
356 if ( isSortBySpeciesCountFirst() ) {
357 final int i = compareBySpeciesCount( domain_similarity );
358 if ( i != PrintableDomainSimilarity.EQUAL ) {
362 if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
363 return PrintableDomainSimilarity.BEFORE;
365 else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
366 return PrintableDomainSimilarity.AFTER;
369 return compareByDomainId( domain_similarity );
371 case ABS_MAX_COUNTS_DIFFERENCE:
372 if ( isSortBySpeciesCountFirst() ) {
373 final int i = compareBySpeciesCount( domain_similarity );
374 if ( i != PrintableDomainSimilarity.EQUAL ) {
378 if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
379 .getMaximalDifferenceInCounts() ) ) {
380 return PrintableDomainSimilarity.BEFORE;
382 else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
383 .getMaximalDifferenceInCounts() ) ) {
384 return PrintableDomainSimilarity.AFTER;
387 return compareByDomainId( domain_similarity );
389 case MAX_COUNTS_DIFFERENCE:
390 if ( getSpeciesData().size() != 2 ) {
391 throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
393 if ( isSortBySpeciesCountFirst() ) {
394 final int i = compareBySpeciesCount( domain_similarity );
395 if ( i != PrintableDomainSimilarity.EQUAL ) {
399 if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
400 return PrintableDomainSimilarity.BEFORE;
402 else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
403 return PrintableDomainSimilarity.AFTER;
406 return compareByDomainId( domain_similarity );
409 final int i = compareBySpeciesCount( domain_similarity );
410 if ( i != PrintableDomainSimilarity.EQUAL ) {
414 return compareByDomainId( domain_similarity );
417 return compareByDomainId( domain_similarity );
419 throw new AssertionError( "Unknown sort method: " + getSortField() );
423 public SortedSet<DomainId> getCombinableDomainIds( final Species species_of_combinable_domain ) {
424 final SortedSet<DomainId> sorted_ids = new TreeSet<DomainId>();
425 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
426 for( final DomainId id : getSpeciesData().get( species_of_combinable_domain )
427 .getCombinableDomainIdToCountsMap().keySet() ) {
428 sorted_ids.add( id );
434 private CombinableDomains getCombinableDomains() {
435 return _combinable_domains;
438 private DomainSimilarityCalculator.Detailedness getDetaildness() {
439 return _detailedness;
443 public DomainId getDomainId() {
444 return getCombinableDomains().getKeyDomain();
447 private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() {
448 return _go_annotation_output;
451 private Map<GoId, GoTerm> getGoIdToTermMap() {
452 return _go_id_to_term_map;
455 public GoNameSpace getGoNamespaceLimit() {
456 return _go_namespace_limit;
460 public int getMaximalDifference() {
461 return _max_difference;
465 public int getMaximalDifferenceInCounts() {
466 return _max_difference_in_counts;
// Accessor for the maximal similarity score; used by compareTo() and both output methods.
// NOTE(review): the body is not visible in this view; presumably returns _max -- confirm.
470 public double getMaximalSimilarityScore() {
// Accessor for the mean similarity score; used by compareTo() and both output methods.
// NOTE(review): the body is not visible in this view; presumably returns _mean -- confirm.
475 public double getMeanSimilarityScore() {
// Accessor for the minimal similarity score; used by compareTo() and both output methods.
// NOTE(review): the body is not visible in this view; presumably returns _min -- confirm.
480 public double getMinimalSimilarityScore() {
// Accessor for the sort field that drives compareTo() and the highlighted output column.
// NOTE(review): the body is not visible in this view; presumably returns _sort_field -- confirm.
489 private DomainSimilaritySortField getSortField() {
494 public SortedSet<Species> getSpecies() {
495 final SortedSet<Species> species = new TreeSet<Species>();
496 for( final Species s : getSpeciesData().keySet() ) {
502 public List<Species> getSpeciesCustomOrder() {
503 return _species_order;
507 public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
508 return _species_data;
511 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html ) {
512 final StringBuffer sb = new StringBuffer();
513 for( final Species species : getSpeciesData().keySet() ) {
514 addSpeciesSpecificDomainData( sb, species, html );
519 private StringBuffer getSpeciesDataInCustomOrder( final boolean html ) {
520 final StringBuffer sb = new StringBuffer();
521 for( final Species order_species : getSpeciesCustomOrder() ) {
522 if ( getSpeciesData().keySet().contains( order_species ) ) {
523 addSpeciesSpecificDomainData( sb, order_species, html );
526 sb.append( PrintableDomainSimilarity.NO_SPECIES );
527 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Accessor for the standard deviation of the similarity scores; used by compareTo() and both output methods.
// NOTE(review): the body is not visible in this view; presumably returns _sd -- confirm.
534 public double getStandardDeviationOfSimilarityScore() {
538 private void init() {
539 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
540 _go_annotation_output = null;
541 _go_id_to_term_map = null;
544 private boolean isSortBySpeciesCountFirst() {
545 return _sort_by_species_count_first;
548 private boolean isTreatAsBinaryComparison() {
549 return _treat_as_binary_comparison;
552 public void setDetailedness( final Detailedness detailedness ) {
553 _detailedness = detailedness;
556 public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) {
557 _go_annotation_output = go_annotation_output;
560 public void setGoIdToTermMap( final Map<GoId, GoTerm> go_id_to_term_map ) {
561 _go_id_to_term_map = go_id_to_term_map;
564 public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) {
565 _go_namespace_limit = go_namespace_limit;
568 public void setSpeciesOrder( final List<Species> species_order ) {
569 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
570 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
572 _species_order = species_order;
576 public String toString() {
577 return toStringBuffer( null ).toString();
581 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option ) {
582 switch ( print_option ) {
583 case SIMPLE_TAB_DELIMITED:
584 return toStringBufferSimpleTabDelimited();
586 return toStringBufferDetailedHTML();
588 throw new AssertionError( "Unknown print option: " + print_option );
// Renders this similarity as HTML table cells closed by "</tr>": domain id link, mean,
// SD and min/max (non-binary comparisons only), maximal difference, counts difference,
// species count (non-binary only), optional GO information, and the species data.
// The value of the active sort field is wrapped by the boldStart/boldEnd helpers.
// NOTE(review): the cell-opening appends and some literal separators are not visible in this view.
592 private StringBuffer toStringBufferDetailedHTML() {
593 final StringBuffer sb = new StringBuffer();
// Domain id, linked to its Pfam family page.
596 boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
597 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\">" + getDomainId()
599 boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
600 sb.append( "</td>" );
// Mean similarity score, rounded to 3 digits.
602 boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
603 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
604 boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
605 sb.append( "</td>" );
// Standard deviation and min/max are only written for non-binary comparisons.
606 if ( !isTreatAsBinaryComparison() ) {
609 boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
610 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
611 boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
613 sb.append( "</td>" );
616 boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
617 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
618 boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
620 boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
621 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
622 boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
624 sb.append( "</td>" );
// Maximal difference.
627 boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
628 sb.append( getMaximalDifference() );
629 boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
630 sb.append( "</td>" );
// Counts difference: signed for binary comparisons, absolute value otherwise.
632 if ( isTreatAsBinaryComparison() ) {
633 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
634 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
635 sb.append( getMaximalDifferenceInCounts() );
636 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): this calls boldStartIfSortedBy after the value, where every other cell
// calls boldEndIfSortedBy for the field it opened -- likely meant to be
// boldEndIfSortedBy( MAX_COUNTS_DIFFERENCE ); confirm and fix.
637 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
640 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
641 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
642 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
643 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): same pattern as above -- boldStartIfSortedBy where the closing call is expected.
644 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
646 sb.append( "</td>" );
// Species count; treated as a sort column when sorting by species count or by species count first.
647 if ( !isTreatAsBinaryComparison() ) {
649 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
652 sb.append( getSpeciesData().size() );
653 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
656 sb.append( "</td>" );
// GO information is written unless the GO annotation output mode is NONE.
658 if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
660 addGoInformation( sb, true, true );
661 sb.append( "</td>" );
// Species data: map key order when no custom order is set, custom order otherwise.
663 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
665 sb.append( getSpeciesDataInAlphabeticalOrder( true ) );
666 sb.append( "</td>" );
670 sb.append( getSpeciesDataInCustomOrder( true ) );
671 sb.append( "</td>" );
673 sb.append( "</tr>" );
// Renders this similarity as plain text: the domain id, then the value belonging to the
// active sort field (scores rounded to 3 digits), then GO information unless the GO
// annotation output mode is NONE. An unhandled sort field ends in an AssertionError.
// NOTE(review): most case labels, the separator appends and the break statements are not
// visible in this view.
677 private StringBuffer toStringBufferSimpleTabDelimited() {
678 final StringBuffer sb = new StringBuffer();
679 sb.append( getDomainId() );
680 switch ( getSortField() ) {
683 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
687 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
691 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
695 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
// NOTE(review): the case label below directly follows this append in the original line
// numbering, leaving no room for a break -- the maximal-difference branch appears to fall
// through and also write the counts difference. Confirm whether that is intended.
699 sb.append( getMaximalDifference() );
700 case ABS_MAX_COUNTS_DIFFERENCE:
701 case MAX_COUNTS_DIFFERENCE:
// Signed counts difference for binary comparisons, absolute value otherwise.
703 if ( isTreatAsBinaryComparison() ) {
704 sb.append( getMaximalDifferenceInCounts() );
707 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
712 sb.append( getSpeciesData().size() );
717 throw new AssertionError( "Unknown sort method: " + getSortField() );
719 if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
721 addGoInformation( sb, true, false );
726 public static enum PRINT_OPTION {
727 SIMPLE_TAB_DELIMITED, HTML;