3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.go.GoId;
36 import org.forester.go.GoNameSpace;
37 import org.forester.go.GoTerm;
38 import org.forester.go.GoXRef;
39 import org.forester.protein.DomainId;
40 import org.forester.species.Species;
41 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
42 import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
43 import org.forester.util.ForesterUtil;
45 public class PrintableDomainSimilarity implements DomainSimilarity {
// Text appended after each species entry when per-species data is written.
47 final public static String SPECIES_SEPARATOR = " ";
48 final private static char TAB = '\t';
// Result codes returned by the comparison methods of this class.
49 final private static int BEFORE = -1;
50 final private static int EQUAL = 0;
51 final private static int AFTER = 1;
// Placeholder written, in custom species order, for a listed species that has no data in this similarity.
52 final private static String NO_SPECIES = " ";
// Summary statistics of the similarity scores.
// NOTE(review): presumably what the get...SimilarityScore() accessors return -- their bodies are not visible here.
53 final private double _min;
54 final private double _max;
55 final private double _mean;
56 final private double _sd;
58 private final int _max_difference_in_counts;
59 private final int _max_difference;
// Selects how GO annotation is written; stays null until setGoAnnotationOutput is called.
60 private DomainSimilarityCalculator.GoAnnotationOutput _go_annotation_output;
61 final private CombinableDomains _combinable_domains;
// Per-species data, keyed (and therefore iterated) in the sort order of Species.
62 final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
63 final private DomainSimilaritySortField _sort_field;
// Optional custom species ordering; when null or empty the HTML output uses the key order of _species_data.
64 private List<Species> _species_order;
65 private final boolean _sort_by_species_count_first;
66 private DomainSimilarityCalculator.Detailedness _detailedness;
// When non-null, GO ids are resolved to GO terms through this map while writing GO information.
67 private Map<GoId, GoTerm> _go_id_to_term_map;
// When non-null, only GO terms whose namespace equals this value are written.
68 private GoNameSpace _go_namespace_limit;
69 private final boolean _treat_as_binary_comparison;
72 * If go_id_to_term_map is not null, detailed GO information is written;
73 * otherwise only GO ids are written.
// Creates the similarity record for one set of combinable domains across several species.
// Throws IllegalArgumentException for a null combinable_domains, sort_field or species_data,
// for an empty species_data map, and for the inconsistent numeric arguments named in the
// messages below.
// NOTE(review): some parameter lines, guard conditions and field assignments of this
// constructor are not visible here; the comments cover only what is shown.
77 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
84 final int max_difference_in_counts,
85 final int max_difference,
86 final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
87 final DomainSimilaritySortField sort_field,
88 final boolean sort_by_species_count_first,
89 final boolean treat_as_binary_comparison ) {
90 if ( combinable_domains == null ) {
91 throw new IllegalArgumentException( "attempt to use null combinable domains" );
93 if ( sort_field == null ) {
94 throw new IllegalArgumentException( "attempt to use null sorting" );
96 if ( species_data == null ) {
97 throw new IllegalArgumentException( "attempt to use null species data" );
99 if ( species_data.size() < 1 ) {
100 throw new IllegalArgumentException( "attempt to use empty species data" );
103 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species there must be at least one comparison.
105 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
106 throw new IllegalArgumentException( "attempt to use N less than 1" );
109 throw new IllegalArgumentException( "attempt to use negative SD" );
112 throw new IllegalArgumentException( "attempt to use max smaller than min" );
115 _combinable_domains = combinable_domains;
121 _max_difference_in_counts = max_difference_in_counts;
122 _max_difference = max_difference;
123 _species_data = species_data;
124 _sort_field = sort_field;
125 _sort_by_species_count_first = sort_by_species_count_first;
126 _treat_as_binary_comparison = treat_as_binary_comparison;
127 final int s = species_data.size();
// s species form ( s * s - s ) / 2 unordered pairs; n has to match that pair count exactly.
128 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
129 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
130 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): the messages below refer to "more than two species"; the condition that
// encloses these two checks is not visible here.
133 if ( getMaximalDifferenceInCounts() < 0 ) {
134 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
136 if ( getMaximalDifference() < 0 ) {
137 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
// Appends GO annotation of the key domain to sb, branching on getGoAnnotationOutput().
// Each GO id of the key domain is visited in index order. When a GO id-to-term map is set
// and contains the id, the term is written through appendGoTerm; other paths write a
// "not found" note or an AmiGO hyperlink for the bare id. An unknown output setting
// raises a RuntimeException.
// NOTE(review): the case labels and else branches are not visible here, so which path
// belongs to which setting (and how for_table/html are used) cannot be confirmed.
142 private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) {
146 switch ( getGoAnnotationOutput() ) {
148 final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds();
149 boolean first = true;
150 for( int i = 0; i < go_ids; ++i ) {
151 final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i );
152 if ( getGoIdToTermMap() != null ) {
153 if ( getGoIdToTermMap().containsKey( go_id ) ) {
// appendGoTerm hands back the updated "first" flag.
154 first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html );
157 sb.append( "go id \"" + go_id + "\" not found ["
158 + getCombinableDomains().getKeyDomain().getId() + "]" );
166 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id
167 + "\" target=\"amigo_window\">" + go_id + "</a>" );
181 throw new RuntimeException( "unknown " + getGoAnnotationOutput() );
// Appends the entry for one species to sb: the species id (written as a taxonomy hyperlink
// when tax_code_to_id_map is non-null and has an entry for the non-empty id), the
// species-specific data unless the detailedness is BASIC, and finally SPECIES_SEPARATOR.
// NOTE(review): the declaration of the html parameter and the branches that choose between
// the linked and the plain id are not visible here.
188 private void addSpeciesSpecificDomainData( final StringBuffer sb,
189 final Species species,
191 final Map<String, Integer> tax_code_to_id_map ) {
192 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
197 final String tax_code = species.getSpeciesId();
198 if ( !ForesterUtil.isEmpty( tax_code )
199 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
200 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
201 + tax_code_to_id_map.get( tax_code ) + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
204 sb.append( tax_code );
209 sb.append( species.getSpeciesId() );
211 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
213 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
219 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Appends one GO term to sb, but only when no GO namespace limit is set or the term's
// namespace equals the limit. The visible statements write the GO id as an AmiGO hyperlink,
// the term name, the namespace (only when no limit is set) and every cross-reference.
// NOTE(review): the return statements, the separators and the use of the first/html
// parameters are not visible here.
222 private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) {
223 if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) {
227 final GoId go_id = go_term.getGoId();
229 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
236 sb.append( go_term.getName() );
// The namespace is redundant when output is already restricted to a single namespace.
238 if ( getGoNamespaceLimit() == null ) {
240 sb.append( go_term.getGoNameSpace().toString() );
242 for( final GoXRef xref : go_term.getGoXRefs() ) {
244 sb.append( xref.toString() );
// Acts on sb only when this similarity is sorted by the given sort_field.
// NOTE(review): the guarded statement is not visible here; presumably it appends the
// closing tag of the emphasis opened by boldStartIfSortedBy -- confirm.
252 private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
253 if ( getSortField() == sort_field ) {
// Acts on sb only when this similarity is sorted by the given sort_field.
// NOTE(review): the guarded statement is not visible here; presumably it appends the
// opening tag of an emphasis that boldEndIfSortedBy closes -- confirm.
258 private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
259 if ( getSortField() == sort_field ) {
264 private int compareByDomainId( final DomainSimilarity other ) {
265 return getDomainId().compareTo( other.getDomainId() );
268 private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
269 final int s_this = getSpeciesData().size();
270 final int s_other = domain_similarity.getSpeciesData().size();
271 if ( s_this < s_other ) {
272 return PrintableDomainSimilarity.BEFORE;
274 else if ( s_this > s_other ) {
275 return PrintableDomainSimilarity.AFTER;
278 return PrintableDomainSimilarity.EQUAL;
// Orders this similarity relative to domain_similarity according to getSortField().
// Returns EQUAL for the same instance and throws IllegalArgumentException for null or for
// an argument of a different class. In the branches shown, a species-count comparison is
// made first when isSortBySpeciesCountFirst() is set, then the branch's own value is
// compared, and compareByDomainId supplies the result when those values do not differ.
// Smaller minimal/maximal/mean/SD scores sort BEFORE, whereas larger maximal differences
// and larger (absolute) count differences sort BEFORE.
// NOTE(review): most case labels and the statements inside the species-count checks are
// not visible here.
283 public int compareTo( final DomainSimilarity domain_similarity ) {
284 if ( this == domain_similarity ) {
285 return PrintableDomainSimilarity.EQUAL;
287 else if ( domain_similarity == null ) {
288 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
290 else if ( domain_similarity.getClass() != this.getClass() ) {
291 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
292 + domain_similarity.getClass() );
294 switch ( getSortField() ) {
// Branch comparing the minimal similarity score.
296 if ( isSortBySpeciesCountFirst() ) {
297 final int i = compareBySpeciesCount( domain_similarity );
298 if ( i != PrintableDomainSimilarity.EQUAL ) {
302 if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
303 return PrintableDomainSimilarity.BEFORE;
305 else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
306 return PrintableDomainSimilarity.AFTER;
309 return compareByDomainId( domain_similarity );
// Branch comparing the maximal similarity score.
312 if ( isSortBySpeciesCountFirst() ) {
313 final int i = compareBySpeciesCount( domain_similarity );
314 if ( i != PrintableDomainSimilarity.EQUAL ) {
318 if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
319 return PrintableDomainSimilarity.BEFORE;
321 else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
322 return PrintableDomainSimilarity.AFTER;
325 return compareByDomainId( domain_similarity );
// Branch comparing the mean similarity score.
328 if ( isSortBySpeciesCountFirst() ) {
329 final int i = compareBySpeciesCount( domain_similarity );
330 if ( i != PrintableDomainSimilarity.EQUAL ) {
334 if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
335 return PrintableDomainSimilarity.BEFORE;
337 else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
338 return PrintableDomainSimilarity.AFTER;
341 return compareByDomainId( domain_similarity );
// Branch comparing the standard deviation of the similarity score.
344 if ( isSortBySpeciesCountFirst() ) {
345 final int i = compareBySpeciesCount( domain_similarity );
346 if ( i != PrintableDomainSimilarity.EQUAL ) {
350 if ( getStandardDeviationOfSimilarityScore() < domain_similarity
351 .getStandardDeviationOfSimilarityScore() ) {
352 return PrintableDomainSimilarity.BEFORE;
354 else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
355 .getStandardDeviationOfSimilarityScore() ) {
356 return PrintableDomainSimilarity.AFTER;
359 return compareByDomainId( domain_similarity );
// Branch comparing the maximal difference; note the reversed direction (larger sorts BEFORE).
362 if ( isSortBySpeciesCountFirst() ) {
363 final int i = compareBySpeciesCount( domain_similarity );
364 if ( i != PrintableDomainSimilarity.EQUAL ) {
368 if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
369 return PrintableDomainSimilarity.BEFORE;
371 else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
372 return PrintableDomainSimilarity.AFTER;
375 return compareByDomainId( domain_similarity );
// Larger absolute difference in counts sorts BEFORE.
377 case ABS_MAX_COUNTS_DIFFERENCE:
378 if ( isSortBySpeciesCountFirst() ) {
379 final int i = compareBySpeciesCount( domain_similarity );
380 if ( i != PrintableDomainSimilarity.EQUAL ) {
384 if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
385 .getMaximalDifferenceInCounts() ) ) {
386 return PrintableDomainSimilarity.BEFORE;
388 else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
389 .getMaximalDifferenceInCounts() ) ) {
390 return PrintableDomainSimilarity.AFTER;
393 return compareByDomainId( domain_similarity );
// The signed difference in counts is only accepted for exactly two species.
395 case MAX_COUNTS_DIFFERENCE:
396 if ( getSpeciesData().size() != 2 ) {
397 throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
399 if ( isSortBySpeciesCountFirst() ) {
400 final int i = compareBySpeciesCount( domain_similarity );
401 if ( i != PrintableDomainSimilarity.EQUAL ) {
405 if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
406 return PrintableDomainSimilarity.BEFORE;
408 else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
409 return PrintableDomainSimilarity.AFTER;
412 return compareByDomainId( domain_similarity );
// Branch that compares the species count unconditionally, then the domain id.
415 final int i = compareBySpeciesCount( domain_similarity );
416 if ( i != PrintableDomainSimilarity.EQUAL ) {
420 return compareByDomainId( domain_similarity );
423 return compareByDomainId( domain_similarity );
425 throw new AssertionError( "Unknown sort method: " + getSortField() );
// Collects, into a new TreeSet, the keys of getCombinableDomainIdToCountsMap() for the given
// species. Nothing is added when the species has no entry in the species data.
// NOTE(review): the return statement is not visible here.
429 public SortedSet<DomainId> getCombinableDomainIds( final Species species_of_combinable_domain ) {
430 final SortedSet<DomainId> sorted_ids = new TreeSet<DomainId>();
431 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
432 for( final DomainId id : getSpeciesData().get( species_of_combinable_domain )
433 .getCombinableDomainIdToCountsMap().keySet() ) {
434 sorted_ids.add( id );
440 private CombinableDomains getCombinableDomains() {
441 return _combinable_domains;
444 private DomainSimilarityCalculator.Detailedness getDetaildness() {
445 return _detailedness;
449 public DomainId getDomainId() {
450 return getCombinableDomains().getKeyDomain();
453 private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() {
454 return _go_annotation_output;
457 private Map<GoId, GoTerm> getGoIdToTermMap() {
458 return _go_id_to_term_map;
461 public GoNameSpace getGoNamespaceLimit() {
462 return _go_namespace_limit;
466 public int getMaximalDifference() {
467 return _max_difference;
471 public int getMaximalDifferenceInCounts() {
472 return _max_difference_in_counts;
476 public double getMaximalSimilarityScore() {
481 public double getMeanSimilarityScore() {
486 public double getMinimalSimilarityScore() {
495 private DomainSimilaritySortField getSortField() {
// Creates a new TreeSet of Species and iterates over the keys of the species data map.
// NOTE(review): the loop body and the return statement are not visible here; presumably
// each key is added to the set and the set is returned -- confirm.
500 public SortedSet<Species> getSpecies() {
501 final SortedSet<Species> species = new TreeSet<Species>();
502 for( final Species s : getSpeciesData().keySet() ) {
508 public List<Species> getSpeciesCustomOrder() {
509 return _species_order;
513 public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
514 return _species_data;
// Writes the entry of every species in the species data map into a fresh StringBuffer,
// visiting the species in the key order of that sorted map and delegating each entry to
// addSpeciesSpecificDomainData.
// NOTE(review): the return statement is not visible here.
517 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
518 final Map<String, Integer> tax_code_to_id_map ) {
519 final StringBuffer sb = new StringBuffer();
520 for( final Species species : getSpeciesData().keySet() ) {
521 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map );
// Writes per-species entries into a fresh StringBuffer in the order of getSpeciesCustomOrder().
// A listed species that is a key of the species data map is written through
// addSpeciesSpecificDomainData; NO_SPECIES followed by SPECIES_SEPARATOR is appended on the
// other path, which keeps one slot per listed species.
// NOTE(review): the else keyword separating the two paths and the return statement are
// not visible here.
526 private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
527 final StringBuffer sb = new StringBuffer();
528 for( final Species order_species : getSpeciesCustomOrder() ) {
529 if ( getSpeciesData().keySet().contains( order_species ) ) {
530 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map );
533 sb.append( PrintableDomainSimilarity.NO_SPECIES );
534 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
541 public double getStandardDeviationOfSimilarityScore() {
545 private void init() {
546 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
547 _go_annotation_output = null;
548 _go_id_to_term_map = null;
551 private boolean isSortBySpeciesCountFirst() {
552 return _sort_by_species_count_first;
555 private boolean isTreatAsBinaryComparison() {
556 return _treat_as_binary_comparison;
559 public void setDetailedness( final Detailedness detailedness ) {
560 _detailedness = detailedness;
563 public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) {
564 _go_annotation_output = go_annotation_output;
567 public void setGoIdToTermMap( final Map<GoId, GoTerm> go_id_to_term_map ) {
568 _go_id_to_term_map = go_id_to_term_map;
571 public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) {
572 _go_namespace_limit = go_namespace_limit;
575 public void setSpeciesOrder( final List<Species> species_order ) {
576 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
577 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
579 _species_order = species_order;
// Renders this similarity in the requested format. SIMPLE_TAB_DELIMITED delegates to
// toStringBufferSimpleTabDelimited(); another branch delegates to
// toStringBufferDetailedHTML( tax_code_to_id_map ); an AssertionError is thrown for an
// unknown option.
// NOTE(review): the label of the HTML branch and the default label are not visible here.
583 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
584 final Map<String, Integer> tax_code_to_id_map ) {
585 switch ( print_option ) {
586 case SIMPLE_TAB_DELIMITED:
587 return toStringBufferSimpleTabDelimited();
589 return toStringBufferDetailedHTML( tax_code_to_id_map );
591 throw new AssertionError( "Unknown print option: " + print_option );
// Renders this similarity as HTML table cells closed by "</tr>": linked domain id, Google
// Scholar link, mean, SD and min/max (both only when not treated as a binary comparison),
// maximal difference, maximal difference in counts, species count (only when not treated as
// a binary comparison), GO information unless GO output is NONE, and the per-species data in
// custom order when one is set, otherwise in map key order.
// The value that matches the active sort field is wrapped by a boldStartIfSortedBy /
// boldEndIfSortedBy pair.
// NOTE(review): the statements opening the row and cells, and the return, are not visible here.
595 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map ) {
596 final StringBuffer sb = new StringBuffer();
599 boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
600 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
601 + getDomainId() + "</a>" );
602 boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
603 sb.append( "</td>" );
605 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
606 + "\" target=\"gs_window\">gs</a>" );
607 sb.append( "</td>" );
609 boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
610 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
611 boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
612 sb.append( "</td>" );
613 if ( !isTreatAsBinaryComparison() ) {
616 boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
617 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
618 boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
620 sb.append( "</td>" );
623 boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
624 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
625 boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
627 boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
628 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
629 boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
631 sb.append( "</td>" );
634 boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
635 sb.append( getMaximalDifference() );
636 boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
637 sb.append( "</td>" );
// Binary comparisons show the signed difference in counts, all others the absolute value.
639 if ( isTreatAsBinaryComparison() ) {
640 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
641 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
642 sb.append( getMaximalDifferenceInCounts() );
643 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// Fix: close (not re-open) the emphasis started for MAX_COUNTS_DIFFERENCE above.
644 boldEndIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
647 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
648 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
649 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
650 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// Fix: close (not re-open) the emphasis started for MAX_COUNTS_DIFFERENCE above.
651 boldEndIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
653 sb.append( "</td>" );
654 if ( !isTreatAsBinaryComparison() ) {
// The species count is emphasised both when it is the sort field and when it is the primary sort key.
656 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
659 sb.append( getSpeciesData().size() );
660 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
663 sb.append( "</td>" );
665 if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
667 addGoInformation( sb, true, true );
668 sb.append( "</td>" );
// Without a custom species order the species are written in the key order of the species data map.
670 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
672 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
673 sb.append( "</td>" );
677 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
678 sb.append( "</td>" );
680 sb.append( "</tr>" );
// Renders this similarity as plain text: the domain id, then the single value that belongs
// to the active sort field (scores rounded to 3 decimals), then GO information unless GO
// output is NONE. An unknown sort field raises an AssertionError.
// NOTE(review): several case labels, separators and the return statement are not visible here.
684 private StringBuffer toStringBufferSimpleTabDelimited() {
685 final StringBuffer sb = new StringBuffer();
686 sb.append( getDomainId() );
687 switch ( getSortField() ) {
690 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
694 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
698 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
702 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
706 sb.append( getMaximalDifference() );
// Fix: without this break the maximal-difference case fell through into the
// counts-difference cases below and wrote a second, unrelated value.
break;
707 case ABS_MAX_COUNTS_DIFFERENCE:
708 case MAX_COUNTS_DIFFERENCE:
// Binary comparisons show the signed difference in counts, all others the absolute value.
710 if ( isTreatAsBinaryComparison() ) {
711 sb.append( getMaximalDifferenceInCounts() );
714 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
719 sb.append( getSpeciesData().size() );
724 throw new AssertionError( "Unknown sort method: " + getSortField() );
726 if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
728 addGoInformation( sb, true, false );
/**
 * Output formats accepted by {@code toStringBuffer}.
 */
public enum PRINT_OPTION {
    SIMPLE_TAB_DELIMITED,
    HTML
}