3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
27 package org.forester.surfacing;
29 import java.util.List;
31 import java.util.SortedMap;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.go.GoId;
36 import org.forester.go.GoNameSpace;
37 import org.forester.go.GoTerm;
38 import org.forester.go.GoXRef;
39 import org.forester.species.Species;
40 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
41 import org.forester.surfacing.DomainSimilarityCalculator.GoAnnotationOutput;
42 import org.forester.util.ForesterUtil;
// DomainSimilarity implementation that can be ordered by a configurable sort field
// (see compareTo) and rendered as tab-delimited text or as an HTML table row
// (see toStringBuffer).
44 public class PrintableDomainSimilarity implements DomainSimilarity {
// Separator appended after each species entry when species data is rendered.
46 final public static String SPECIES_SEPARATOR = " ";
47 final private static char TAB = '\t';
// Result codes returned by compareTo and compareBySpeciesCount.
48 final private static int BEFORE = -1;
49 final private static int EQUAL = 0;
50 final private static int AFTER = 1;
// Placeholder appended by getSpeciesDataInCustomOrder.
51 final private static String NO_SPECIES = " ";
// Similarity score statistics; presumably the values behind getMinimalSimilarityScore(),
// getMaximalSimilarityScore(), getMeanSimilarityScore() and
// getStandardDeviationOfSimilarityScore() -- TODO confirm.
52 final private double _min;
53 final private double _max;
54 final private double _mean;
55 final private double _sd;
// Returned by getMaximalDifferenceInCounts() and getMaximalDifference() respectively.
57 private final int _max_difference_in_counts;
58 private final int _max_difference;
// Mutable output settings; changed through the set* methods and reset by init().
59 private DomainSimilarityCalculator.GoAnnotationOutput _go_annotation_output;
60 final private CombinableDomains _combinable_domains;
// Per-species data, keyed (and therefore iterated) in the sort order of Species.
61 final private SortedMap<Species, SpeciesSpecificDomainSimilariyData> _species_data;
// Field that compareTo orders by.
62 final private DomainSimilaritySortField _sort_field;
// Optional custom output order of species; set by setSpeciesOrder().
63 private List<Species> _species_order;
// When true, compareTo compares the number of species before the sort field's value.
64 private final boolean _sort_by_species_count_first;
65 private DomainSimilarityCalculator.Detailedness _detailedness;
66 private Map<GoId, GoTerm> _go_id_to_term_map;
// When non-null, appendGoTerm only writes terms from this namespace.
67 private GoNameSpace _go_namespace_limit;
68 private final boolean _treat_as_binary_comparison;
71 * If go_id_to_term_map is not null, detailed GO information is written;
72 * otherwise only GO ids are written.
// Validates the arguments and stores them in the corresponding fields.
// Throws IllegalArgumentException for null combinable domains, a null sort field,
// null or empty species data, and for the N, SD, min/max and difference violations
// named in the messages below.
76 public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
83 final int max_difference_in_counts,
84 final int max_difference,
85 final SortedMap<Species, SpeciesSpecificDomainSimilariyData> species_data,
86 final DomainSimilaritySortField sort_field,
87 final boolean sort_by_species_count_first,
88 final boolean treat_as_binary_comparison ) {
89 if ( combinable_domains == null ) {
90 throw new IllegalArgumentException( "attempt to use null combinable domains" );
92 if ( sort_field == null ) {
93 throw new IllegalArgumentException( "attempt to use null sorting" );
95 if ( species_data == null ) {
96 throw new IllegalArgumentException( "attempt to use null species data" );
98 if ( species_data.size() < 1 ) {
99 throw new IllegalArgumentException( "attempt to use empty species data" );
102 throw new IllegalArgumentException( "attempt to use N less than 0" );
// With more than one species, n must be at least 1.
104 if ( ( species_data.size() > 1 ) && ( n < 1 ) ) {
105 throw new IllegalArgumentException( "attempt to use N less than 1" );
108 throw new IllegalArgumentException( "attempt to use negative SD" );
111 throw new IllegalArgumentException( "attempt to use max smaller than min" );
114 _combinable_domains = combinable_domains;
120 _max_difference_in_counts = max_difference_in_counts;
121 _max_difference = max_difference;
122 _species_data = species_data;
123 _sort_field = sort_field;
124 _sort_by_species_count_first = sort_by_species_count_first;
125 _treat_as_binary_comparison = treat_as_binary_comparison;
// For s species, require s*s - s == 2*n, i.e. n == s*(s-1)/2 (the number of
// unordered species pairs).
126 final int s = species_data.size();
127 if ( ( ( s * s ) - s ) != ( getN() * 2 ) ) {
128 throw new IllegalArgumentException( "illegal species count and n: species count:" + s + ", n:" + _n
129 + " for domain " + combinable_domains.getKeyDomain() );
// NOTE(review): judging by the messages, the two negative-difference checks below
// presumably apply only when more than two species are present -- confirm.
132 if ( getMaximalDifferenceInCounts() < 0 ) {
133 throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
135 if ( getMaximalDifference() < 0 ) {
136 throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
141 // private void addGoInformation( final StringBuffer sb, final boolean for_table, final boolean html ) {
142 // if ( !for_table ) {
145 // switch ( getGoAnnotationOutput() ) {
147 // final int go_ids = getCombinableDomains().getKeyDomain().getNumberOfGoIds();
148 // boolean first = true;
149 // for( int i = 0; i < go_ids; ++i ) {
150 // final GoId go_id = getCombinableDomains().getKeyDomain().getGoId( i );
151 // if ( getGoIdToTermMap() != null ) {
152 // if ( getGoIdToTermMap().containsKey( go_id ) ) {
153 // first = appendGoTerm( sb, getGoIdToTermMap().get( go_id ), first, html );
156 // sb.append( "go id \"" + go_id + "\" not found ["
157 // + getCombinableDomains().getKeyDomain().getId() + "]" );
162 // sb.append( ", " );
165 // sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id
166 // + "\" target=\"amigo_window\">" + go_id + "</a>" );
169 // sb.append( go_id );
180 // throw new RuntimeException( "unknown " + getGoAnnotationOutput() );
182 // if ( !for_table ) {
183 // sb.append( ">: " );
// Appends the entry for one species to sb and ends it with SPECIES_SEPARATOR.
// The species id is written either as a link built from
// SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK (when tax_code_to_id_map is non-null and
// contains the species' tax code) or as plain text. The species-specific data from
// getSpeciesData() appears to be added only when the detailedness is not BASIC.
// NOTE(review): presumably the link form is used for HTML output only -- confirm.
186 private void addSpeciesSpecificDomainData( final StringBuffer sb,
187 final Species species,
189 final Map<String, Integer> tax_code_to_id_map ) {
190 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
195 final String tax_code = species.getSpeciesId();
196 if ( !ForesterUtil.isEmpty( tax_code )
197 && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
198 sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK
199 + tax_code_to_id_map.get( tax_code ) + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
202 sb.append( tax_code );
207 sb.append( species.getSpeciesId() );
209 if ( getDetaildness() != DomainSimilarityCalculator.Detailedness.BASIC ) {
211 sb.append( getSpeciesData().get( species ).toStringBuffer( getDetaildness(), html ) );
217 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Appends a description of go_term to sb when no namespace limit is set or the term's
// namespace equals the limit: the GO id (as a link built from
// SurfacingConstants.AMIGO_LINK), the term name, the namespace (only when no limit is
// set) and the term's cross-references.
// NOTE(review): presumably `first` and the boolean result track whether a separator
// is needed between successive terms -- confirm.
220 private boolean appendGoTerm( final StringBuffer sb, final GoTerm go_term, final boolean first, final boolean html ) {
221 if ( ( getGoNamespaceLimit() == null ) || getGoNamespaceLimit().equals( go_term.getGoNameSpace() ) ) {
225 final GoId go_id = go_term.getGoId();
227 sb.append( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id + "\" target=\"amigo_window\">" + go_id
234 sb.append( go_term.getName() );
// The namespace is only spelled out when output is not already limited to one.
236 if ( getGoNamespaceLimit() == null ) {
238 sb.append( go_term.getGoNameSpace().toString() );
240 for( final GoXRef xref : go_term.getGoXRefs() ) {
242 sb.append( xref.toString() );
// Acts only when sort_field is the field this instance is sorted by.
// Judging by the name, it then closes bold markup in sb -- TODO confirm.
250 private void boldEndIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
251 if ( getSortField() == sort_field ) {
// Acts only when sort_field is the field this instance is sorted by.
// Judging by the name, it then opens bold markup in sb -- TODO confirm.
256 private void boldStartIfSortedBy( final DomainSimilaritySortField sort_field, final StringBuffer sb ) {
257 if ( getSortField() == sort_field ) {
// Compares this instance's domain id with that of other via compareTo.
// Used by compareTo as the final tie-breaker.
262 private int compareByDomainId( final DomainSimilarity other ) {
263 return getDomainId().compareTo( other.getDomainId() );
// Compares the number of species (size of the species data map) of this instance
// and domain_similarity: BEFORE when this has fewer, AFTER when it has more,
// EQUAL otherwise.
266 private int compareBySpeciesCount( final DomainSimilarity domain_similarity ) {
267 final int s_this = getSpeciesData().size();
268 final int s_other = domain_similarity.getSpeciesData().size();
269 if ( s_this < s_other ) {
270 return PrintableDomainSimilarity.BEFORE;
272 else if ( s_this > s_other ) {
273 return PrintableDomainSimilarity.AFTER;
276 return PrintableDomainSimilarity.EQUAL;
// Orders this similarity relative to domain_similarity according to getSortField().
// Returns EQUAL for the same instance. Throws IllegalArgumentException for null or
// for an argument whose class differs from this one's.
// Within each sort field the species count is consulted first when
// isSortBySpeciesCountFirst() is set, then the field's own value; ties fall back to
// the domain id.
281 public int compareTo( final DomainSimilarity domain_similarity ) {
282 if ( this == domain_similarity ) {
283 return PrintableDomainSimilarity.EQUAL;
285 else if ( domain_similarity == null ) {
286 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to null" );
288 else if ( domain_similarity.getClass() != this.getClass() ) {
289 throw new IllegalArgumentException( "attempt to compare " + this.getClass() + " to "
290 + domain_similarity.getClass() );
292 switch ( getSortField() ) {
294 if ( isSortBySpeciesCountFirst() ) {
295 final int i = compareBySpeciesCount( domain_similarity );
296 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Minimal similarity score: smaller sorts first.
300 if ( getMinimalSimilarityScore() < domain_similarity.getMinimalSimilarityScore() ) {
301 return PrintableDomainSimilarity.BEFORE;
303 else if ( getMinimalSimilarityScore() > domain_similarity.getMinimalSimilarityScore() ) {
304 return PrintableDomainSimilarity.AFTER;
307 return compareByDomainId( domain_similarity );
310 if ( isSortBySpeciesCountFirst() ) {
311 final int i = compareBySpeciesCount( domain_similarity );
312 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Maximal similarity score: smaller sorts first.
316 if ( getMaximalSimilarityScore() < domain_similarity.getMaximalSimilarityScore() ) {
317 return PrintableDomainSimilarity.BEFORE;
319 else if ( getMaximalSimilarityScore() > domain_similarity.getMaximalSimilarityScore() ) {
320 return PrintableDomainSimilarity.AFTER;
323 return compareByDomainId( domain_similarity );
326 if ( isSortBySpeciesCountFirst() ) {
327 final int i = compareBySpeciesCount( domain_similarity );
328 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Mean similarity score: smaller sorts first.
332 if ( getMeanSimilarityScore() < domain_similarity.getMeanSimilarityScore() ) {
333 return PrintableDomainSimilarity.BEFORE;
335 else if ( getMeanSimilarityScore() > domain_similarity.getMeanSimilarityScore() ) {
336 return PrintableDomainSimilarity.AFTER;
339 return compareByDomainId( domain_similarity );
342 if ( isSortBySpeciesCountFirst() ) {
343 final int i = compareBySpeciesCount( domain_similarity );
344 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Standard deviation of the similarity score: smaller sorts first.
348 if ( getStandardDeviationOfSimilarityScore() < domain_similarity
349 .getStandardDeviationOfSimilarityScore() ) {
350 return PrintableDomainSimilarity.BEFORE;
352 else if ( getStandardDeviationOfSimilarityScore() > domain_similarity
353 .getStandardDeviationOfSimilarityScore() ) {
354 return PrintableDomainSimilarity.AFTER;
357 return compareByDomainId( domain_similarity );
360 if ( isSortBySpeciesCountFirst() ) {
361 final int i = compareBySpeciesCount( domain_similarity );
362 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Maximal difference: note the reversed direction, larger sorts first.
366 if ( getMaximalDifference() > domain_similarity.getMaximalDifference() ) {
367 return PrintableDomainSimilarity.BEFORE;
369 else if ( getMaximalDifference() < domain_similarity.getMaximalDifference() ) {
370 return PrintableDomainSimilarity.AFTER;
373 return compareByDomainId( domain_similarity );
375 case ABS_MAX_COUNTS_DIFFERENCE:
376 if ( isSortBySpeciesCountFirst() ) {
377 final int i = compareBySpeciesCount( domain_similarity );
378 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Larger absolute difference in counts sorts first.
382 if ( Math.abs( getMaximalDifferenceInCounts() ) > Math.abs( domain_similarity
383 .getMaximalDifferenceInCounts() ) ) {
384 return PrintableDomainSimilarity.BEFORE;
386 else if ( Math.abs( getMaximalDifferenceInCounts() ) < Math.abs( domain_similarity
387 .getMaximalDifferenceInCounts() ) ) {
388 return PrintableDomainSimilarity.AFTER;
391 return compareByDomainId( domain_similarity );
393 case MAX_COUNTS_DIFFERENCE:
// The signed difference in counts is only accepted for exactly two species.
394 if ( getSpeciesData().size() != 2 ) {
395 throw new RuntimeException( "attempt to sort by maximal difference with species not equal to two" );
397 if ( isSortBySpeciesCountFirst() ) {
398 final int i = compareBySpeciesCount( domain_similarity );
399 if ( i != PrintableDomainSimilarity.EQUAL ) {
// Larger signed difference in counts sorts first.
403 if ( getMaximalDifferenceInCounts() > domain_similarity.getMaximalDifferenceInCounts() ) {
404 return PrintableDomainSimilarity.BEFORE;
406 else if ( getMaximalDifferenceInCounts() < domain_similarity.getMaximalDifferenceInCounts() ) {
407 return PrintableDomainSimilarity.AFTER;
410 return compareByDomainId( domain_similarity );
// Species count first, then domain id.
413 final int i = compareBySpeciesCount( domain_similarity );
414 if ( i != PrintableDomainSimilarity.EQUAL ) {
418 return compareByDomainId( domain_similarity );
// Domain id only.
421 return compareByDomainId( domain_similarity );
423 throw new AssertionError( "Unknown sort method: " + getSortField() );
// Collects, in a TreeSet (natural String order), the keys of the
// combinable-domain-id-to-counts map stored for species_of_combinable_domain.
// The set stays empty when the species has no entry in the species data.
427 public SortedSet<String> getCombinableDomainIds( final Species species_of_combinable_domain ) {
428 final SortedSet<String> sorted_ids = new TreeSet<String>();
429 if ( getSpeciesData().containsKey( species_of_combinable_domain ) ) {
430 for( final String id : getSpeciesData().get( species_of_combinable_domain )
431 .getCombinableDomainIdToCountsMap().keySet() ) {
432 sorted_ids.add( id );
// Returns the combinable domains passed to the constructor.
438 private CombinableDomains getCombinableDomains() {
439 return _combinable_domains;
// Returns the current level of detail used when rendering species data.
442 private DomainSimilarityCalculator.Detailedness getDetaildness() {
443 return _detailedness;
// Returns the id of this similarity's domain: the key domain of the combinable domains.
447 public String getDomainId() {
448 return getCombinableDomains().getKeyDomain();
// Returns the configured GO annotation output setting (null after init()).
451 private DomainSimilarityCalculator.GoAnnotationOutput getGoAnnotationOutput() {
452 return _go_annotation_output;
// Returns the GO id to GO term map (null after init()).
455 private Map<GoId, GoTerm> getGoIdToTermMap() {
456 return _go_id_to_term_map;
// Returns the GO namespace that output is limited to, or null when no limit is set.
459 public GoNameSpace getGoNamespaceLimit() {
460 return _go_namespace_limit;
// Returns the maximal difference passed to the constructor.
464 public int getMaximalDifference() {
465 return _max_difference;
// Returns the maximal difference in counts passed to the constructor. The value is
// signed: callers apply Math.abs where only the magnitude matters.
469 public int getMaximalDifferenceInCounts() {
470 return _max_difference_in_counts;
// Returns the maximal similarity score (presumably the _max field -- TODO confirm).
474 public double getMaximalSimilarityScore() {
// Returns the mean similarity score (presumably the _mean field -- TODO confirm).
479 public double getMeanSimilarityScore() {
// Returns the minimal similarity score (presumably the _min field -- TODO confirm).
484 public double getMinimalSimilarityScore() {
// Returns the field this similarity is sorted by (presumably _sort_field -- TODO confirm).
493 private DomainSimilaritySortField getSortField() {
// Builds a new TreeSet of species while iterating over the keys of the species data;
// presumably every key is added and the set returned -- TODO confirm.
498 public SortedSet<Species> getSpecies() {
499 final SortedSet<Species> species = new TreeSet<Species>();
500 for( final Species s : getSpeciesData().keySet() ) {
// Returns the custom species order set via setSpeciesOrder(); may be null.
506 public List<Species> getSpeciesCustomOrder() {
507 return _species_order;
// Returns the species data map itself (no copy is made here).
511 public SortedMap<Species, SpeciesSpecificDomainSimilariyData> getSpeciesData() {
512 return _species_data;
// Renders the entries of all species into a fresh StringBuffer, visiting them in the
// key order of the sorted species data map.
515 private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
516 final Map<String, Integer> tax_code_to_id_map ) {
517 final StringBuffer sb = new StringBuffer();
518 for( final Species species : getSpeciesData().keySet() ) {
519 addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map );
// Renders species entries into a fresh StringBuffer, following getSpeciesCustomOrder().
// Species that have data are written via addSpeciesSpecificDomainData.
// NOTE(review): presumably NO_SPECIES plus SPECIES_SEPARATOR is written only for
// species in the custom order that have no data -- confirm.
524 private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
525 final StringBuffer sb = new StringBuffer();
526 for( final Species order_species : getSpeciesCustomOrder() ) {
527 if ( getSpeciesData().keySet().contains( order_species ) ) {
528 addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map );
531 sb.append( PrintableDomainSimilarity.NO_SPECIES );
532 sb.append( PrintableDomainSimilarity.SPECIES_SEPARATOR );
// Returns the standard deviation of the similarity score (presumably the _sd field --
// TODO confirm).
539 public double getStandardDeviationOfSimilarityScore() {
// Resets the output settings: detailedness becomes PUNCTILIOUS, and the GO annotation
// output and the GO id to term map are cleared to null.
543 private void init() {
544 _detailedness = DomainSimilarityCalculator.Detailedness.PUNCTILIOUS;
545 _go_annotation_output = null;
546 _go_id_to_term_map = null;
// Returns whether compareTo compares species counts before the sort field's value.
549 private boolean isSortBySpeciesCountFirst() {
550 return _sort_by_species_count_first;
// Returns the treat-as-binary-comparison flag passed to the constructor; the output
// methods use it to choose between signed and absolute count differences.
553 private boolean isTreatAsBinaryComparison() {
554 return _treat_as_binary_comparison;
// Sets the level of detail used when rendering species data.
557 public void setDetailedness( final Detailedness detailedness ) {
558 _detailedness = detailedness;
// Sets the GO annotation output setting.
561 public void setGoAnnotationOutput( final GoAnnotationOutput go_annotation_output ) {
562 _go_annotation_output = go_annotation_output;
// Sets the map from GO ids to GO terms; the reference is stored as-is (no copy).
565 public void setGoIdToTermMap( final Map<GoId, GoTerm> go_id_to_term_map ) {
566 _go_id_to_term_map = go_id_to_term_map;
// Limits GO term output to go_namespace_limit; null removes the limit.
569 public void setGoNamespaceLimit( final GoNameSpace go_namespace_limit ) {
570 _go_namespace_limit = go_namespace_limit;
// Sets the custom species output order. species_order must not be null and must
// contain every species present in the species data; otherwise an
// IllegalArgumentException is thrown. The list is stored as-is (no copy).
573 public void setSpeciesOrder( final List<Species> species_order ) {
574 if ( !species_order.containsAll( getSpeciesData().keySet() ) ) {
575 throw new IllegalArgumentException( "list to order species must contain all species of multiple combinable domains similarity" );
577 _species_order = species_order;
// Renders this similarity in the format selected by print_option.
// SIMPLE_TAB_DELIMITED yields the tab-delimited form. The detailed HTML form
// (which uses tax_code_to_id_map for taxonomy links) is presumably selected by
// HTML -- TODO confirm. An unhandled option raises AssertionError.
581 public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
582 final Map<String, Integer> tax_code_to_id_map ) {
583 switch ( print_option ) {
584 case SIMPLE_TAB_DELIMITED:
585 return toStringBufferSimpleTabDelimited();
587 return toStringBufferDetailedHTML( tax_code_to_id_map );
589 throw new AssertionError( "Unknown print option: " + print_option );
// Renders this similarity as HTML table cells: domain id (linked via
// SurfacingConstants.PFAM_FAMILY_ID_LINK), a Google Scholar link, mean score,
// SD / min / max scores, maximal difference, maximal difference in counts,
// species count, and the per-species data. Scores are rounded to 3 decimals.
// The value belonging to the active sort field is wrapped by the
// boldStartIfSortedBy / boldEndIfSortedBy pair.
593 private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map ) {
594 final StringBuffer sb = new StringBuffer();
597 boldStartIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
598 sb.append( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + getDomainId() + "\" target=\"pfam_window\">"
599 + getDomainId() + "</a>" );
600 boldEndIfSortedBy( DomainSimilaritySortField.DOMAIN_ID, sb );
601 sb.append( "</td>" );
603 sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
604 + "\" target=\"gs_window\">gs</a>" );
605 sb.append( "</td>" );
607 boldStartIfSortedBy( DomainSimilaritySortField.MEAN, sb );
608 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
609 boldEndIfSortedBy( DomainSimilaritySortField.MEAN, sb );
610 sb.append( "</td>" );
// SD, min and max follow a check that this is not a binary comparison.
611 if ( !isTreatAsBinaryComparison() ) {
614 boldStartIfSortedBy( DomainSimilaritySortField.SD, sb );
615 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
616 boldEndIfSortedBy( DomainSimilaritySortField.SD, sb );
618 sb.append( "</td>" );
621 boldStartIfSortedBy( DomainSimilaritySortField.MIN, sb );
622 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
623 boldEndIfSortedBy( DomainSimilaritySortField.MIN, sb );
625 boldStartIfSortedBy( DomainSimilaritySortField.MAX, sb );
626 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
627 boldEndIfSortedBy( DomainSimilaritySortField.MAX, sb );
629 sb.append( "</td>" );
632 boldStartIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
633 sb.append( getMaximalDifference() );
634 boldEndIfSortedBy( DomainSimilaritySortField.MAX_DIFFERENCE, sb );
635 sb.append( "</td>" );
// Binary comparisons show the signed difference in counts.
637 if ( isTreatAsBinaryComparison() ) {
638 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
639 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
640 sb.append( getMaximalDifferenceInCounts() );
641 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): the value has already been appended, yet boldStartIfSortedBy is
// called again for MAX_COUNTS_DIFFERENCE; this looks like it should be
// boldEndIfSortedBy to close the emphasis opened above -- confirm and fix.
642 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
// This variant shows the absolute difference in counts.
645 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
646 boldStartIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
647 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
648 boldEndIfSortedBy( DomainSimilaritySortField.ABS_MAX_COUNTS_DIFFERENCE, sb );
// NOTE(review): same pattern as above -- boldEndIfSortedBy looks intended here.
649 boldStartIfSortedBy( DomainSimilaritySortField.MAX_COUNTS_DIFFERENCE, sb );
651 sb.append( "</td>" );
// The species count follows a check that this is not a binary comparison. The same
// sort-related test surrounds the count on both sides (species count is the sort
// field, or species count is compared first).
652 if ( !isTreatAsBinaryComparison() ) {
654 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
657 sb.append( getSpeciesData().size() );
658 if ( ( getSortField() == DomainSimilaritySortField.SPECIES_COUNT ) || isSortBySpeciesCountFirst() ) {
661 sb.append( "</td>" );
663 // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
664 // ^^ sb.append( "<td>" );
665 // ^^ addGoInformation( sb, true, true );
666 // ^^ sb.append( "</td>" );
// Without a custom species order (null or empty) species are listed in map key order;
// the custom-order rendering is used otherwise.
668 if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
670 sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
671 sb.append( "</td>" );
675 sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
676 sb.append( "</td>" );
678 sb.append( "</tr>" );
// Renders the compact text form: the domain id followed by the value that belongs to
// the active sort field (scores rounded to 3 decimals). An unhandled sort field
// raises AssertionError.
682 private StringBuffer toStringBufferSimpleTabDelimited() {
683 final StringBuffer sb = new StringBuffer();
684 sb.append( getDomainId() );
685 switch ( getSortField() ) {
688 sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
692 sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
696 sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
700 sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
// NOTE(review): no break appears between this append and the next case label, so the
// maximal-difference output seems to fall through and also emit the difference in
// counts -- confirm this is intended and mark it as a deliberate fall-through.
704 sb.append( getMaximalDifference() );
705 case ABS_MAX_COUNTS_DIFFERENCE:
706 case MAX_COUNTS_DIFFERENCE:
// Signed difference for binary comparisons, absolute value otherwise.
708 if ( isTreatAsBinaryComparison() ) {
709 sb.append( getMaximalDifferenceInCounts() );
712 sb.append( Math.abs( getMaximalDifferenceInCounts() ) );
717 sb.append( getSpeciesData().size() );
722 throw new AssertionError( "Unknown sort method: " + getSortField() );
724 // ^^ if ( getGoAnnotationOutput() != DomainSimilarityCalculator.GoAnnotationOutput.NONE ) {
725 // ^^ sb.append( TAB );
726 // ^^ addGoInformation( sb, true, false );
// Output formats accepted by toStringBuffer.
731 public static enum PRINT_OPTION {
732 SIMPLE_TAB_DELIMITED, HTML;