X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FBasicDomainSimilarityCalculator.java;h=b245cbc292b41bfc2d03603321283ef227e7253a;hb=612e51e63eb66025a04439fc380384a945a4a30f;hp=3b7b5873c547a73568893b5c64301601d445a316;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index 3b7b587..b245cbc 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -23,10 +23,11 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; @@ -34,21 +35,26 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator { final DomainSimilarity.DomainSimilaritySortField _sort; + private final boolean _calc_similarity_score; private final boolean _sort_by_species_count_first; private final boolean _treat_as_binary_comparison; public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, - final boolean treat_as_binary_comparison ) { + final boolean treat_as_binary_comparison, + final boolean calc_similarity_score ) { _sort = sort; _sort_by_species_count_first = sort_by_species_count_first; _treat_as_binary_comparison = treat_as_binary_comparison; + _calc_similarity_score = calc_similarity_score; } @Override @@ -60,11 +66,16 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" ); } final SortedSet similarities = new TreeSet(); - final SortedSet keys = new TreeSet(); + final SortedSet keys = new TreeSet(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() ); } - for( final DomainId key : keys ) { + final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); + int counter = 1; + System.out.println( keys.size() ); + for( final String key : keys ) { + ForesterUtil.updateProgress( counter, pf ); + counter++; final List same_id_cd_list = new ArrayList( cdc_list.size() ); final List species_with_key_id_domain = new ArrayList(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { @@ -85,9 +96,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat continue; } } - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // OLD: if ( same_id_cd_list.size() > 1 ) { if ( same_id_cd_list.size() > 0 ) { if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) { final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); @@ -99,50 +107,52 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat } } } - // ~~~ NEW: else { throw new RuntimeException( "this should not have happened" ); } - // ~~~ OLD: - // else if ( same_id_cd_list.size() == 1 ) { - // TODO need to go in file - // System.out.println( "only in one species [" + - // species_with_key_id_domain.get( 0 ) + "]: " + key_id ); - //} - //else { - // throw new RuntimeException( "this should not have happened" ); - // } } + System.out.println(); return similarities; } + public boolean isCalcSimilarityScore() { + return _calc_similarity_score; + } + private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List domains_list ) { if ( domains_list.size() == 1 ) { - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // ~~~OLD: - //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" ); - // ~~~new: - final SortedMap species_data = new TreeMap(); + final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); - return new PrintableDomainSimilarity( domains_list.get( 0 ), - 1.0, - 1.0, - 1.0, - 1.0, - 0.0, - 0, - 0, - 0, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + return new DomainSimilarity( domains_list.get( 0 ), + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + return new DomainSimilarity( domains_list.get( 0 ), + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0, + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + } + DescriptiveStatistics stat = null; + if ( isCalcSimilarityScore() ) { + stat = new BasicDescriptiveStatistics(); } - final DescriptiveStatistics stat = new BasicDescriptiveStatistics(); - final SortedMap species_data = new TreeMap(); + final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); int max_difference_in_counts = 0; @@ -170,59 +180,67 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat if ( Math.abs( difference ) > Math.abs( max_difference ) ) { max_difference = difference; } - stat.addValue( pairwise_similarity.getSimilarityScore() ); + if ( isCalcSimilarityScore() ) { + stat.addValue( pairwise_similarity.getSimilarityScore() ); + } } } - if ( stat.getN() < 1 ) { - throw new AssertionError( "empty descriptive statistics: this should not have happened" ); - } - if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { - throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + if ( isCalcSimilarityScore() ) { + if ( stat.getN() < 1 ) { + throw new RuntimeException( "empty descriptive statistics: this should not have happened" ); + } + if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { + throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + } } - if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) { + if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) { max_difference_in_counts = Math.abs( max_difference_in_counts ); if ( !is_domain_combination_based ) { - max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based. + max_difference = Math.abs( max_difference ); } } DomainSimilarity similarity = null; - if ( stat.getN() == 1 ) { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - 0.0, - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - stat.sampleStandardDeviation(), - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( stat.getN() == 1 ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + 0.0, + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + stat.sampleStandardDeviation(), + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } } return similarity; } - private DomainSimilarity.DomainSimilaritySortField getSort() { - return _sort; - } - private boolean isSortBySpeciesCountFirst() { return _sort_by_species_count_first; } @@ -231,12 +249,13 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat return _treat_as_binary_comparison; } - private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { - final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd.getKeyDomainProteinsCount(), - cd.getKeyDomainCount(), - cd.getNumberOfCombinableDomains(), - cd.getKeyDomainConfidenceDescriptiveStatistics() ); - for( final DomainId domain : cd.getCombinableDomains() ) { + private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { + final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(), + cd.getNumberOfCombinableDomains() ); + for( final String prot : cd.getKeyDomainProteins() ) { + sd.addKeyDomainProtein( prot ); + } + for( final String domain : cd.getCombinableDomains() ) { sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) ); } return sd;