X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FBasicDomainSimilarityCalculator.java;h=b245cbc292b41bfc2d03603321283ef227e7253a;hb=612e51e63eb66025a04439fc380384a945a4a30f;hp=042b785df6a84b5557bd3203b1ba14edc12860f5;hpb=08a92c65e8b969a3ce2a4b511055cd6029357831;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index 042b785..b245cbc 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -27,6 +27,7 @@ package org.forester.surfacing; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; @@ -37,19 +38,23 @@ import java.util.TreeSet; import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator { final DomainSimilarity.DomainSimilaritySortField _sort; + private final boolean _calc_similarity_score; private final boolean _sort_by_species_count_first; private final boolean _treat_as_binary_comparison; public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, - final boolean treat_as_binary_comparison ) { + final boolean treat_as_binary_comparison, + final boolean calc_similarity_score ) { _sort = sort; _sort_by_species_count_first = sort_by_species_count_first; _treat_as_binary_comparison = treat_as_binary_comparison; + _calc_similarity_score = calc_similarity_score; } @Override @@ -65,7 +70,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat for( final GenomeWideCombinableDomains cdc : cdc_list ) { keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() ); } + final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); + int counter = 1; + System.out.println( keys.size() ); for( final String key : keys ) { + ForesterUtil.updateProgress( counter, pf ); + counter++; final List same_id_cd_list = new ArrayList( cdc_list.size() ); final List species_with_key_id_domain = new ArrayList(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { @@ -86,9 +96,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat continue; } } - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // OLD: if ( same_id_cd_list.size() > 1 ) { if ( same_id_cd_list.size() > 0 ) { if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) { final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); @@ -100,48 +107,51 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat } } } - // ~~~ NEW: else { throw new RuntimeException( "this should not have happened" ); } - // ~~~ OLD: - // else if ( same_id_cd_list.size() == 1 ) { - // TODO need to go in file - // System.out.println( "only in one species [" + - // species_with_key_id_domain.get( 0 ) + "]: " + key_id ); - //} - //else { - // throw new RuntimeException( "this should not have happened" ); - // } } + System.out.println(); return similarities; } + public boolean isCalcSimilarityScore() { + return _calc_similarity_score; + } + private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List domains_list ) { if ( domains_list.size() == 1 ) { - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // ~~~OLD: - //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" ); - // ~~~new: final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); - return new PrintableDomainSimilarity( domains_list.get( 0 ), - 1.0, - 1.0, - 1.0, - 1.0, - 0.0, - 0, - 0, - 0, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + return new DomainSimilarity( domains_list.get( 0 ), + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + return new DomainSimilarity( domains_list.get( 0 ), + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0, + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + } + DescriptiveStatistics stat = null; + if ( isCalcSimilarityScore() ) { + stat = new BasicDescriptiveStatistics(); } - final DescriptiveStatistics stat = new BasicDescriptiveStatistics(); final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); @@ -170,49 +180,63 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat if ( Math.abs( difference ) > Math.abs( max_difference ) ) { max_difference = difference; } - stat.addValue( pairwise_similarity.getSimilarityScore() ); + if ( isCalcSimilarityScore() ) { + stat.addValue( pairwise_similarity.getSimilarityScore() ); + } } } - if ( stat.getN() < 1 ) { - throw new AssertionError( "empty descriptive statistics: this should not have happened" ); - } - if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { - throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + if ( isCalcSimilarityScore() ) { + if ( stat.getN() < 1 ) { + throw new RuntimeException( "empty descriptive statistics: this should not have happened" ); + } + if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { + throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + } } - if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) { + if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) { max_difference_in_counts = Math.abs( max_difference_in_counts ); if ( !is_domain_combination_based ) { - max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based. + max_difference = Math.abs( max_difference ); } } DomainSimilarity similarity = null; - if ( stat.getN() == 1 ) { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - 0.0, - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - stat.sampleStandardDeviation(), - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( stat.getN() == 1 ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + 0.0, + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + stat.sampleStandardDeviation(), + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } } return similarity; }