X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FBasicDomainSimilarityCalculator.java;h=b245cbc292b41bfc2d03603321283ef227e7253a;hb=5ea47511ea9c077b4b4709bed68ac31d6eee0477;hp=5b5a91e39b90c4dbb5cf7f4fc9eec99b497e00da;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java index 5b5a91e..b245cbc 100644 --- a/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java +++ b/forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java @@ -7,7 +7,7 @@ // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -17,16 +17,17 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; @@ -34,23 +35,29 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import org.forester.species.Species; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator { final DomainSimilarity.DomainSimilaritySortField _sort; + private final boolean _calc_similarity_score; private final boolean _sort_by_species_count_first; private final boolean _treat_as_binary_comparison; public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort, final boolean sort_by_species_count_first, - final boolean treat_as_binary_comparison ) { + final boolean treat_as_binary_comparison, + final boolean calc_similarity_score ) { _sort = sort; _sort_by_species_count_first = sort_by_species_count_first; _treat_as_binary_comparison = treat_as_binary_comparison; + _calc_similarity_score = calc_similarity_score; } + @Override public SortedSet calculateSimilarities( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List cdc_list, final boolean ignore_domains_without_combinations_in_any_genome, @@ -59,11 +66,16 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinale domains collections" ); } final SortedSet similarities = new TreeSet(); - final SortedSet keys = new TreeSet(); + final SortedSet keys = new TreeSet(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() ); } - for( final DomainId key : keys ) { + final DecimalFormat pf = new java.text.DecimalFormat( "000000" ); + int counter = 1; + System.out.println( keys.size() ); + for( final String key : keys ) { + ForesterUtil.updateProgress( counter, pf ); + counter++; final List same_id_cd_list = new ArrayList( cdc_list.size() ); final List species_with_key_id_domain = new ArrayList(); for( final GenomeWideCombinableDomains cdc : cdc_list ) { @@ -84,9 +96,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat continue; } } - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // OLD: if ( same_id_cd_list.size() > 1 ) { if ( same_id_cd_list.size() > 0 ) { if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) { final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list ); @@ -98,52 +107,54 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat } } } - // ~~~ NEW: else { throw new RuntimeException( "this should not have happened" ); } - // ~~~ OLD: - // else if ( same_id_cd_list.size() == 1 ) { - // TODO need to go in file - // System.out.println( "only in one species [" + - // species_with_key_id_domain.get( 0 ) + "]: " + key_id ); - //} - //else { - // throw new RuntimeException( "this should not have happened" ); - // } } + System.out.println(); return similarities; } + public boolean isCalcSimilarityScore() { + return _calc_similarity_score; + } + private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator, final List domains_list ) { if ( domains_list.size() == 1 ) { - // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55 - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // ~~~OLD: - //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" ); - // ~~~new: - final SortedMap species_data = new TreeMap(); + final SortedMap species_data = new TreeMap(); species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); - return new PrintableDomainSimilarity( domains_list.get( 0 ), - 1.0, - 1.0, - 1.0, - 1.0, - 0.0, - 0, - 0, - 0, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + return new DomainSimilarity( domains_list.get( 0 ), + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + return new DomainSimilarity( domains_list.get( 0 ), + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0, + 0, + 0, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } } - final DescriptiveStatistics stat = new BasicDescriptiveStatistics(); - final SortedMap species_data = new TreeMap(); - species_data.put( domains_list.get( 0 ).getSpecies(), createSpeciesSpecificDomainSimilariyData( domains_list - .get( 0 ) ) ); + DescriptiveStatistics stat = null; + if ( isCalcSimilarityScore() ) { + stat = new BasicDescriptiveStatistics(); + } + final SortedMap species_data = new TreeMap(); + species_data.put( domains_list.get( 0 ).getSpecies(), + createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) ); int max_difference_in_counts = 0; int max_difference = 0; final boolean is_domain_combination_based = pairwise_calculator instanceof CombinationsBasedPairwiseDomainSimilarityCalculator; @@ -169,59 +180,67 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat if ( Math.abs( difference ) > Math.abs( max_difference ) ) { max_difference = difference; } - stat.addValue( pairwise_similarity.getSimilarityScore() ); + if ( isCalcSimilarityScore() ) { + stat.addValue( pairwise_similarity.getSimilarityScore() ); + } } } - if ( stat.getN() < 1 ) { - throw new AssertionError( "empty descriptive statistics: this should not have happened" ); - } - if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { - throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + if ( isCalcSimilarityScore() ) { + if ( stat.getN() < 1 ) { + throw new RuntimeException( "empty descriptive statistics: this should not have happened" ); + } + if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) { + throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" ); + } } - if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) { + if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) { max_difference_in_counts = Math.abs( max_difference_in_counts ); if ( !is_domain_combination_based ) { - max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based. + max_difference = Math.abs( max_difference ); } } DomainSimilarity similarity = null; - if ( stat.getN() == 1 ) { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - 0.0, - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( !isCalcSimilarityScore() ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); } else { - similarity = new PrintableDomainSimilarity( domains_list.get( 0 ), - stat.getMin(), - stat.getMax(), - stat.arithmeticMean(), - stat.median(), - stat.sampleStandardDeviation(), - stat.getN(), - max_difference_in_counts, - max_difference, - species_data, - getSort(), - isSortBySpeciesCountFirst(), - isTreatAsBinaryComparison() ); + if ( stat.getN() == 1 ) { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + 0.0, + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } + else { + similarity = new DomainSimilarity( domains_list.get( 0 ), + stat.getMin(), + stat.getMax(), + stat.arithmeticMean(), + stat.median(), + stat.sampleStandardDeviation(), + stat.getN(), + max_difference_in_counts, + max_difference, + species_data, + isSortBySpeciesCountFirst(), + isTreatAsBinaryComparison() ); + } } return similarity; } - private DomainSimilarity.DomainSimilaritySortField getSort() { - return _sort; - } - private boolean isSortBySpeciesCountFirst() { return _sort_by_species_count_first; } @@ -230,11 +249,13 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat return _treat_as_binary_comparison; } - private static SpeciesSpecificDomainSimilariyData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { - final SpeciesSpecificDomainSimilariyData sd = new PrintableSpeciesSpecificDomainSimilariyData( cd - .getKeyDomainProteinsCount(), cd.getKeyDomainCount(), cd.getNumberOfCombinableDomains(), cd - .getKeyDomainConfidenceDescriptiveStatistics() ); - for( final DomainId domain : cd.getCombinableDomains() ) { + private static SpeciesSpecificDcData createSpeciesSpecificDomainSimilariyData( final CombinableDomains cd ) { + final SpeciesSpecificDcData sd = new PrintableSpeciesSpecificDcData( cd.getKeyDomainCount(), + cd.getNumberOfCombinableDomains() ); + for( final String prot : cd.getKeyDomainProteins() ) { + sd.addKeyDomainProtein( prot ); + } + for( final String domain : cd.getCombinableDomains() ) { sd.addProteinsExhibitingCombinationCount( domain, cd.getNumberOfProteinsExhibitingCombination( domain ) ); } return sd;