X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FPairwiseGenomeComparator.java;h=ed4eae73769feba8241f994b8aff130554b6ab49;hb=3d5864a39739960c126f2ab5585162fd52d1f47d;hp=e9ee8b3d4864eb0d8507ff56473c496a5be65c80;hpb=f47d5c382bc4e329ff6977ebcc46b75b57ffc901;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index e9ee8b3..ed4eae7 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -43,6 +43,7 @@ import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; +import org.forester.phylogeny.Phylogeny; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.DescriptiveStatistics; @@ -51,10 +52,9 @@ import org.forester.util.ForesterUtil; public class PairwiseGenomeComparator { private List _domain_distance_scores_means; - private List _shared_domains_based_distances; private List _shared_binary_combinations_based_distances; + private List _shared_domains_based_distances; - //private List _histogram_datas; public PairwiseGenomeComparator() { init(); } @@ -63,9 +63,6 @@ public class PairwiseGenomeComparator { return _domain_distance_scores_means; } - //public List getHistogramDatas() { - // return _histogram_datas; - //} public List getSharedBinaryCombinationsBasedDistances() { return _shared_binary_combinations_based_distances; } @@ -74,20 +71,13 @@ public class PairwiseGenomeComparator { return _shared_domains_based_distances; } - private void init() { - //_histogram_datas = new ArrayList(); - _domain_distance_scores_means = new ArrayList(); - _shared_domains_based_distances = new ArrayList(); - _shared_binary_combinations_based_distances = new ArrayList(); - } - public void performPairwiseComparisons( final StringBuilder html_desc, final boolean sort_by_species_count_first, final Detailedness detailedness, final boolean ignore_domains_without_combs_in_all_spec, final boolean ignore_domains_specific_to_one_species, final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field, - final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, + final DomainSimilarity.PRINT_OPTION domain_similarity_print_option, final DomainSimilarity.DomainSimilarityScoring scoring, final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, @@ -102,11 +92,13 @@ public class PairwiseGenomeComparator { final String command_line_prg_name, final File out_dir, final boolean write_pairwise_comparisons, - final Map tax_code_to_id_map ) { + final Map tax_code_to_id_map, + final boolean calc_similarity_scores, + final Phylogeny phy ) { init(); - final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes ); - final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); - final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); + final DistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes ); + final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); + final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); if ( verbose ) { System.out.println(); System.out.println( "Pairwise genome distances:" ); @@ -130,11 +122,11 @@ public class PairwiseGenomeComparator { if ( ( list_of_genome_wide_combinable_domains.get( i ).getSize() < 1 ) || ( list_of_genome_wide_combinable_domains.get( j ).getSize() < 1 ) ) { domain_distance_scores_means - .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); + .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); shared_domains_based_distances - .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); + .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); shared_binary_combinations_based_distances - .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); + .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE ); continue; } final List genome_pair = new ArrayList( 2 ); @@ -146,6 +138,8 @@ public class PairwiseGenomeComparator { } final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field, sort_by_species_count_first, + true, + calc_similarity_scores, true ); final SortedSet similarities = calc .calculateSimilarities( pw_calc, @@ -157,9 +151,9 @@ public class PairwiseGenomeComparator { .calculateDescriptiveStatisticsForMeanValues( similarities ); final String species_j = species[ j ].getSpeciesId(); final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains - .get( i ), + .get( i ), list_of_genome_wide_combinable_domains - .get( j ) ); + .get( j ) ); genome_similarity_calculator.setAllowDomainsToBeIgnored( false ); double dissimilarity_score_mean; if ( stats.getN() < 1 ) { @@ -203,39 +197,30 @@ public class PairwiseGenomeComparator { } break; } - DescriptiveStatistics pw_stats = null; if ( write_pairwise_comparisons ) { try { final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) ); - pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, - new StringBuilder( species_i + "-" - + species_j ), - writer, - null, - similarities, - true, - null, - domain_similarity_print_option, - scoring, - false, - tax_code_to_id_map, - false ); + SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, + new StringBuilder( species_i + "-" + species_j ), + null, + writer, + null, + similarities, + true, + null, + domain_similarity_print_option, + scoring, + false, + tax_code_to_id_map, + phy, + null ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" ); } } - if ( pw_stats != null ) { - if ( pw_stats.getMin() >= pw_stats.getMax() ) { - ForesterUtil - .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j - + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is [" - + pw_stats.getMax() - + "], possibly indicating that a genome is compared to itself" ); - } - } } } getDomainDistanceScoresMeans().add( domain_distance_scores_means ); @@ -279,8 +264,8 @@ public class PairwiseGenomeComparator { final SortedSet domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids, jacknife_ratio, generator ); - final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); - final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); + final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); + final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); for( int i = 0; i < number_of_genomes; ++i ) { final String species_i = species[ i ].getSpeciesId(); shared_domains_based_distances.setIdentifier( i, species_i ); @@ -290,15 +275,15 @@ public class PairwiseGenomeComparator { genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) ); genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) ); final DomainArchitectureBasedGenomeSimilarityCalculator genome_simiarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains - .get( i ), + .get( i ), list_of_genome_wide_combinable_domains - .get( j ) ); + .get( j ) ); genome_simiarity_calculator.setAllowDomainsToBeIgnored( true ); genome_simiarity_calculator.setDomainIdsToIgnore( domain_ids_to_ignore ); shared_domains_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator - .calculateSharedDomainsBasedGenomeSimilarityScore() ); + .calculateSharedDomainsBasedGenomeSimilarityScore() ); shared_binary_combinations_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator - .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() ); + .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() ); } } getSharedDomainsBasedDistances().add( shared_domains_based_distances ); @@ -309,6 +294,12 @@ public class PairwiseGenomeComparator { } } + private void init() { + _domain_distance_scores_means = new ArrayList(); + _shared_domains_based_distances = new ArrayList(); + _shared_binary_combinations_based_distances = new ArrayList(); + } + static private String[] getAllUniqueDomainIdAsArray( final List list_of_genome_wide_combinable_domains ) { String[] all_domain_ids_array; final SortedSet all_domain_ids = new TreeSet();