X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FPairwiseGenomeComparator.java;h=07ece5bae14277fcd1a5c96e5cc933d6f3193de3;hb=1c57d9dd98190445f28ff5b2d447614fb14dd1aa;hp=99d336fe1706f3fd6b44bb2a6d698bfc069622d9;hpb=eb02413234a507a55865bffd9f3677602d6ee8d0;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index 99d336f..07ece5b 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; @@ -43,6 +43,8 @@ import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; +import org.forester.phylogeny.Phylogeny; +import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.DescriptiveStatistics; import org.forester.util.ForesterUtil; @@ -53,7 +55,6 @@ public class PairwiseGenomeComparator { private List _shared_domains_based_distances; private List _shared_binary_combinations_based_distances; - //private List _histogram_datas; public PairwiseGenomeComparator() { init(); } @@ -62,9 +63,6 @@ public class PairwiseGenomeComparator { return _domain_distance_scores_means; } - //public List getHistogramDatas() { - // return _histogram_datas; - //} public List getSharedBinaryCombinationsBasedDistances() { return _shared_binary_combinations_based_distances; } @@ -74,7 +72,6 @@ public class PairwiseGenomeComparator { } private void init() { - //_histogram_datas = new ArrayList(); _domain_distance_scores_means = new ArrayList(); _shared_domains_based_distances = new ArrayList(); _shared_binary_combinations_based_distances = new ArrayList(); @@ -88,7 +85,7 @@ public class PairwiseGenomeComparator { final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field, final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, final DomainSimilarity.DomainSimilarityScoring scoring, - final Map> domain_id_to_go_ids_map, + final Map> domain_id_to_go_ids_map, final Map go_id_to_term_map, final GoNameSpace go_namespace_limit, final Species[] species, @@ -100,7 +97,10 @@ public class PairwiseGenomeComparator { final String automated_pairwise_comparison_prefix, final String command_line_prg_name, final File out_dir, - final boolean write_pairwise_comparisons ) { + final boolean write_pairwise_comparisons, + final Map tax_code_to_id_map, + final boolean calc_similarity_scores, + final Phylogeny phy ) { init(); final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); @@ -144,17 +144,14 @@ public class PairwiseGenomeComparator { } final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field, sort_by_species_count_first, - true ); + true, + calc_similarity_scores ); final SortedSet similarities = calc .calculateSimilarities( pw_calc, genome_pair, ignore_domains_without_combs_in_all_spec, ignore_domains_specific_to_one_species ); - SurfacingUtil.decoratePrintableDomainSimilarities( similarities, - detailedness, - go_annotation_output, - go_id_to_term_map, - go_namespace_limit ); + SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness ); final DescriptiveStatistics stats = SurfacingUtil .calculateDescriptiveStatisticsForMeanValues( similarities ); final String species_j = species[ j ].getSpeciesId(); @@ -205,38 +202,29 @@ public class PairwiseGenomeComparator { } break; } - DescriptiveStatistics pw_stats = null; if ( write_pairwise_comparisons ) { try { final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) ); - pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, - new StringBuilder( species_i + "-" - + species_j ), - writer, - null, - similarities, - true, - null, - domain_similarity_print_option, - domain_similarity_sort_field, - scoring, - false ); + SurfacingUtil.writeDomainSimilaritiesToFile( html_desc, + new StringBuilder( species_i + "-" + species_j ), + null, + writer, + null, + similarities, + true, + null, + domain_similarity_print_option, + scoring, + false, + tax_code_to_id_map, + phy ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" ); } } - if ( pw_stats != null ) { - if ( pw_stats.getMin() >= pw_stats.getMax() ) { - ForesterUtil - .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j - + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is [" - + pw_stats.getMax() - + "], possibly indicating that a genome is compared to itself" ); - } - } } } getDomainDistanceScoresMeans().add( domain_distance_scores_means ); @@ -264,7 +252,7 @@ public class PairwiseGenomeComparator { else if ( jacknife_ratio >= 1.0 ) { throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio 1.0 or more" ); } - final DomainId[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains ); + final String[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains ); if ( verbose ) { System.out.println(); System.out.println( "Jacknife: total of domains: " + all_unique_domain_ids.length ); @@ -277,9 +265,9 @@ public class PairwiseGenomeComparator { if ( verbose ) { System.out.print( " " + r ); } - final SortedSet domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids, - jacknife_ratio, - generator ); + final SortedSet domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids, + jacknife_ratio, + generator ); final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); for( int i = 0; i < number_of_genomes; ++i ) { @@ -310,28 +298,28 @@ public class PairwiseGenomeComparator { } } - static private DomainId[] getAllUniqueDomainIdAsArray( final List list_of_genome_wide_combinable_domains ) { - DomainId[] all_domain_ids_array; - final SortedSet all_domain_ids = new TreeSet(); + static private String[] getAllUniqueDomainIdAsArray( final List list_of_genome_wide_combinable_domains ) { + String[] all_domain_ids_array; + final SortedSet all_domain_ids = new TreeSet(); for( final GenomeWideCombinableDomains genome_wide_combinable_domains : list_of_genome_wide_combinable_domains ) { - final SortedSet all_domains = genome_wide_combinable_domains.getAllDomainIds(); - for( final DomainId domain : all_domains ) { + final SortedSet all_domains = genome_wide_combinable_domains.getAllDomainIds(); + for( final String domain : all_domains ) { all_domain_ids.add( domain ); } } - all_domain_ids_array = new DomainId[ all_domain_ids.size() ]; + all_domain_ids_array = new String[ all_domain_ids.size() ]; int n = 0; - for( final DomainId domain_id : all_domain_ids ) { + for( final String domain_id : all_domain_ids ) { all_domain_ids_array[ n++ ] = domain_id; } return all_domain_ids_array; } - static private SortedSet randomlyPickDomainIds( final DomainId[] all_domain_ids_array, - final double jacknife_ratio, - final Random generator ) { + static private SortedSet randomlyPickDomainIds( final String[] all_domain_ids_array, + final double jacknife_ratio, + final Random generator ) { final int size = all_domain_ids_array.length; - final SortedSet random_domain_ids = new TreeSet(); + final SortedSet random_domain_ids = new TreeSet(); final int number_of_ids_pick = ForesterUtil.roundToInt( jacknife_ratio * size ); while ( random_domain_ids.size() < number_of_ids_pick ) { final int r = generator.nextInt( size );