Changed more concrete DistanceMatrix references to reference the interface
[jalview.git] / forester / java / src / org / forester / surfacing / PairwiseGenomeComparator.java
index 99d336f..ed4eae7 100644 (file)
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.surfacing;
 
@@ -43,6 +43,8 @@ import org.forester.evoinference.matrix.distance.DistanceMatrix;
 import org.forester.go.GoId;
 import org.forester.go.GoNameSpace;
 import org.forester.go.GoTerm;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.species.Species;
 import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
 import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
@@ -50,10 +52,9 @@ import org.forester.util.ForesterUtil;
 public class PairwiseGenomeComparator {
 
     private List<DistanceMatrix> _domain_distance_scores_means;
-    private List<DistanceMatrix> _shared_domains_based_distances;
     private List<DistanceMatrix> _shared_binary_combinations_based_distances;
+    private List<DistanceMatrix> _shared_domains_based_distances;
 
-    //private List<HistogramData>  _histogram_datas;
     public PairwiseGenomeComparator() {
         init();
     }
@@ -62,9 +63,6 @@ public class PairwiseGenomeComparator {
         return _domain_distance_scores_means;
     }
 
-    //public List<HistogramData> getHistogramDatas() {
-    //    return _histogram_datas;
-    //}
     public List<DistanceMatrix> getSharedBinaryCombinationsBasedDistances() {
         return _shared_binary_combinations_based_distances;
     }
@@ -73,22 +71,15 @@ public class PairwiseGenomeComparator {
         return _shared_domains_based_distances;
     }
 
-    private void init() {
-        //_histogram_datas = new ArrayList<HistogramData>();
-        _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
-        _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
-        _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
-    }
-
     public void performPairwiseComparisons( final StringBuilder html_desc,
                                             final boolean sort_by_species_count_first,
                                             final Detailedness detailedness,
                                             final boolean ignore_domains_without_combs_in_all_spec,
                                             final boolean ignore_domains_specific_to_one_species,
                                             final DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field,
-                                            final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+                                            final DomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                             final DomainSimilarity.DomainSimilarityScoring scoring,
-                                            final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
+                                            final Map<String, List<GoId>> domain_id_to_go_ids_map,
                                             final Map<GoId, GoTerm> go_id_to_term_map,
                                             final GoNameSpace go_namespace_limit,
                                             final Species[] species,
@@ -100,11 +91,14 @@ public class PairwiseGenomeComparator {
                                             final String automated_pairwise_comparison_prefix,
                                             final String command_line_prg_name,
                                             final File out_dir,
-                                            final boolean write_pairwise_comparisons ) {
+                                            final boolean write_pairwise_comparisons,
+                                            final Map<String, Integer> tax_code_to_id_map,
+                                            final boolean calc_similarity_scores,
+                                            final Phylogeny phy ) {
         init();
-        final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
-        final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
-        final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        final DistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+        final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
         if ( verbose ) {
             System.out.println();
             System.out.println( "Pairwise genome distances:" );
@@ -128,11 +122,11 @@ public class PairwiseGenomeComparator {
                 if ( ( list_of_genome_wide_combinable_domains.get( i ).getSize() < 1 )
                         || ( list_of_genome_wide_combinable_domains.get( j ).getSize() < 1 ) ) {
                     domain_distance_scores_means
-                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
                     shared_domains_based_distances
-                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
                     shared_binary_combinations_based_distances
-                            .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
+                    .setValue( i, j, DomainArchitectureBasedGenomeSimilarityCalculator.MAX_SIMILARITY_SCORE );
                     continue;
                 }
                 final List<GenomeWideCombinableDomains> genome_pair = new ArrayList<GenomeWideCombinableDomains>( 2 );
@@ -144,24 +138,22 @@ public class PairwiseGenomeComparator {
                 }
                 final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                              sort_by_species_count_first,
+                                                                                             true,
+                                                                                             calc_similarity_scores,
                                                                                              true );
                 final SortedSet<DomainSimilarity> similarities = calc
                         .calculateSimilarities( pw_calc,
                                                 genome_pair,
                                                 ignore_domains_without_combs_in_all_spec,
                                                 ignore_domains_specific_to_one_species );
-                SurfacingUtil.decoratePrintableDomainSimilarities( similarities,
-                                                                   detailedness,
-                                                                   go_annotation_output,
-                                                                   go_id_to_term_map,
-                                                                   go_namespace_limit );
+                SurfacingUtil.decoratePrintableDomainSimilarities( similarities, detailedness );
                 final DescriptiveStatistics stats = SurfacingUtil
                         .calculateDescriptiveStatisticsForMeanValues( similarities );
                 final String species_j = species[ j ].getSpeciesId();
                 final DomainArchitectureBasedGenomeSimilarityCalculator genome_similarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
-                                                                                                                                                                      .get( i ),
+                                                                                                                                                              .get( i ),
                                                                                                                                                               list_of_genome_wide_combinable_domains
-                                                                                                                                                                      .get( j ) );
+                                                                                                                                                              .get( j ) );
                 genome_similarity_calculator.setAllowDomainsToBeIgnored( false );
                 double dissimilarity_score_mean;
                 if ( stats.getN() < 1 ) {
@@ -205,38 +197,30 @@ public class PairwiseGenomeComparator {
                         }
                         break;
                 }
-                DescriptiveStatistics pw_stats = null;
                 if ( write_pairwise_comparisons ) {
                     try {
                         final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str
                                 : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
-                        pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
-                                                                                new StringBuilder( species_i + "-"
-                                                                                        + species_j ),
-                                                                                writer,
-                                                                                null,
-                                                                                similarities,
-                                                                                true,
-                                                                                null,
-                                                                                domain_similarity_print_option,
-                                                                                domain_similarity_sort_field,
-                                                                                scoring,
-                                                                                false );
+                        SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+                                                                     new StringBuilder( species_i + "-" + species_j ),
+                                                                     null,
+                                                                     writer,
+                                                                     null,
+                                                                     similarities,
+                                                                     true,
+                                                                     null,
+                                                                     domain_similarity_print_option,
+                                                                     scoring,
+                                                                     false,
+                                                                     tax_code_to_id_map,
+                                                                     phy,
+                                                                     null );
                     }
                     catch ( final IOException e ) {
                         ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
                                 + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" );
                     }
                 }
-                if ( pw_stats != null ) {
-                    if ( pw_stats.getMin() >= pw_stats.getMax() ) {
-                        ForesterUtil
-                                .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j
-                                        + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is ["
-                                        + pw_stats.getMax()
-                                        + "], possibly indicating that a genome is compared to itself" );
-                    }
-                }
             }
         }
         getDomainDistanceScoresMeans().add( domain_distance_scores_means );
@@ -264,7 +248,7 @@ public class PairwiseGenomeComparator {
         else if ( jacknife_ratio >= 1.0 ) {
             throw new IllegalArgumentException( "attempt to perform jacknife resampling with jacknife ratio 1.0 or more" );
         }
-        final DomainId[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains );
+        final String[] all_unique_domain_ids = getAllUniqueDomainIdAsArray( list_of_genome_wide_combinable_domains );
         if ( verbose ) {
             System.out.println();
             System.out.println( "Jacknife: total of domains: " + all_unique_domain_ids.length );
@@ -277,11 +261,11 @@ public class PairwiseGenomeComparator {
             if ( verbose ) {
                 System.out.print( " " + r );
             }
-            final SortedSet<DomainId> domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids,
-                                                                                    jacknife_ratio,
-                                                                                    generator );
-            final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
-            final BasicSymmetricalDistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+            final SortedSet<String> domain_ids_to_ignore = randomlyPickDomainIds( all_unique_domain_ids,
+                                                                                  jacknife_ratio,
+                                                                                  generator );
+            final DistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
+            final DistanceMatrix shared_binary_combinations_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
             for( int i = 0; i < number_of_genomes; ++i ) {
                 final String species_i = species[ i ].getSpeciesId();
                 shared_domains_based_distances.setIdentifier( i, species_i );
@@ -291,15 +275,15 @@ public class PairwiseGenomeComparator {
                     genome_pair.add( list_of_genome_wide_combinable_domains.get( i ) );
                     genome_pair.add( list_of_genome_wide_combinable_domains.get( j ) );
                     final DomainArchitectureBasedGenomeSimilarityCalculator genome_simiarity_calculator = new DomainArchitectureBasedGenomeSimilarityCalculator( list_of_genome_wide_combinable_domains
-                                                                                                                                                                         .get( i ),
+                                                                                                                                                                 .get( i ),
                                                                                                                                                                  list_of_genome_wide_combinable_domains
-                                                                                                                                                                         .get( j ) );
+                                                                                                                                                                 .get( j ) );
                     genome_simiarity_calculator.setAllowDomainsToBeIgnored( true );
                     genome_simiarity_calculator.setDomainIdsToIgnore( domain_ids_to_ignore );
                     shared_domains_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator
-                            .calculateSharedDomainsBasedGenomeSimilarityScore() );
+                                                             .calculateSharedDomainsBasedGenomeSimilarityScore() );
                     shared_binary_combinations_based_distances.setValue( i, j, 1.0 - genome_simiarity_calculator
-                            .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() );
+                                                                         .calculateSharedBinaryDomainCombinationBasedGenomeSimilarityScore() );
                 }
             }
             getSharedDomainsBasedDistances().add( shared_domains_based_distances );
@@ -310,28 +294,34 @@ public class PairwiseGenomeComparator {
         }
     }
 
-    static private DomainId[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
-        DomainId[] all_domain_ids_array;
-        final SortedSet<DomainId> all_domain_ids = new TreeSet<DomainId>();
+    private void init() {
+        _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
+        _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
+        _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
+    }
+
+    static private String[] getAllUniqueDomainIdAsArray( final List<GenomeWideCombinableDomains> list_of_genome_wide_combinable_domains ) {
+        String[] all_domain_ids_array;
+        final SortedSet<String> all_domain_ids = new TreeSet<String>();
         for( final GenomeWideCombinableDomains genome_wide_combinable_domains : list_of_genome_wide_combinable_domains ) {
-            final SortedSet<DomainId> all_domains = genome_wide_combinable_domains.getAllDomainIds();
-            for( final DomainId domain : all_domains ) {
+            final SortedSet<String> all_domains = genome_wide_combinable_domains.getAllDomainIds();
+            for( final String domain : all_domains ) {
                 all_domain_ids.add( domain );
             }
         }
-        all_domain_ids_array = new DomainId[ all_domain_ids.size() ];
+        all_domain_ids_array = new String[ all_domain_ids.size() ];
         int n = 0;
-        for( final DomainId domain_id : all_domain_ids ) {
+        for( final String domain_id : all_domain_ids ) {
             all_domain_ids_array[ n++ ] = domain_id;
         }
         return all_domain_ids_array;
     }
 
-    static private SortedSet<DomainId> randomlyPickDomainIds( final DomainId[] all_domain_ids_array,
-                                                              final double jacknife_ratio,
-                                                              final Random generator ) {
+    static private SortedSet<String> randomlyPickDomainIds( final String[] all_domain_ids_array,
+                                                            final double jacknife_ratio,
+                                                            final Random generator ) {
         final int size = all_domain_ids_array.length;
-        final SortedSet<DomainId> random_domain_ids = new TreeSet<DomainId>();
+        final SortedSet<String> random_domain_ids = new TreeSet<String>();
         final int number_of_ids_pick = ForesterUtil.roundToInt( jacknife_ratio * size );
         while ( random_domain_ids.size() < number_of_ids_pick ) {
             final int r = generator.nextInt( size );