inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 12 Jul 2013 23:53:00 +0000 (23:53 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 12 Jul 2013 23:53:00 +0000 (23:53 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/CountsBasedPairwiseDomainSimilarity.java
forester/java/src/org/forester/surfacing/DomainCountsBasedPairwiseSimilarityCalculator.java
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java
forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java
forester/java/src/org/forester/surfacing/ProteinCountsBasedPairwiseDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/SurfacingConstants.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/surfacing/TestSurfacing.java

index 71020c5..6f22a3a 100644 (file)
@@ -225,8 +225,8 @@ public class surfacing {
     final static private String                               INPUT_GENOMES_FILE_OPTION                                                     = "genomes";
     final static private String                               INPUT_SPECIES_TREE_OPTION                                                     = "species_tree";
     final static private String                               SEQ_EXTRACT_OPTION                                                            = "prot_extract";
-    final static private String                               PRG_VERSION                                                                   = "2.300";
-    final static private String                               PRG_DATE                                                                      = "130711";
+    final static private String                               PRG_VERSION                                                                   = "2.301";
+    final static private String                               PRG_DATE                                                                      = "130712";
     final static private String                               E_MAIL                                                                        = "czmasek@burnham.org";
     final static private String                               WWW                                                                           = "https://sites.google.com/site/cmzmasek/home/software/forester/surfacing";
     final static private boolean                              IGNORE_DUFS_DEFAULT                                                           = true;
@@ -278,6 +278,7 @@ public class surfacing {
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+    private static final boolean                              CALC_SIMILARITY_SCORES                                                        = false;
 
     private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                  final String[][] input_file_properties,
@@ -1631,7 +1632,7 @@ public class surfacing {
             all_bin_domain_combinations_gained_fitch = new ArrayList<BinaryDomainCombination>();
             all_bin_domain_combinations_lost_fitch = new ArrayList<BinaryDomainCombination>();
         }
-        final DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
+        DomainLengthsTable domain_lengths_table = new DomainLengthsTable();
         final File per_genome_domain_promiscuity_statistics_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR
                 + output_file + D_PROMISCUITY_FILE_SUFFIX );
         BufferedWriter per_genome_domain_promiscuity_statistics_writer = null;
@@ -2009,12 +2010,13 @@ public class surfacing {
             ForesterUtil.programMessage( PRG_NAME, "Wrote domain length data to: " + domain_lengths_analysis_outfile );
             System.out.println();
         }
+        domain_lengths_table = null;
         final long analysis_start_time = new Date().getTime();
         PairwiseDomainSimilarityCalculator pw_calc = null;
-        // double[] values_for_all_scores_histogram = null;
         final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                      sort_by_species_count_first,
-                                                                                     number_of_genomes == 2 );
+                                                                                     number_of_genomes == 2,
+                                                                                     CALC_SIMILARITY_SCORES );
         switch ( scoring ) {
             case COMBINATIONS:
                 pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
@@ -2069,19 +2071,17 @@ public class surfacing {
                     + new java.text.SimpleDateFormat( "yyyy.MM.dd HH:mm:ss" ).format( new java.util.Date() )
                     + "</td></tr>" + nl );
             html_desc.append( "</table>" + nl );
-            final DescriptiveStatistics pw_stats = SurfacingUtil
-                    .writeDomainSimilaritiesToFile( html_desc,
-                                                    new StringBuilder( number_of_genomes + " genomes" ),
-                                                    writer,
-                                                    split_writers,
-                                                    similarities,
-                                                    number_of_genomes == 2,
-                                                    species_order,
-                                                    domain_similarity_print_option,
-                                                    scoring,
-                                                    true,
-                                                    tax_code_to_id_map,
-                                                    false );
+            SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+                                                         new StringBuilder( number_of_genomes + " genomes" ),
+                                                         writer,
+                                                         split_writers,
+                                                         similarities,
+                                                         number_of_genomes == 2,
+                                                         species_order,
+                                                         domain_similarity_print_option,
+                                                         scoring,
+                                                         true,
+                                                         tax_code_to_id_map );
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
                     + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
         }
@@ -2118,7 +2118,8 @@ public class surfacing {
                                              surfacing.PRG_NAME,
                                              out_dir,
                                              write_pwc_files,
-                                             tax_code_to_id_map );
+                                             tax_code_to_id_map,
+                                             CALC_SIMILARITY_SCORES );
             String matrix_output_file = new String( output_file.toString() );
             if ( matrix_output_file.indexOf( '.' ) > 1 ) {
                 matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) );
index 042b785..81cef33 100644 (file)
@@ -27,6 +27,7 @@
 
 package org.forester.surfacing;
 
+import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.SortedMap;
@@ -37,19 +38,27 @@ import java.util.TreeSet;
 import org.forester.species.Species;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
 
 public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculator {
 
     final DomainSimilarity.DomainSimilaritySortField _sort;
     private final boolean                            _sort_by_species_count_first;
     private final boolean                            _treat_as_binary_comparison;
+    private final boolean                            _calc_similarity_score;
 
     public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
                                             final boolean sort_by_species_count_first,
-                                            final boolean treat_as_binary_comparison ) {
+                                            final boolean treat_as_binary_comparison,
+                                            final boolean calc_similarity_score ) {
         _sort = sort;
         _sort_by_species_count_first = sort_by_species_count_first;
         _treat_as_binary_comparison = treat_as_binary_comparison;
+        _calc_similarity_score = calc_similarity_score;
+    }
+
+    public boolean isCalcSimilarityScore() {
+        return _calc_similarity_score;
     }
 
     @Override
@@ -65,7 +74,12 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         for( final GenomeWideCombinableDomains cdc : cdc_list ) {
             keys.addAll( ( cdc ).getAllCombinableDomainsIds().keySet() );
         }
+        final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
+        int counter = 1;
+        System.out.println( keys.size() );
         for( final String key : keys ) {
+            ForesterUtil.updateProgress( counter, pf );
+            counter++;
             final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
             final List<Species> species_with_key_id_domain = new ArrayList<Species>();
             for( final GenomeWideCombinableDomains cdc : cdc_list ) {
@@ -86,9 +100,6 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                     continue;
                 }
             }
-            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
-            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            // OLD: if ( same_id_cd_list.size() > 1 ) {
             if ( same_id_cd_list.size() > 0 ) {
                 if ( !ignore_domains_specific_to_one_genome || ( same_id_cd_list.size() > 1 ) ) {
                     final DomainSimilarity s = calculateSimilarity( pairwise_calculator, same_id_cd_list );
@@ -100,48 +111,47 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                     }
                 }
             }
-            // ~~~ NEW:
             else {
                 throw new RuntimeException( "this should not have happened" );
             }
-            // ~~~ OLD:
-            // else if ( same_id_cd_list.size() == 1 ) {
-            // TODO need to go in file
-            // System.out.println( "only in one species [" +
-            // species_with_key_id_domain.get( 0 ) + "]: " + key_id );
-            //}
-            //else {
-            //    throw new RuntimeException( "this should not have happened" );
-            // }
         }
+        System.out.println();
         return similarities;
     }
 
     private DomainSimilarity calculateSimilarity( final PairwiseDomainSimilarityCalculator pairwise_calculator,
                                                   final List<CombinableDomains> domains_list ) {
         if ( domains_list.size() == 1 ) {
-            // BIG CHANGE IN LOGIC: Tuesday July 08, 0;55
-            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            // ~~~OLD:
-            //throw new IllegalArgumentException( "attempt to calculate multiple combinable domains similarity for less than two combinable domains" );
-            // ~~~new: 
             final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
             species_data.put( domains_list.get( 0 ).getSpecies(),
                               createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
-            return new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                  1.0,
-                                                  1.0,
-                                                  1.0,
-                                                  1.0,
-                                                  0.0,
-                                                  0,
-                                                  0,
-                                                  0,
-                                                  species_data,
-                                                  isSortBySpeciesCountFirst(),
-                                                  isTreatAsBinaryComparison() );
+            if ( !isCalcSimilarityScore() ) {
+                return new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                      0,
+                                                      0,
+                                                      species_data,
+                                                      isSortBySpeciesCountFirst(),
+                                                      isTreatAsBinaryComparison() );
+            }
+            else {
+                return new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                      1.0,
+                                                      1.0,
+                                                      1.0,
+                                                      1.0,
+                                                      0.0,
+                                                      0,
+                                                      0,
+                                                      0,
+                                                      species_data,
+                                                      isSortBySpeciesCountFirst(),
+                                                      isTreatAsBinaryComparison() );
+            }
+        }
+        DescriptiveStatistics stat = null;
+        if ( isCalcSimilarityScore() ) {
+            stat = new BasicDescriptiveStatistics();
         }
-        final DescriptiveStatistics stat = new BasicDescriptiveStatistics();
         final SortedMap<Species, SpeciesSpecificDcData> species_data = new TreeMap<Species, SpeciesSpecificDcData>();
         species_data.put( domains_list.get( 0 ).getSpecies(),
                           createSpeciesSpecificDomainSimilariyData( domains_list.get( 0 ) ) );
@@ -170,30 +180,28 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                 if ( Math.abs( difference ) > Math.abs( max_difference ) ) {
                     max_difference = difference;
                 }
-                stat.addValue( pairwise_similarity.getSimilarityScore() );
+                if ( isCalcSimilarityScore() ) {
+                    stat.addValue( pairwise_similarity.getSimilarityScore() );
+                }
             }
         }
-        if ( stat.getN() < 1 ) {
-            throw new AssertionError( "empty descriptive statistics: this should not have happened" );
-        }
-        if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
-            throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+        if ( isCalcSimilarityScore() ) {
+            if ( stat.getN() < 1 ) {
+                throw new RuntimeException( "empty descriptive statistics: this should not have happened" );
+            }
+            if ( ( stat.getN() != 1 ) && isTreatAsBinaryComparison() ) {
+                throw new IllegalArgumentException( "attmpt to treat similarity with N not equal to one as binary comparison" );
+            }
         }
-        if ( ( /*stat.getN() != 1 ||*/!isTreatAsBinaryComparison() ) && ( max_difference_in_counts < 0 ) ) {
+        if ( !isTreatAsBinaryComparison() && ( max_difference_in_counts < 0 ) ) {
             max_difference_in_counts = Math.abs( max_difference_in_counts );
             if ( !is_domain_combination_based ) {
-                max_difference = Math.abs( max_difference ); //=max_difference_in_counts for !is_domain_combination_based.
+                max_difference = Math.abs( max_difference );
             }
         }
         DomainSimilarity similarity = null;
-        if ( stat.getN() == 1 ) {
+        if ( !isCalcSimilarityScore() ) {
             similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                        stat.getMin(),
-                                                        stat.getMax(),
-                                                        stat.arithmeticMean(),
-                                                        stat.median(),
-                                                        0.0,
-                                                        stat.getN(),
                                                         max_difference_in_counts,
                                                         max_difference,
                                                         species_data,
@@ -201,18 +209,34 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                                                         isTreatAsBinaryComparison() );
         }
         else {
-            similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
-                                                        stat.getMin(),
-                                                        stat.getMax(),
-                                                        stat.arithmeticMean(),
-                                                        stat.median(),
-                                                        stat.sampleStandardDeviation(),
-                                                        stat.getN(),
-                                                        max_difference_in_counts,
-                                                        max_difference,
-                                                        species_data,
-                                                        isSortBySpeciesCountFirst(),
-                                                        isTreatAsBinaryComparison() );
+            if ( stat.getN() == 1 ) {
+                similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                            stat.getMin(),
+                                                            stat.getMax(),
+                                                            stat.arithmeticMean(),
+                                                            stat.median(),
+                                                            0.0,
+                                                            stat.getN(),
+                                                            max_difference_in_counts,
+                                                            max_difference,
+                                                            species_data,
+                                                            isSortBySpeciesCountFirst(),
+                                                            isTreatAsBinaryComparison() );
+            }
+            else {
+                similarity = new PrintableDomainSimilarity( domains_list.get( 0 ),
+                                                            stat.getMin(),
+                                                            stat.getMax(),
+                                                            stat.arithmeticMean(),
+                                                            stat.median(),
+                                                            stat.sampleStandardDeviation(),
+                                                            stat.getN(),
+                                                            max_difference_in_counts,
+                                                            max_difference,
+                                                            species_data,
+                                                            isSortBySpeciesCountFirst(),
+                                                            isTreatAsBinaryComparison() );
+            }
         }
         return similarity;
     }
index 81fdce7..8439f4c 100644 (file)
@@ -29,8 +29,8 @@ package org.forester.surfacing;
 
 public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimilarity {
 
-    private final double _score;
-    private final int    _copy_number_difference;
+    private final short _copy_number_difference;
+    private final short _counts_sum;
 
     /**
      * counts_difference: (counts for domain 1) minus (counts for domain 2).
@@ -39,16 +39,15 @@ public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimila
      * @param counts_difference value of domain_1 minus value of domain_2
      * @param counts_sum
      */
-    public CountsBasedPairwiseDomainSimilarity( final int counts_difference, final int counts_sum ) {
+    public CountsBasedPairwiseDomainSimilarity( final short counts_difference, final short counts_sum ) {
         if ( counts_sum <= 0 ) {
             throw new IllegalArgumentException( "attempt to use copy sum of less than or equal to 0: " + counts_sum );
         }
-        _copy_number_difference = counts_difference;
-        final int abs_copy_number_difference = Math.abs( counts_difference );
-        if ( abs_copy_number_difference > counts_sum ) {
+        if ( Math.abs( counts_difference ) > counts_sum ) {
             throw new IllegalArgumentException( "attempt to use absolute copy number difference larger than copy number sum" );
         }
-        _score = 1.0 - ( ( double ) abs_copy_number_difference / counts_sum );
+        _copy_number_difference = counts_difference;
+        _counts_sum = counts_sum;
     }
 
     /**
@@ -62,6 +61,6 @@ public class CountsBasedPairwiseDomainSimilarity implements PairwiseDomainSimila
 
     @Override
     public double getSimilarityScore() {
-        return _score;
+        return ( 1.0 - ( ( double ) Math.abs( _copy_number_difference ) / _counts_sum ) );
     }
 }
index 6077ffc..904b642 100644 (file)
@@ -35,8 +35,12 @@ public class DomainCountsBasedPairwiseSimilarityCalculator implements PairwiseDo
         if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
             throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
         }
-        final int dc1 = domains_1.getKeyDomainCount();
-        final int dc2 = domains_2.getKeyDomainCount();
-        return new CountsBasedPairwiseDomainSimilarity( dc1 - dc2, dc1 + dc2 );
+        if ( ( domains_1.getKeyDomainCount() > Short.MAX_VALUE ) || ( domains_2.getKeyDomainCount() > Short.MAX_VALUE )
+                || ( ( domains_1.getKeyDomainCount() + domains_2.getKeyDomainCount() ) > Short.MAX_VALUE ) ) {
+            throw new IllegalArgumentException( "too large for short!" );
+        }
+        final short dc1 = ( short ) domains_1.getKeyDomainCount();
+        final short dc2 = ( short ) domains_2.getKeyDomainCount();
+        return new CountsBasedPairwiseDomainSimilarity( ( short ) ( dc1 - dc2 ), ( short ) ( dc1 + dc2 ) );
     }
 }
index e9ee8b3..c020d82 100644 (file)
@@ -54,7 +54,6 @@ public class PairwiseGenomeComparator {
     private List<DistanceMatrix> _shared_domains_based_distances;
     private List<DistanceMatrix> _shared_binary_combinations_based_distances;
 
-    //private List<HistogramData>  _histogram_datas;
     public PairwiseGenomeComparator() {
         init();
     }
@@ -63,9 +62,6 @@ public class PairwiseGenomeComparator {
         return _domain_distance_scores_means;
     }
 
-    //public List<HistogramData> getHistogramDatas() {
-    //    return _histogram_datas;
-    //}
     public List<DistanceMatrix> getSharedBinaryCombinationsBasedDistances() {
         return _shared_binary_combinations_based_distances;
     }
@@ -75,7 +71,6 @@ public class PairwiseGenomeComparator {
     }
 
     private void init() {
-        //_histogram_datas = new ArrayList<HistogramData>();
         _domain_distance_scores_means = new ArrayList<DistanceMatrix>();
         _shared_domains_based_distances = new ArrayList<DistanceMatrix>();
         _shared_binary_combinations_based_distances = new ArrayList<DistanceMatrix>();
@@ -102,7 +97,8 @@ public class PairwiseGenomeComparator {
                                             final String command_line_prg_name,
                                             final File out_dir,
                                             final boolean write_pairwise_comparisons,
-                                            final Map<String, Integer> tax_code_to_id_map ) {
+                                            final Map<String, Integer> tax_code_to_id_map,
+                                            final boolean calc_similarity_scores ) {
         init();
         final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
         final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
@@ -146,7 +142,8 @@ public class PairwiseGenomeComparator {
                 }
                 final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                              sort_by_species_count_first,
-                                                                                             true );
+                                                                                             true,
+                                                                                             calc_similarity_scores );
                 final SortedSet<DomainSimilarity> similarities = calc
                         .calculateSimilarities( pw_calc,
                                                 genome_pair,
@@ -203,39 +200,27 @@ public class PairwiseGenomeComparator {
                         }
                         break;
                 }
-                DescriptiveStatistics pw_stats = null;
                 if ( write_pairwise_comparisons ) {
                     try {
                         final Writer writer = new BufferedWriter( new FileWriter( out_dir == null ? pairwise_similarities_output_file_str
                                 : out_dir + ForesterUtil.FILE_SEPARATOR + pairwise_similarities_output_file_str ) );
-                        pw_stats = SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
-                                                                                new StringBuilder( species_i + "-"
-                                                                                        + species_j ),
-                                                                                writer,
-                                                                                null,
-                                                                                similarities,
-                                                                                true,
-                                                                                null,
-                                                                                domain_similarity_print_option,
-                                                                                scoring,
-                                                                                false,
-                                                                                tax_code_to_id_map,
-                                                                                false );
+                        SurfacingUtil.writeDomainSimilaritiesToFile( html_desc,
+                                                                     new StringBuilder( species_i + "-" + species_j ),
+                                                                     writer,
+                                                                     null,
+                                                                     similarities,
+                                                                     true,
+                                                                     null,
+                                                                     domain_similarity_print_option,
+                                                                     scoring,
+                                                                     false,
+                                                                     tax_code_to_id_map );
                     }
                     catch ( final IOException e ) {
                         ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
                                 + pairwise_similarities_output_file_str + "\" [" + e.getMessage() + "]" );
                     }
                 }
-                if ( pw_stats != null ) {
-                    if ( pw_stats.getMin() >= pw_stats.getMax() ) {
-                        ForesterUtil
-                                .printWarningMessage( command_line_prg_name, "for [" + species_i + "-" + species_j
-                                        + "] score minimum is [" + pw_stats.getMin() + "] while score maximum is ["
-                                        + pw_stats.getMax()
-                                        + "], possibly indicating that a genome is compared to itself" );
-                    }
-                }
             }
         }
         getDomainDistanceScoresMeans().add( domain_distance_scores_means );
index fc7d8b5..f9a13ef 100644 (file)
@@ -42,7 +42,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
     final public static String                              SPECIES_SEPARATOR = "  ";
     final private static int                                EQUAL             = 0;
     final private static String                             NO_SPECIES        = "     ";
-    private static final boolean                            PRINT_MORE_INFO   = false;
     final private double                                    _min;
     final private double                                    _max;
     final private double                                    _mean;
@@ -115,6 +114,43 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         }
     }
 
+    public PrintableDomainSimilarity( final CombinableDomains combinable_domains,
+                                      final int max_difference_in_counts,
+                                      final int max_difference,
+                                      final SortedMap<Species, SpeciesSpecificDcData> species_data,
+                                      final boolean sort_by_species_count_first,
+                                      final boolean treat_as_binary_comparison ) {
+        if ( combinable_domains == null ) {
+            throw new IllegalArgumentException( "attempt to use null combinable domains" );
+        }
+        if ( species_data == null ) {
+            throw new IllegalArgumentException( "attempt to use null species data" );
+        }
+        if ( species_data.size() < 1 ) {
+            throw new IllegalArgumentException( "attempt to use empty species data" );
+        }
+        init();
+        _combinable_domains = combinable_domains;
+        _min = -1;
+        _max = -1;
+        _mean = -1;
+        _sd = -1;
+        _n = -1;
+        _max_difference_in_counts = max_difference_in_counts;
+        _max_difference = max_difference;
+        _species_data = species_data;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+        final int s = species_data.size();
+        if ( s > 2 ) {
+            if ( getMaximalDifferenceInCounts() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference in counts with more than two species" );
+            }
+            if ( getMaximalDifference() < 0 ) {
+                throw new IllegalArgumentException( "attempt to use negative max difference with more than two species" );
+            }
+        }
+    }
+
     private void addSpeciesSpecificDomainData( final StringBuffer sb,
                                                final Species species,
                                                final boolean html,
@@ -341,23 +377,25 @@ public class PrintableDomainSimilarity implements DomainSimilarity {
         sb.append( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_SEARCH + getDomainId()
                 + "\" target=\"gs_window\">gs</a>" );
         sb.append( "</td>" );
-        sb.append( "<td>" );
-        sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
-        sb.append( "</td>" );
-        if ( PRINT_MORE_INFO ) {
-            if ( !isTreatAsBinaryComparison() ) {
-                sb.append( "<td>" );
-                sb.append( "(" );
-                sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
-                sb.append( ")" );
-                sb.append( "</td>" );
-                sb.append( "<td>" );
-                sb.append( "[" );
-                sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
-                sb.append( "-" );
-                sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
-                sb.append( "]" );
-                sb.append( "</td>" );
+        if ( getMaximalSimilarityScore() > 0 ) {
+            sb.append( "<td>" );
+            sb.append( ForesterUtil.round( getMeanSimilarityScore(), 3 ) );
+            sb.append( "</td>" );
+            if ( SurfacingConstants.PRINT_MORE_DOM_SIMILARITY_INFO ) {
+                if ( !isTreatAsBinaryComparison() ) {
+                    sb.append( "<td>" );
+                    sb.append( "(" );
+                    sb.append( ForesterUtil.round( getStandardDeviationOfSimilarityScore(), 3 ) );
+                    sb.append( ")" );
+                    sb.append( "</td>" );
+                    sb.append( "<td>" );
+                    sb.append( "[" );
+                    sb.append( ForesterUtil.round( getMinimalSimilarityScore(), 3 ) );
+                    sb.append( "-" );
+                    sb.append( ForesterUtil.round( getMaximalSimilarityScore(), 3 ) );
+                    sb.append( "]" );
+                    sb.append( "</td>" );
+                }
             }
         }
         sb.append( "<td>" );
index 309d321..8c42787 100644 (file)
@@ -35,8 +35,16 @@ public class ProteinCountsBasedPairwiseDomainSimilarityCalculator implements Pai
         if ( !domains_1.getKeyDomain().equals( domains_2.getKeyDomain() ) ) {
             throw new IllegalArgumentException( "attempt to calculate similarity between domain collection with different keys" );
         }
-        final int pc1 = domains_1.getKeyDomainProteinsCount();
-        final int pc2 = domains_2.getKeyDomainProteinsCount();
-        return new CountsBasedPairwiseDomainSimilarity( pc1 - pc2, pc1 + pc2 );
+        // The difference and the sum are narrowed to short below, so both protein counts
+        // and their sum must fit into a short. The sum check has to use the protein count
+        // of both operands, since those are the values that are actually added.
+        if ( ( domains_1.getKeyDomainProteinsCount() > Short.MAX_VALUE )
+                || ( domains_2.getKeyDomainProteinsCount() > Short.MAX_VALUE )
+                || ( ( domains_1.getKeyDomainProteinsCount() + domains_2.getKeyDomainProteinsCount() ) > Short.MAX_VALUE ) ) {
+            throw new IllegalArgumentException( "too large for short!" );
+        }
+        final short pc1 = ( short ) domains_1.getKeyDomainProteinsCount();
+        final short pc2 = ( short ) domains_2.getKeyDomainProteinsCount();
+        return new CountsBasedPairwiseDomainSimilarity( ( short ) ( pc1 - pc2 ), ( short ) ( pc1 + pc2 ) );
     }
 }
index 9709508..aa588a0 100644 (file)
@@ -30,15 +30,16 @@ import org.forester.util.ForesterUtil;
 
 public class SurfacingConstants {
 
-    public static final String AMIGO_LINK                   = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
-    public static final String EOL_LINK                     = "http://www.eol.org/search?q=";
-    public static final String GO_LINK                      = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
-    public static final String GOOGLE_SCHOLAR_SEARCH        = "http://scholar.google.com/scholar?q=";
-    public static final String GOOGLE_WEB_SEARCH_LINK       = "http://www.google.com/search?q=";
-    public static final String NL                           = ForesterUtil.LINE_SEPARATOR;
-    public static final String NONE                         = "[none]";
-    public static final String PFAM_FAMILY_ID_LINK          = "http://pfam.janelia.org/family/";
-    public static final String UNIPROT_TAXONOMY_ID_LINK     = "http://www.uniprot.org/taxonomy/";
-    static final boolean       SECONDARY_FEATURES_ARE_SCOP  = true;
-    static final String        SECONDARY_FEATURES_SCOP_LINK = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
+    public static final String AMIGO_LINK                     = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+    public static final String EOL_LINK                       = "http://www.eol.org/search?q=";
+    public static final String GO_LINK                        = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&query=";
+    public static final String GOOGLE_SCHOLAR_SEARCH          = "http://scholar.google.com/scholar?q=";
+    public static final String GOOGLE_WEB_SEARCH_LINK         = "http://www.google.com/search?q=";
+    public static final String NL                             = ForesterUtil.LINE_SEPARATOR;
+    public static final String NONE                           = "[none]";
+    public static final String PFAM_FAMILY_ID_LINK            = "http://pfam.janelia.org/family/";
+    public static final String UNIPROT_TAXONOMY_ID_LINK       = "http://www.uniprot.org/taxonomy/";
+    static final boolean       SECONDARY_FEATURES_ARE_SCOP    = true;
+    static final String        SECONDARY_FEATURES_SCOP_LINK   = "http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi?key=";
+    static final boolean       PRINT_MORE_DOM_SIMILARITY_INFO = false;
 }
index 70d5654..8c0c8b4 100644 (file)
@@ -1665,36 +1665,18 @@ public final class SurfacingUtil {
         }
     }
 
-    public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
-                                                                       final StringBuilder html_title,
-                                                                       final Writer single_writer,
-                                                                       Map<Character, Writer> split_writers,
-                                                                       final SortedSet<DomainSimilarity> similarities,
-                                                                       final boolean treat_as_binary,
-                                                                       final List<Species> species_order,
-                                                                       final PrintableDomainSimilarity.PRINT_OPTION print_option,
-                                                                       final DomainSimilarity.DomainSimilarityScoring scoring,
-                                                                       final boolean verbose,
-                                                                       final Map<String, Integer> tax_code_to_id_map,
-                                                                       final boolean print_some_stats )
+    public static void writeDomainSimilaritiesToFile( final StringBuilder html_desc,
+                                                      final StringBuilder html_title,
+                                                      final Writer single_writer,
+                                                      Map<Character, Writer> split_writers,
+                                                      final SortedSet<DomainSimilarity> similarities,
+                                                      final boolean treat_as_binary,
+                                                      final List<Species> species_order,
+                                                      final PrintableDomainSimilarity.PRINT_OPTION print_option,
+                                                      final DomainSimilarity.DomainSimilarityScoring scoring,
+                                                      final boolean verbose,
+                                                      final Map<String, Integer> tax_code_to_id_map )
             throws IOException {
-        DescriptiveStatistics stats = null;
-        AsciiHistogram histo = null;
-        if ( print_some_stats ) {
-            stats = new BasicDescriptiveStatistics();
-            final String histogram_title = "score mean distribution:";
-            for( final DomainSimilarity similarity : similarities ) {
-                stats.addValue( similarity.getMeanSimilarityScore() );
-            }
-            try {
-                if ( stats.getMin() < stats.getMax() ) {
-                    histo = new AsciiHistogram( stats, histogram_title );
-                }
-            }
-            catch ( final Exception e ) {
-                histo = null;
-            }
-        }
         if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
             split_writers = new HashMap<Character, Writer>();
             split_writers.put( '_', single_writer );
@@ -1718,9 +1700,6 @@ public final class SurfacingUtil {
                     w.write( SurfacingConstants.NL );
                     w.write( html_desc.toString() );
                     w.write( SurfacingConstants.NL );
-                    if ( print_some_stats ) {
-                        printSomeStats( stats, histo, w );
-                    }
                     w.write( "<hr>" );
                     w.write( SurfacingConstants.NL );
                     w.write( "<br>" );
@@ -1798,7 +1777,6 @@ public final class SurfacingUtil {
         for( final Writer w : split_writers.values() ) {
             w.close();
         }
-        return stats;
     }
 
     private static void printSomeStats( final DescriptiveStatistics stats, final AsciiHistogram histo, final Writer w )
index 3a79d7b..c2b857c 100644 (file)
@@ -327,7 +327,8 @@ public class TestSurfacing {
                                                                            new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
-                                                                                         false );
+                                                                                         false,
+                                                                                         true );
             final SortedSet<DomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
@@ -477,7 +478,8 @@ public class TestSurfacing {
                                                                             new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc2 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           false,
-                                                                                          false );
+                                                                                          false,
+                                                                                          true );
             final SortedSet<DomainSimilarity> sims2 = calc2
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list2,
@@ -564,7 +566,8 @@ public class TestSurfacing {
                                                                             new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc3 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           false,
-                                                                                          false );
+                                                                                          false,
+                                                                                          true );
             final SortedSet<DomainSimilarity> sims3 = calc3
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list3,
@@ -607,7 +610,8 @@ public class TestSurfacing {
                                                                             new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc4 = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                           true,
-                                                                                          false );
+                                                                                          false,
+                                                                                          true );
             final SortedSet<DomainSimilarity> sims4 = calc4
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list4,
@@ -1038,7 +1042,8 @@ public class TestSurfacing {
                                                                            new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
-                                                                                         false );
+                                                                                         false,
+                                                                                         true );
             final SortedSet<DomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,
@@ -1146,7 +1151,8 @@ public class TestSurfacing {
                                                                            new BasicSpecies( "nemve" ) ) );
             final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( DomainSimilarity.DomainSimilaritySortField.DOMAIN_ID,
                                                                                          false,
-                                                                                         false );
+                                                                                         false,
+                                                                                         true );
             final SortedSet<DomainSimilarity> sims = calc
                     .calculateSimilarities( new CombinationsBasedPairwiseDomainSimilarityCalculator(),
                                             cdc_list,