in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 May 2012 03:58:39 +0000 (03:58 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 May 2012 03:58:39 +0000 (03:58 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/util/BasicDescriptiveStatistics.java
forester/java/src/org/forester/util/DescriptiveStatistics.java

index 20e5b1f..b7a44d7 100644 (file)
@@ -286,6 +286,7 @@ public class surfacing {
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX                       = "_indep_dc_gains_fitch_lists_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX        = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
     public static final String                                INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+    private static final boolean                              PERFORM_DC_REGAIN_PROTEINS_STATS                                              = true;
 
     private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
                                                                  final String[][] input_file_properties,
@@ -1760,9 +1761,13 @@ public class surfacing {
         catch ( final IOException e3 ) {
             e3.printStackTrace();
         }
-        final Map<String, DescriptiveStatistics> protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
-        final Map<String, DescriptiveStatistics> domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+        Map<String, DescriptiveStatistics> protein_length_stats_by_dc = null;
+        Map<String, DescriptiveStatistics> domain_number_stats_by_dc = null;
         final Map<String, DescriptiveStatistics> domain_length_stats_by_domain = new HashMap<String, DescriptiveStatistics>();
+        if ( PERFORM_DC_REGAIN_PROTEINS_STATS ) {
+            protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+            domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+        }
         // Main loop:
         for( int i = 0; i < number_of_genomes; ++i ) {
             System.out.println();
index 974409d..a22bdad 100644 (file)
@@ -238,8 +238,10 @@ public final class SurfacingUtil {
             final DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics();
             final DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics();
             final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics();
-            final DescriptiveStatistics gained_multiple_times_domain_length_stats = new BasicDescriptiveStatistics();
-            final DescriptiveStatistics gained_once_domain_length_stats = new BasicDescriptiveStatistics();
+            long gained_multiple_times_domain_length_sum = 0;
+            long gained_once_domain_length_sum = 0;
+            long gained_multiple_times_domain_length_count = 0;
+            long gained_once_domain_length_count = 0;
             for( final String dc : dcs ) {
                 final int count = dc_gain_counts.get( dc );
                 if ( histogram.containsKey( count ) ) {
@@ -286,15 +288,13 @@ public final class SurfacingUtil {
                     more_than_once.add( dc );
                     if ( protein_length_stats_by_dc != null ) {
                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
-                        final double[] a = s.getDataAsDoubleArray();
-                        for( final double element : a ) {
+                        for( final double element : s.getData() ) {
                             gained_multiple_times_lengths_stats.addValue( element );
                         }
                     }
                     if ( domain_number_stats_by_dc != null ) {
                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
-                        final double[] a = s.getDataAsDoubleArray();
-                        for( final double element : a ) {
+                        for( final double element : s.getData() ) {
                             gained_multiple_times_domain_count_stats.addValue( element );
                         }
                     }
@@ -302,28 +302,26 @@ public final class SurfacingUtil {
                         final String[] ds = dc.split( "=" );
                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
-                        final double[] a0 = s0.getDataAsDoubleArray();
-                        final double[] a1 = s1.getDataAsDoubleArray();
-                        for( final double element : a0 ) {
-                            gained_multiple_times_domain_length_stats.addValue( element );
+                        for( final double element : s0.getData() ) {
+                            gained_multiple_times_domain_length_sum += element;
+                            ++gained_multiple_times_domain_length_count;
                         }
-                        for( final double element : a1 ) {
-                            gained_multiple_times_domain_length_stats.addValue( element );
+                        for( final double element : s1.getData() ) {
+                            gained_multiple_times_domain_length_sum += element;
+                            ++gained_multiple_times_domain_length_count;
                         }
                     }
                 }
                 else {
                     if ( protein_length_stats_by_dc != null ) {
                         final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
-                        final double[] a = s.getDataAsDoubleArray();
-                        for( final double element : a ) {
+                        for( final double element : s.getData() ) {
                             gained_once_lengths_stats.addValue( element );
                         }
                     }
                     if ( domain_number_stats_by_dc != null ) {
                         final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
-                        final double[] a = s.getDataAsDoubleArray();
-                        for( final double element : a ) {
+                        for( final double element : s.getData() ) {
                             gained_once_domain_count_stats.addValue( element );
                         }
                     }
@@ -331,13 +329,13 @@ public final class SurfacingUtil {
                         final String[] ds = dc.split( "=" );
                         final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
                         final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
-                        final double[] a0 = s0.getDataAsDoubleArray();
-                        final double[] a1 = s1.getDataAsDoubleArray();
-                        for( final double element : a0 ) {
-                            gained_once_domain_length_stats.addValue( element );
+                        for( final double element : s0.getData() ) {
+                            gained_once_domain_length_sum += element;
+                            ++gained_once_domain_length_count;
                         }
-                        for( final double element : a1 ) {
-                            gained_once_domain_length_stats.addValue( element );
+                        for( final double element : s1.getData() ) {
+                            gained_once_domain_length_sum += element;
+                            ++gained_once_domain_length_count;
                         }
                     }
                 }
@@ -409,7 +407,7 @@ public final class SurfacingUtil {
             out_for_rank_counts.close();
             out_for_ancestor_species_counts.close();
             if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats )
-                    && ( ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
+                    && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
                 final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) );
                 w.write( "Domain Lengths: " );
                 w.write( "\n" );
@@ -455,12 +453,17 @@ public final class SurfacingUtil {
                 w.write( "\n" );
                 w.write( "Gained once, domain lengths:" );
                 w.write( "\n" );
-                w.write( gained_once_domain_length_stats.toString() );
+                w.write( "N: " + gained_once_domain_length_count );
+                w.write( "\n" );
+                w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) );
                 w.write( "\n" );
                 w.write( "\n" );
                 w.write( "Gained multiple times, domain lengths:" );
                 w.write( "\n" );
-                w.write( gained_multiple_times_domain_length_stats.toString() );
+                w.write( "N: " + gained_multiple_times_domain_length_count );
+                w.write( "\n" );
+                w.write( "Avg: "
+                        + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) );
                 w.write( "\n" );
                 w.write( "\n" );
                 w.write( "\n" );
index 8ec95e7..5e8931c 100644 (file)
@@ -356,4 +356,9 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics {
     public static double sampleStandardUnit( final double value, final double mean, final double sd ) { // Expresses value as its distance from mean, measured in units of sd.
         return ( value - mean ) / sd; // floating-point division: sd == 0 yields Infinity or NaN rather than throwing
     }
+
+    @Override
+    public List<Double> getData() { // Gives callers the recorded values as a List<Double>.
+        return _data; // NOTE(review): hands out the _data reference itself (no copy, no unmodifiable wrapper) - presumably callers only iterate; confirm none mutate it
+    }
 }
index f5a8a7f..484b00f 100644 (file)
@@ -25,6 +25,8 @@
 
 package org.forester.util;
 
+import java.util.List;
+
 public interface DescriptiveStatistics {
 
     public final static String PLUS_MINUS = "" + ( char ) 177;
@@ -45,6 +47,8 @@ public interface DescriptiveStatistics {
 
     public abstract double[] getDataAsDoubleArray();
 
+    public abstract List<Double> getData();
+
     public abstract double getMax();
 
     public abstract double getMin();