From: cmzmasek@gmail.com Date: Tue, 8 May 2012 03:58:39 +0000 (+0000) Subject: in progress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=78b77ac2a4a069d0a2e53dc5e7652838c58acd0c;p=jalview.git in progress --- diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 20e5b1f..b7a44d7 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -286,6 +286,7 @@ public class surfacing { public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; + private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true; private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, @@ -1760,9 +1761,13 @@ public class surfacing { catch ( final IOException e3 ) { e3.printStackTrace(); } - final Map protein_length_stats_by_dc = new HashMap(); - final Map domain_number_stats_by_dc = new HashMap(); + Map protein_length_stats_by_dc = null; + Map domain_number_stats_by_dc = null; final Map domain_length_stats_by_domain = new HashMap(); + if ( PERFORM_DC_REGAIN_PROTEINS_STATS ) { + protein_length_stats_by_dc = new HashMap(); + domain_number_stats_by_dc = new HashMap(); + } // Main loop: for( int i = 0; i < number_of_genomes; ++i ) { System.out.println(); diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 974409d..a22bdad 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -238,8 +238,10 @@ public final class SurfacingUtil { final DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics(); final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics(); - final DescriptiveStatistics gained_multiple_times_domain_length_stats = new BasicDescriptiveStatistics(); - final DescriptiveStatistics gained_once_domain_length_stats = new BasicDescriptiveStatistics(); + long gained_multiple_times_domain_length_sum = 0; + long gained_once_domain_length_sum = 0; + long gained_multiple_times_domain_length_count = 0; + long gained_once_domain_length_count = 0; for( final String dc : dcs ) { final int count = dc_gain_counts.get( dc ); if ( histogram.containsKey( count ) ) { @@ -286,15 +288,13 @@ public final class SurfacingUtil { more_than_once.add( dc ); if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_multiple_times_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_multiple_times_domain_count_stats.addValue( element ); } } @@ -302,28 +302,26 @@ public final class SurfacingUtil { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); - final double[] a0 = s0.getDataAsDoubleArray(); - final double[] a1 = s1.getDataAsDoubleArray(); - for( final double element : a0 ) { - gained_multiple_times_domain_length_stats.addValue( element ); + for( final double element : s0.getData() ) { + gained_multiple_times_domain_length_sum += element; + ++gained_multiple_times_domain_length_count; } - for( final double element : a1 ) { - gained_multiple_times_domain_length_stats.addValue( element ); + for( final double element : s1.getData() ) { + gained_multiple_times_domain_length_sum += element; + ++gained_multiple_times_domain_length_count; } } } else { if ( protein_length_stats_by_dc != null ) { final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_once_lengths_stats.addValue( element ); } } if ( domain_number_stats_by_dc != null ) { final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc ); - final double[] a = s.getDataAsDoubleArray(); - for( final double element : a ) { + for( final double element : s.getData() ) { gained_once_domain_count_stats.addValue( element ); } } @@ -331,13 +329,13 @@ public final class SurfacingUtil { final String[] ds = dc.split( "=" ); final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] ); final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] ); - final double[] a0 = s0.getDataAsDoubleArray(); - final double[] a1 = s1.getDataAsDoubleArray(); - for( final double element : a0 ) { - gained_once_domain_length_stats.addValue( element ); + for( final double element : s0.getData() ) { + gained_once_domain_length_sum += element; + ++gained_once_domain_length_count; } - for( final double element : a1 ) { - gained_once_domain_length_stats.addValue( element ); + for( final double element : s1.getData() ) { + gained_once_domain_length_sum += element; + ++gained_once_domain_length_count; } } } @@ -409,7 +407,7 @@ public final class SurfacingUtil { out_for_rank_counts.close(); out_for_ancestor_species_counts.close(); if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats ) - && ( ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) { + && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) { final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) ); w.write( "Domain Lengths: " ); w.write( "\n" ); @@ -455,12 +453,17 @@ public final class SurfacingUtil { w.write( "\n" ); w.write( "Gained once, domain lengths:" ); w.write( "\n" ); - w.write( gained_once_domain_length_stats.toString() ); + w.write( "N: " + gained_once_domain_length_count ); + w.write( "\n" ); + w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "Gained multiple times, domain lengths:" ); w.write( "\n" ); - w.write( gained_multiple_times_domain_length_stats.toString() ); + w.write( "N: " + gained_multiple_times_domain_length_count ); + w.write( "\n" ); + w.write( "Avg: " + + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) ); w.write( "\n" ); w.write( "\n" ); w.write( "\n" ); diff --git a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java index 8ec95e7..5e8931c 100644 --- a/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java +++ b/forester/java/src/org/forester/util/BasicDescriptiveStatistics.java @@ -356,4 +356,9 @@ public class BasicDescriptiveStatistics implements DescriptiveStatistics { public static double sampleStandardUnit( final double value, final double mean, final double sd ) { return ( value - mean ) / sd; } + + @Override + public List getData() { + return _data; + } } diff --git a/forester/java/src/org/forester/util/DescriptiveStatistics.java b/forester/java/src/org/forester/util/DescriptiveStatistics.java index f5a8a7f..484b00f 100644 --- a/forester/java/src/org/forester/util/DescriptiveStatistics.java +++ b/forester/java/src/org/forester/util/DescriptiveStatistics.java @@ -25,6 +25,8 @@ package org.forester.util; +import java.util.List; + public interface DescriptiveStatistics { public final static String PLUS_MINUS = "" + ( char ) 177; @@ -45,6 +47,8 @@ public interface DescriptiveStatistics { public abstract double[] getDataAsDoubleArray(); + public abstract List getData(); + public abstract double getMax(); public abstract double getMin();