// String constants for the "_MAPPED.txt" family of independent-DC-gains (Fitch) list files.
// NOTE(review): presumably each is appended to an output base name to form a file name,
// and "DC" presumably stands for "domain combination" -- neither is visible in this
// excerpt; confirm at the use sites.

// Suffix for the plain mapped list.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
// Suffix for the mapped list in its "for_go_mapping" variant.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
// Suffix for the "for_go_mapping" variant with "unique" in its name.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+ private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
catch ( final IOException e3 ) {
e3.printStackTrace();
}
- final Map<String, DescriptiveStatistics> protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
- final Map<String, DescriptiveStatistics> domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ Map<String, DescriptiveStatistics> protein_length_stats_by_dc = null;
+ Map<String, DescriptiveStatistics> domain_number_stats_by_dc = null;
final Map<String, DescriptiveStatistics> domain_length_stats_by_domain = new HashMap<String, DescriptiveStatistics>();
+ if ( PERFORM_DC_REGAIN_PROTEINS_STATS ) {
+ protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ }
// Main loop:
for( int i = 0; i < number_of_genomes; ++i ) {
System.out.println();
final DescriptiveStatistics gained_once_domain_count_stats = new BasicDescriptiveStatistics();
final DescriptiveStatistics gained_multiple_times_lengths_stats = new BasicDescriptiveStatistics();
final DescriptiveStatistics gained_multiple_times_domain_count_stats = new BasicDescriptiveStatistics();
- final DescriptiveStatistics gained_multiple_times_domain_length_stats = new BasicDescriptiveStatistics();
- final DescriptiveStatistics gained_once_domain_length_stats = new BasicDescriptiveStatistics();
+ long gained_multiple_times_domain_length_sum = 0;
+ long gained_once_domain_length_sum = 0;
+ long gained_multiple_times_domain_length_count = 0;
+ long gained_once_domain_length_count = 0;
for( final String dc : dcs ) {
final int count = dc_gain_counts.get( dc );
if ( histogram.containsKey( count ) ) {
more_than_once.add( dc );
if ( protein_length_stats_by_dc != null ) {
final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
- final double[] a = s.getDataAsDoubleArray();
- for( final double element : a ) {
+ for( final double element : s.getData() ) {
gained_multiple_times_lengths_stats.addValue( element );
}
}
if ( domain_number_stats_by_dc != null ) {
final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
- final double[] a = s.getDataAsDoubleArray();
- for( final double element : a ) {
+ for( final double element : s.getData() ) {
gained_multiple_times_domain_count_stats.addValue( element );
}
}
final String[] ds = dc.split( "=" );
final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
- final double[] a0 = s0.getDataAsDoubleArray();
- final double[] a1 = s1.getDataAsDoubleArray();
- for( final double element : a0 ) {
- gained_multiple_times_domain_length_stats.addValue( element );
+ for( final double element : s0.getData() ) {
+ gained_multiple_times_domain_length_sum += element;
+ ++gained_multiple_times_domain_length_count;
}
- for( final double element : a1 ) {
- gained_multiple_times_domain_length_stats.addValue( element );
+ for( final double element : s1.getData() ) {
+ gained_multiple_times_domain_length_sum += element;
+ ++gained_multiple_times_domain_length_count;
}
}
}
else {
if ( protein_length_stats_by_dc != null ) {
final DescriptiveStatistics s = protein_length_stats_by_dc.get( dc );
- final double[] a = s.getDataAsDoubleArray();
- for( final double element : a ) {
+ for( final double element : s.getData() ) {
gained_once_lengths_stats.addValue( element );
}
}
if ( domain_number_stats_by_dc != null ) {
final DescriptiveStatistics s = domain_number_stats_by_dc.get( dc );
- final double[] a = s.getDataAsDoubleArray();
- for( final double element : a ) {
+ for( final double element : s.getData() ) {
gained_once_domain_count_stats.addValue( element );
}
}
final String[] ds = dc.split( "=" );
final DescriptiveStatistics s0 = domain_length_stats_by_domain.get( ds[ 0 ] );
final DescriptiveStatistics s1 = domain_length_stats_by_domain.get( ds[ 1 ] );
- final double[] a0 = s0.getDataAsDoubleArray();
- final double[] a1 = s1.getDataAsDoubleArray();
- for( final double element : a0 ) {
- gained_once_domain_length_stats.addValue( element );
+ for( final double element : s0.getData() ) {
+ gained_once_domain_length_sum += element;
+ ++gained_once_domain_length_count;
}
- for( final double element : a1 ) {
- gained_once_domain_length_stats.addValue( element );
+ for( final double element : s1.getData() ) {
+ gained_once_domain_length_sum += element;
+ ++gained_once_domain_length_count;
}
}
}
out_for_rank_counts.close();
out_for_ancestor_species_counts.close();
if ( !ForesterUtil.isEmpty( outfilename_for_protein_stats )
- && ( ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
+ && ( ( domain_length_stats_by_domain != null ) || ( protein_length_stats_by_dc != null ) || ( domain_number_stats_by_dc != null ) ) ) {
final BufferedWriter w = new BufferedWriter( new FileWriter( outfilename_for_protein_stats ) );
w.write( "Domain Lengths: " );
w.write( "\n" );
w.write( "\n" );
w.write( "Gained once, domain lengths:" );
w.write( "\n" );
- w.write( gained_once_domain_length_stats.toString() );
+ w.write( "N: " + gained_once_domain_length_count );
+ w.write( "\n" );
+ w.write( "Avg: " + ( ( double ) gained_once_domain_length_sum / gained_once_domain_length_count ) );
w.write( "\n" );
w.write( "\n" );
w.write( "Gained multiple times, domain lengths:" );
w.write( "\n" );
- w.write( gained_multiple_times_domain_length_stats.toString() );
+ w.write( "N: " + gained_multiple_times_domain_length_count );
+ w.write( "\n" );
+ w.write( "Avg: "
+ + ( ( double ) gained_multiple_times_domain_length_sum / gained_multiple_times_domain_length_count ) );
w.write( "\n" );
w.write( "\n" );
w.write( "\n" );