From 504b2b133e9814ac9ee966dc04a1408c455c6a2f Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 30 Apr 2014 19:59:22 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/msa_compactor.java | 42 +++++++++------- .../org/forester/msa_compactor/MsaCompactor.java | 51 ++++++++++---------- 2 files changed, 50 insertions(+), 43 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index 68639ff..7f65269 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -134,7 +134,7 @@ public class msa_compactor { final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa ); final boolean chart_only = ( !cla.isOptionSet( LENGTH_OPTION ) ) && ( !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) - && ( !cla.isOptionSet( AV_GAPINESS_OPTION ) ) && ( !cla.isOptionSet( MIN_LENGTH_OPTION ) ); + && ( !cla.isOptionSet( AV_GAPINESS_OPTION ) ); if ( !chart_only && ( out == null ) ) { ForesterUtil.fatalError( PRG_NAME, "outfile file missing" ); } @@ -230,17 +230,20 @@ public class msa_compactor { if ( cla.isOptionSet( MAFFT_OPTIONS ) ) { mafft_options = cla.getOptionValueAsCleanString( MAFFT_OPTIONS ); if ( ForesterUtil.isEmpty( mafft_options ) || ( mafft_options.length() < 3 ) ) { - ForesterUtil.fatalError( PRG_NAME, "gap ratio is out of range: " + gap_ratio ); + ForesterUtil.fatalError( PRG_NAME, "illegal or empty MAFFT options: " + mafft_options ); } } } + else if ( cla.isOptionSet( MAFFT_OPTIONS ) ) { + ForesterUtil.fatalError( PRG_NAME, "no need to indicate MAFFT options without realigning" ); + } if ( chart_only ) { if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { ForesterUtil .fatalError( PRG_NAME, "chart only, no outfile(s) produced, thus no need to indicate output file(s)" ); } - if ( !realign && ( step > 1 ) ) { + if ( !realign && cla.isOptionSet( STEP_OPTION ) ) { ForesterUtil.fatalError( PRG_NAME, "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -" + STEP_FOR_DIAGNOSTICS_OPTION + " instead" ); @@ -261,7 +264,7 @@ public class msa_compactor { + NF_1.format( initial_msa_stats.arithmeticMean() ) ); System.out.println( " Max sequence length : " + ( ( int ) initial_msa_stats.getMax() ) ); System.out.println( " Min sequence length : " + ( ( int ) initial_msa_stats.getMin() ) ); - if ( out != null ) { + if ( !chart_only ) { System.out.println( "Output : " + out ); } else { @@ -273,20 +276,33 @@ public class msa_compactor { if ( worst_remove > 0 ) { System.out.println( "Number of worst offenders to remove : " + worst_remove ); } - else if ( av_gap > 0 ) { + if ( av_gap > 0 ) { System.out.println( "Target gap-ratio : " + av_gap ); } - else if ( length > 0 ) { + if ( length > 0 ) { System.out.println( "Target MSA length : " + length ); } - else { - System.out.println( "Chart and diagnostics only : true" ); + if ( min_length > 1 ) { + System.out.println( "Minimal effective sequence length : " + min_length ); + } + if ( gap_ratio > -1 ) { + System.out.println( "Maximum allowed gap ratio per column : " + gap_ratio ); } if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { System.out.println( "Output format : " + ( output_format == MSA_FORMAT.FASTA ? "fasta" : "phylip" ) ); } - System.out.println( "Step for output and re-aligning : " + step ); + if ( chart_only && !realign ) { + System.out.println( "Step for output and re-aligning : n/a" ); + } + else { + if ( chart_only ) { + System.out.println( "Step for re-aligning : " + step ); + } + else { + System.out.println( "Step for output and re-aligning : " + step ); + } + } System.out.println( "Step for diagnostics reports : " + step_for_diagnostics ); System.out.println( "Calculate mean identity : " + report_aln_mean_identity ); if ( !norm ) { @@ -296,15 +312,7 @@ public class msa_compactor { if ( realign ) { System.out.println( "MAFFT options : " + mafft_options ); } - if ( min_length > 1 ) { - System.out.println( "Minimal effective sequence length : " + min_length ); - } - if ( gap_ratio > -1 ) { - System.out.println( "Maximum allowed gap ratio per column : " + gap_ratio ); - } System.out.println(); - // - // final int initial_number_of_seqs = msa.getNumberOfSequences(); List msa_props = null; final MsaCompactor mc = new MsaCompactor( msa ); diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 4a07813..366d5bd 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -99,8 +99,10 @@ public class MsaCompactor { for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } - final boolean print_id = ( _step < 2 ) && ( _step_for_diagnostics < 2 ); - printTableHeader( print_id ); + if ( !_realign ) { + _step = -1; + } + printTableHeader(); int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 ); if ( x < 1 ) { x = 1; @@ -134,6 +136,10 @@ public class MsaCompactor { return msa_props; } + final public void deleteGapColumns( final double max_allowed_gap_ratio ) { + _msa.deleteGapColumns( max_allowed_gap_ratio ); + } + final public Msa getMsa() { return _msa; } @@ -142,6 +148,15 @@ public class MsaCompactor { return _removed_seq_ids; } + public final void removeSequencesByMinimalLength( final int min_effective_length ) { + printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); + System.out.println(); + _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) ); + removeGapColumns(); + printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); + System.out.println(); + } + public final List removeViaGapAverage( final double mean_gapiness ) throws IOException, InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( _norm ); @@ -150,8 +165,7 @@ public class MsaCompactor { for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } - final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 ); - printTableHeader( print_id ); + printTableHeader(); MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); msa_props.add( msa_prop ); printMsaProperties( "", msa_prop ); @@ -191,8 +205,7 @@ public class MsaCompactor { for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } - final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 ); - printTableHeader( print_id ); + printTableHeader(); MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); msa_props.add( msa_prop ); printMsaProperties( "", msa_prop ); @@ -225,15 +238,6 @@ public class MsaCompactor { return msa_props; } - public final void removeSequencesByMinimalLength( final int min_effective_length ) { - printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); - System.out.println(); - _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) ); - removeGapColumns(); - printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); - System.out.println(); - } - public final List removeWorstOffenders( final int to_remove ) throws IOException, InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( _norm ); @@ -243,8 +247,7 @@ public class MsaCompactor { to_remove_ids.add( stats[ j ].getId() ); _removed_seq_ids.add( stats[ j ].getId() ); } - final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 ); - printTableHeader( print_id ); + printTableHeader(); MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); msa_props.add( msa_prop ); printMsaProperties( "", msa_prop ); @@ -275,10 +278,6 @@ public class MsaCompactor { return msa_props; } - final public void deleteGapColumns( final double max_allowed_gap_ratio ) { - _msa.deleteGapColumns( max_allowed_gap_ratio ); - } - public final void setGapRatio( final double gap_ratio ) { _gap_ratio = gap_ratio; } @@ -436,11 +435,11 @@ public class MsaCompactor { } private final boolean isPrintMsaStats( final int i ) { - return ( ( ( _step < 2 ) && ( _step_for_diagnostics < 2 ) ) || ( ( _step_for_diagnostics > 0 ) && ( ( ( i + 1 ) % _step_for_diagnostics ) == 0 ) ) ); + return ( ( ( _step == 1 ) && ( _step_for_diagnostics == 1 ) ) || ( ( _step_for_diagnostics > 0 ) && ( ( ( i + 1 ) % _step_for_diagnostics ) == 0 ) ) ); } private final boolean isPrintMsaStatsWriteOutfileAndRealign( final int i ) { - return ( ( ( _step < 2 ) && ( _step_for_diagnostics < 2 ) ) || ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) ); + return ( ( ( _step == 1 ) && ( _step_for_diagnostics == 1 ) ) || ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) ); } private final StringBuilder msaPropertiesAsSB( final MsaProperties msa_properties ) { @@ -486,7 +485,7 @@ public class MsaCompactor { } private final void printMsaProperties( final String id, final MsaProperties msa_properties ) { - if ( ( _step < 2 ) || ( _step_for_diagnostics < 2 ) ) { + if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) { System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) ); System.out.print( "\t" ); } @@ -506,8 +505,8 @@ public class MsaCompactor { return msa_prop; } - private final void printTableHeader( final boolean print_id ) { - if ( print_id ) { + private final void printTableHeader() { + if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) { System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) ); System.out.print( "\t" ); } -- 1.7.10.2