final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
+ private double _gap_ratio = -1; // starts at -1; overwritten by setGapRatio()
//
private final String _maffts_opts = "--auto";
- private int _step = 1;
- //
- private boolean _realign = false;
- private boolean _norm = true;
- private int _step_for_diagnostics = 1;
private int _min_length = -1;
- private double _gap_ratio = -1;
- private final boolean _report_aln_mean_identity = false;
- private MSA_FORMAT _output_format = MSA_FORMAT.FASTA;
- private final File _removed_seqs_out_base = null;
//
private DeleteableMsa _msa;
+ private boolean _norm = true; // handed to calcGapContribtionsStats() by the remove* methods; overwritten by setNorm()
private File _out_file_base;
+ private MSA_FORMAT _output_format = MSA_FORMAT.FASTA; // starts as FASTA; overwritten by setOutputFormat()
private String _path_to_mafft;
+ //
+ private boolean _realign = false; // forwarded to printMsaStatsWriteOutfileAndRealign() by the remove* methods; overwritten by setRealign()
private final SortedSet<String> _removed_seq_ids;
+ private final File _removed_seqs_out_base = null; // final and initialized to null here, so it can never refer to a file
+ private final boolean _report_aln_mean_identity = false; // final and initialized to false here, so it is effectively a constant
+ private int _step = 1; // the remove* methods call printMsaStatsWriteOutfileAndRealign() on every _step-th deletion; a value <= 0 turns that periodic call off
+ private int _step_for_diagnostics = 1; // starts at 1; overwritten by setStepForDiagnostics()
static {
NF_4.setRoundingMode( RoundingMode.HALF_UP );
NF_3.setRoundingMode( RoundingMode.HALF_UP );
_removed_seq_ids = new TreeSet<String>();
}
+    /**
+     * Deletes sequences from the alignment one at a time, in the order
+     * returned by calcGapContribtionsStats( norm ), and collects MsaProperties
+     * snapshots of the shrinking alignment along the way.
+     * <p>
+     * Deletion continues while the alignment holds more than
+     * ForesterUtil.roundToInt( initial number of sequences / 20.0 ) sequences.
+     * The alignment held by this object is modified in place.
+     * <p>
+     * A snapshot (preceded by removeGapColumns()) is taken after every
+     * deletion if the alignment started with fewer than 500 sequences, and
+     * otherwise only after every step-th deletion.
+     *
+     * @param step    snapshot interval; a value <= 0 means "never on a step"
+     * @param realign if true, realignWithMafft() is run before the snapshot on
+     *                every step-th deletion
+     * @param norm    passed through to calcGapContribtionsStats()
+     * @param verbose if true, printTableHeader() is called once and
+     *                printMsaStats() plus a line break are emitted per snapshot
+     * @return the snapshots, in the order in which they were taken
+     * @throws IOException          as declared by the helpers called here
+     * @throws InterruptedException as declared by the helpers called here
+     */
+    public final List<MsaProperties> chart( final int step,
+                                            final boolean realign,
+                                            final boolean norm,
+                                            final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution[] contributions = calcGapContribtionsStats( norm );
+        final List<String> removal_order = new ArrayList<String>();
+        final List<MsaProperties> snapshots = new ArrayList<MsaProperties>();
+        for( final GapContribution contribution : contributions ) {
+            removal_order.add( contribution.getId() );
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        final int initial_count = _msa.getNumberOfSequences();
+        final int target_count = ForesterUtil.roundToInt( initial_count / 20.0 );
+        for( int i = 0; _msa.getNumberOfSequences() > target_count; ++i ) {
+            final String id = removal_order.get( i );
+            _msa.deleteRow( id );
+            final boolean on_step = ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 );
+            // Large alignments are only sampled on step boundaries.
+            if ( ( initial_count >= 500 ) && !on_step ) {
+                continue;
+            }
+            removeGapColumns();
+            final boolean realigned = realign && on_step;
+            if ( realigned ) {
+                realignWithMafft();
+            }
+            snapshots.add( new MsaProperties( _msa ) );
+            if ( verbose ) {
+                printMsaStats( id );
+                if ( realigned ) {
+                    System.out.print( "(realigned)" );
+                }
+                System.out.println();
+            }
+        }
+        return snapshots;
+    }
+
final public Msa getMsa() {
return _msa;
}
return _removed_seq_ids;
}
+    /**
+     * Deletes sequences from the alignment, in the order returned by
+     * calcGapContribtionsStats( _norm ), for as long as
+     * MsaMethods.calcGapRatio( _msa ) is greater than mean_gapiness.
+     * <p>
+     * After every deletion the id is recorded in _removed_seq_ids and
+     * removeGapColumns() is called. On every _step-th deletion, and on the
+     * deletion that brings the gap ratio down to the target,
+     * printMsaStatsWriteOutfileAndRealign() is invoked; on all other deletions
+     * only printMsaStats() is called, and only when verbose is set.
+     * <p>
+     * NOTE(review): the loop index is not bounded. If the target is not reached
+     * before the candidate list is used up, to_remove_ids.get( i ) throws
+     * IndexOutOfBoundsException.
+     *
+     * @param mean_gapiness gap ratio at or below which deletion stops
+     * @param verbose       if true, printTableHeader() is called once and a line
+     *                      break is printed after each deletion; also forwarded to
+     *                      printMsaStatsWriteOutfileAndRealign()
+     * @throws IOException          as declared by the helpers called here
+     * @throws InterruptedException as declared by the helpers called here
+     */
+    public final void removeViaGapAverage( final double mean_gapiness, final boolean verbose ) throws IOException,
+            InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( _norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        for( final GapContribution gap_contribution : stats ) {
+            to_remove_ids.add( gap_contribution.getId() );
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        int i = 0;
+        while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
+            final String id = to_remove_ids.get( i );
+            _msa.deleteRow( id );
+            // Keep the removed-id bookkeeping in line with removeWorstOffenders(),
+            // which also records every id it deletes; done after deleteRow() so a
+            // failed deletion is not reported as removed.
+            _removed_seq_ids.add( id );
+            removeGapColumns();
+            if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) )
+                    || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
+                printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
+            }
+            else if ( verbose ) {
+                printMsaStats( id );
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
+        }
+    }
+
+    /**
+     * Deletes sequences from the alignment, in the order returned by
+     * calcGapContribtionsStats( _norm ), for as long as _msa.getLength() is
+     * greater than length.
+     * <p>
+     * After every deletion the id is recorded in _removed_seq_ids and
+     * removeGapColumns() is called. On every _step-th deletion, and on the
+     * deletion that brings the alignment length down to the target,
+     * printMsaStatsWriteOutfileAndRealign() is invoked; on all other deletions
+     * only printMsaStats() is called, and only when verbose is set.
+     * <p>
+     * NOTE(review): the loop index is not bounded. If the target is not reached
+     * before the candidate list is used up, to_remove_ids.get( i ) throws
+     * IndexOutOfBoundsException.
+     *
+     * @param length  alignment length at or below which deletion stops
+     * @param verbose if true, printTableHeader() is called once and a line break
+     *                is printed after each deletion; also forwarded to
+     *                printMsaStatsWriteOutfileAndRealign()
+     * @throws IOException          as declared by the helpers called here
+     * @throws InterruptedException as declared by the helpers called here
+     */
+    public void removeViaLength( final int length, final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( _norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        for( final GapContribution gap_contribution : stats ) {
+            to_remove_ids.add( gap_contribution.getId() );
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        int i = 0;
+        while ( _msa.getLength() > length ) {
+            final String id = to_remove_ids.get( i );
+            _msa.deleteRow( id );
+            // Keep the removed-id bookkeeping in line with removeWorstOffenders(),
+            // which also records every id it deletes; done after deleteRow() so a
+            // failed deletion is not reported as removed.
+            _removed_seq_ids.add( id );
+            removeGapColumns();
+            if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( _msa.getLength() <= length ) ) {
+                printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
+            }
+            else if ( verbose ) {
+                printMsaStats( id );
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
+        }
+    }
+
+    /**
+     * Deletes the first to_remove sequences of the ranking returned by
+     * calcGapContribtionsStats( _norm ).
+     * <p>
+     * All ids are first collected and recorded in _removed_seq_ids; only then
+     * are the rows deleted one by one, each deletion followed by
+     * removeGapColumns(). On every _step-th deletion and on the final one
+     * printMsaStatsWriteOutfileAndRealign() is invoked; on all other deletions
+     * only printMsaStats() is called, and only when verbose is set.
+     *
+     * @param to_remove number of leading entries of the ranking to delete
+     * @param verbose   if true, printTableHeader() is called once and a line
+     *                  break is printed after each deletion; also forwarded to
+     *                  printMsaStatsWriteOutfileAndRealign()
+     * @throws IOException          as declared by the helpers called here
+     * @throws InterruptedException as declared by the helpers called here
+     */
+    public final void removeWorstOffenders( final int to_remove, final boolean verbose ) throws IOException,
+            InterruptedException {
+        final GapContribution[] ranking = calcGapContribtionsStats( _norm );
+        final List<String> doomed_ids = new ArrayList<String>();
+        int picked = 0;
+        while ( picked < to_remove ) {
+            final String offender_id = ranking[ picked ].getId();
+            doomed_ids.add( offender_id );
+            _removed_seq_ids.add( offender_id );
+            ++picked;
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        final int last_index = doomed_ids.size() - 1;
+        int index = 0;
+        for( final String id : doomed_ids ) {
+            _msa.deleteRow( id );
+            removeGapColumns();
+            final boolean on_step = ( _step > 0 ) && ( ( ( index + 1 ) % _step ) == 0 );
+            if ( on_step || ( index == last_index ) ) {
+                printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
+            }
+            else if ( verbose ) {
+                printMsaStats( id );
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++index;
+        }
+    }
+
+ public final void setGapRatio( final double gap_ratio ) { // overwrites _gap_ratio (initialized to -1) with the given value
+ _gap_ratio = gap_ratio;
+ }
+
+ public final void setMinLength( final int min_length ) { // overwrites _min_length (initialized to -1) with the given value
+ _min_length = min_length;
+ }
+
+ public final void setNorm( final boolean norm ) { // overwrites _norm, the flag the remove* methods hand to calcGapContribtionsStats()
+ _norm = norm;
+ }
+
final public void setOutFileBase( final File out_file_base ) {
_out_file_base = out_file_base;
}
+ public final void setOutputFormat( final MSA_FORMAT output_format ) { // overwrites _output_format (initialized to MSA_FORMAT.FASTA)
+ _output_format = output_format;
+ }
+
+ public void setPathToMafft( final String path_to_mafft ) { // overwrites _path_to_mafft; the value is stored as given, with no validation here
+ _path_to_mafft = path_to_mafft;
+ }
+
+ public final void setRealign( final boolean realign ) { // overwrites _realign, the flag the remove* methods forward to printMsaStatsWriteOutfileAndRealign()
+ _realign = realign;
+ }
+
+ public final void setStep( final int step ) { // overwrites _step; the remove* methods call printMsaStatsWriteOutfileAndRealign() on every _step-th deletion, and a value <= 0 turns that periodic call off
+ _step = step;
+ }
+
+ public final void setStepForDiagnostics( final int step_for_diagnostics ) { // overwrites _step_for_diagnostics (initialized to 1)
+ _step_for_diagnostics = step_for_diagnostics;
+ }
+
final public String writeMsa( final File outfile, final MSA_FORMAT format, final String suffix ) throws IOException {
final Double gr = MsaMethods.calcGapRatio( _msa );
final String s = outfile + "_" + _msa.getNumberOfSequences() + "_" + _msa.getLength() + "_"
return gappiness;
}
- public final List<MsaProperties> chart( final int step,
- final boolean realign,
- final boolean norm,
- final boolean verbose ) throws IOException, InterruptedException {
- final GapContribution stats[] = calcGapContribtionsStats( norm );
- final List<String> to_remove_ids = new ArrayList<String>();
- final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
- for( final GapContribution gap_gontribution : stats ) {
- to_remove_ids.add( gap_gontribution.getId() );
- }
- if ( verbose ) {
- printTableHeader();
- }
- int i = 0;
- final int s = _msa.getNumberOfSequences();
- final int x = ForesterUtil.roundToInt( s / 20.0 );
- while ( _msa.getNumberOfSequences() > x ) {
- final String id = to_remove_ids.get( i );
- //~_msa = MsaMethods.removeSequence( _msa, id );
- _msa.deleteRow( id );
- if ( ( s < 500 ) || ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) ) {
- removeGapColumns();
- if ( realign && ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) ) {
- realignWithMafft();
- msa_props.add( new MsaProperties( _msa ) );
- if ( verbose ) {
- printMsaStats( id );
- }
- if ( verbose ) {
- System.out.print( "(realigned)" );
- }
- }
- else {
- msa_props.add( new MsaProperties( _msa ) );
- if ( verbose ) {
- printMsaStats( id );
- }
- }
- if ( verbose ) {
- System.out.println();
- }
- }
- ++i;
- }
- return msa_props;
- }
-
private Phylogeny inferNJphylogeny( final PWD_DISTANCE_METHOD pwd_distance_method,
final Msa msa,
final boolean write_matrix,
_msa.deleteGapOnlyColumns();
}
- public final void removeViaGapAverage( final double mean_gapiness, final boolean verbose ) throws IOException,
- InterruptedException {
- final GapContribution stats[] = calcGapContribtionsStats( _norm );
- final List<String> to_remove_ids = new ArrayList<String>();
- for( final GapContribution gap_gontribution : stats ) {
- to_remove_ids.add( gap_gontribution.getId() );
- }
- if ( verbose ) {
- printTableHeader();
- }
- int i = 0;
- while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
- final String id = to_remove_ids.get( i );
- _msa.deleteRow( id );
- removeGapColumns();
- if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) )
- || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
- printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
- }
- else if ( verbose ) {
- printMsaStats( id );
- }
- if ( verbose ) {
- System.out.println();
- }
- ++i;
- }
- }
-
- public void removeViaLength( final int length, final boolean verbose ) throws IOException, InterruptedException {
- final GapContribution stats[] = calcGapContribtionsStats( _norm );
- final List<String> to_remove_ids = new ArrayList<String>();
- for( final GapContribution gap_gontribution : stats ) {
- to_remove_ids.add( gap_gontribution.getId() );
- }
- if ( verbose ) {
- printTableHeader();
- }
- int i = 0;
- while ( _msa.getLength() > length ) {
- final String id = to_remove_ids.get( i );
- _msa.deleteRow( id );
- removeGapColumns();
- if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( _msa.getLength() <= length ) ) {
- printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
- }
- else if ( verbose ) {
- printMsaStats( id );
- }
- if ( verbose ) {
- System.out.println();
- }
- ++i;
- }
- }
-
- public final void removeWorstOffenders( final int to_remove, final boolean verbose ) throws IOException,
- InterruptedException {
- final GapContribution stats[] = calcGapContribtionsStats( _norm );
- final List<String> to_remove_ids = new ArrayList<String>();
- for( int j = 0; j < to_remove; ++j ) {
- to_remove_ids.add( stats[ j ].getId() );
- _removed_seq_ids.add( stats[ j ].getId() );
- }
- if ( verbose ) {
- printTableHeader();
- }
- for( int i = 0; i < to_remove_ids.size(); ++i ) {
- final String id = to_remove_ids.get( i );
- _msa.deleteRow( id );
- removeGapColumns();
- if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
- printMsaStatsWriteOutfileAndRealign( _realign, verbose, id );
- }
- else if ( verbose ) {
- printMsaStats( id );
- }
- if ( verbose ) {
- System.out.println();
- }
- }
- }
-
- public void setPathToMafft( final String path_to_mafft ) {
- _path_to_mafft = path_to_mafft;
- }
-
final private void writeMsa( final String outfile, final MSA_FORMAT format ) throws IOException {
final Writer w = ForesterUtil.createBufferedWriter( outfile );
_msa.write( w, format );
return null;
}
- public final void setStep( final int step ) {
- _step = step;
- }
-
- public final void setNorm( final boolean norm ) {
- _norm = norm;
- }
-
- public final void setStepForDiagnostics( final int step_for_diagnostics ) {
- _step_for_diagnostics = step_for_diagnostics;
- }
-
- public final void setMinLength( final int min_length ) {
- _min_length = min_length;
- }
-
- public final void setGapRatio( final double gap_ratio ) {
- _gap_ratio = gap_ratio;
- }
-
- public final void setOutputFormat( final MSA_FORMAT output_format ) {
- _output_format = output_format;
- }
-
- public final void setRealign( final boolean realign ) {
- _realign = realign;
- }
-
private final static void printTableHeader() {
System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) );
System.out.print( "\t" );