final File in = cla.getFile( 0 );
final File out = cla.getFile( 1 );
int worst_remove = -1;
- double av = -1;
+ double av_gap = -1;
int length = -1;
- int step = 1;
+ int step = -1;
boolean realign = false;
boolean norm = true;
String path_to_mafft = null;
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
}
+ Msa msa = null;
+ final FileInputStream is = new FileInputStream( in );
+ if ( FastaParser.isLikelyFasta( in ) ) {
+ msa = FastaParser.parseMsa( is );
+ }
+ else {
+ msa = GeneralMsaParser.parse( is );
+ }
if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) {
worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION );
+ if ( ( worst_remove < 1 ) || ( worst_remove >= msa.getNumberOfSequences() - 1 ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "number of worst offender sequences to remove is out of range: "
+ + worst_remove );
+ }
}
if ( cla.isOptionSet( AV_GAPINESS_OPTION ) ) {
if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) {
printHelp();
System.exit( 0 );
}
- av = cla.getOptionValueAsDouble( AV_GAPINESS_OPTION );
+ av_gap = cla.getOptionValueAsDouble( AV_GAPINESS_OPTION );
+ if ( ( av_gap < 0 ) || ( av_gap >= 1 ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "target gap-ratio is out of range: " + av_gap );
+ }
}
if ( cla.isOptionSet( LENGTH_OPTION ) ) {
if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla.isOptionSet( AV_GAPINESS_OPTION ) ) {
System.exit( 0 );
}
length = cla.getOptionValueAsInt( LENGTH_OPTION );
+ if ( ( length < 2 ) || ( length >= msa.getLength() ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "target length is out of range: " + length );
+ }
}
if ( cla.isOptionSet( STEP_OPTION ) ) {
step = cla.getOptionValueAsInt( STEP_OPTION );
+ if ( ( step < 1 )
+ || ( ( step > msa.getNumberOfSequences() ) || ( ( worst_remove > 0 ) && ( step > worst_remove ) ) ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "value for step is out of range: " + step );
+ }
}
if ( cla.isOptionSet( REALIGN_OPTION ) ) {
realign = true;
}
checkPathToMafft( path_to_mafft );
}
- Msa msa = null;
- final FileInputStream is = new FileInputStream( in );
- if ( FastaParser.isLikelyFasta( in ) ) {
- msa = FastaParser.parseMsa( is );
- }
- else {
- msa = GeneralMsaParser.parse( is );
- }
if ( worst_remove > 0 ) {
MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out );
}
- else if ( av > 0 ) {
- MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out );
+ else if ( av_gap > 0 ) {
+ MsaCompactor.reduceGapAverage( msa, av_gap, step, realign, norm, path_to_mafft, out );
}
else if ( length > 0 ) {
- if ( length >= msa.getLength() ) {
- ForesterUtil.fatalError( PRG_NAME, "target MSA length (" + length
- + ") is greater than or equal to MSA original length (" + msa.getLength() + ")" );
- }
// TODO if < shortest seq -> error
MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
}
final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
+ private final String _maffts_opts = "--retree 1";
private Msa _msa;
private File _out_file_base;
private String _path_to_mafft;
private final SortedSet<String> _removed_seq_ids;
- private final String _maffts_opts = "--retree 1";
static {
NF_4.setRoundingMode( RoundingMode.HALF_UP );
NF_3.setRoundingMode( RoundingMode.HALF_UP );
return sb;
}
+ private final void printMsaStats( final String id ) { // Prints the stats-table cells for one sequence id to System.out.
+ System.out.print( ForesterUtil.pad( id, 20, ' ', false ) ); // id cell; presumably padded with spaces to width 20 -- confirm against ForesterUtil.pad
+ System.out.print( "\t" );
+ final StringBuilder sb = msaStatsAsSB(); // stats text is produced by msaStatsAsSB() (defined elsewhere in this class)
+ System.out.print( sb );
+ System.out.print( "\t" ); // ends with a tab and no newline: terminating the row is left to the caller
+ }
+
final private void realignWithMafft() throws IOException, InterruptedException {
// final MsaInferrer mafft = Mafft
// .createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" );
_msa = MsaMethods.removeSequence( _msa, id );
removeGapColumns();
if ( verbose ) {
- System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
- System.out.print( "\t" );
- final StringBuilder sb = msaStatsAsSB();
- System.out.print( sb );
- System.out.print( "\t" );
+ printMsaStats( id );
}
- if ( ( ( ( i + 1 ) % step ) == 0 ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
+ if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) )
+ || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
if ( realign ) {
realignWithMafft();
}
}
}
- private final static void printTableHeader() {
- System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) );
- System.out.print( "\t" );
- System.out.print( "Seqs" );
- System.out.print( "\t" );
- System.out.print( "Length" );
- System.out.print( "\t" );
- System.out.print( "Gaps" );
- System.out.print( "\t" );
- System.out.print( "MSA qual" );
- System.out.print( "\t" );
- System.out.println();
- }
-
final private void removeViaLength( final int length,
final int step,
final boolean realign,
_msa = MsaMethods.removeSequence( _msa, id );
removeGapColumns();
if ( verbose ) {
- System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
- System.out.print( "\t" );
- final StringBuilder sb = msaStatsAsSB();
- System.out.print( sb );
- System.out.print( "\t" );
+ printMsaStats( id );
}
- if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) {
+ if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) || ( _msa.getLength() <= length ) ) {
if ( realign ) {
realignWithMafft();
}
_msa = MsaMethods.removeSequence( _msa, id );
removeGapColumns();
if ( verbose ) {
- System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
- System.out.print( "\t" );
- final StringBuilder sb = msaStatsAsSB();
- System.out.print( sb );
- System.out.print( "\t" );
+ printMsaStats( id );
}
- if ( ( ( ( i + 1 ) % step ) == 0 ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
+ if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
if ( realign ) {
realignWithMafft();
}
}
return stats;
}
+
+ private final static void printTableHeader() { // Prints the tab-separated header line of the stats table to System.out.
+ System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) ); // "Id" column; presumably padded with spaces to width 20 -- confirm against ForesterUtil.pad
+ System.out.print( "\t" );
+ System.out.print( "Seqs" );
+ System.out.print( "\t" );
+ System.out.print( "Length" );
+ System.out.print( "\t" );
+ System.out.print( "Gaps" );
+ System.out.print( "\t" );
+ System.out.print( "MSA qual" );
+ System.out.print( "\t" ); // a tab follows the last column title as well
+ System.out.println(); // terminates the header line
+ }
}