mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, 50, path_to_mafft, out );
}
else if ( length > 0 ) {
- mc = MsaCompactor.reduceLength( msa, length, step, realign, path_to_mafft, out );
+ if ( length >= msa.getLength() ) {
+ ForesterUtil.fatalError( PRG_NAME, "target MSA length (" + length
+ + ") is greater than or equal to MSA original length (" + msa.getLength() + ")" );
+ }
+ // TODO if < shortest seq -> error
+ mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
}
//System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) );
// for( final String id : mc.getRemovedSeqIds() ) {
System.out.println();
System.out.println( " -" + REMOVE_WORST_OFFENDERS_OPTION
+ "=<integer> number of worst offender sequences to remove" );
- System.out.println( " -" + LENGTH_OPTION + "=<integer> length" );
+ System.out.println( " -" + LENGTH_OPTION + "=<integer> target MSA length" );
System.out.println( " -" + AV_GAPINESS_OPTION + "=<decimal> gap %" );
System.out.println( " -" + STEP_OPTION + "=<decimal> step" );
System.out.println( " -" + REALIGN_OPTION + " to realign using MAFFT" + mafft_comment );
}
}
- final private void removeViaLength( final int length, final int step, final boolean realign ) throws IOException,
- InterruptedException {
- if ( step < 1 ) {
- throw new IllegalArgumentException( "step cannot be less than 1" );
- }
- if ( length < 11 ) {
- throw new IllegalArgumentException( "target length cannot be less than 1" );
- }
- if ( VERBOSE ) {
- System.out.println( "orig: " + msaStatsAsSB() );
- }
- int counter = step;
- while ( _msa.getLength() > length ) {
- removeWorstOffenders( step, 1, false, false, false );
- if ( realign ) {
- realignWithMafft();
- }
- if ( VERBOSE ) {
- System.out.println( counter + ": " + msaStatsAsSB() );
- }
- counter += step;
- }
- }
-
final private void removeWorstOffenders( final int to_remove,
final int step,
final boolean realign,
}
}
+ /**
+  * Removes sequences from the alignment one at a time until the alignment
+  * length is no longer greater than {@code length}.
+  * <p>
+  * Sequences are removed in the order in which
+  * {@code calcGapContribtionsStats( norm )} returns them (presumably largest
+  * gap contribution first -- confirm against that method). After every
+  * removal the gap columns are removed. After every {@code step}-th removal,
+  * and after the removal that reaches the target length, the alignment is
+  * optionally realigned and an output file is written.
+  *
+  * @param length  target alignment length; must be at least 1
+  * @param step    number of removals between realign/write rounds; must be at least 1
+  * @param realign whether to call {@code realignWithMafft()} before each write
+  * @param norm    passed through to {@code calcGapContribtionsStats}
+  * @param verbose whether to print one progress line per removed sequence
+  * @throws IllegalArgumentException if {@code step} or {@code length} is less than 1
+  * @throws IllegalStateException    if every candidate has been removed and the
+  *                                  alignment is still longer than {@code length}
+  * @throws IOException              declared; presumably raised while realigning or writing output
+  * @throws InterruptedException     declared; presumably raised while waiting for the realignment
+  */
+ final private void removeViaLength( final int length,
+                                     final int step,
+                                     final boolean realign,
+                                     final boolean norm,
+                                     final boolean verbose ) throws IOException, InterruptedException {
+     // step is used as a modulus below; zero would surface as an ArithmeticException.
+     if ( step < 1 ) {
+         throw new IllegalArgumentException( "step cannot be less than 1" );
+     }
+     if ( length < 1 ) {
+         throw new IllegalArgumentException( "target length cannot be less than 1" );
+     }
+     final GapContribution[] stats = calcGapContribtionsStats( norm );
+     final List<String> to_remove_ids = new ArrayList<String>( stats.length );
+     for( final GapContribution gap_contribution : stats ) {
+         to_remove_ids.add( gap_contribution.getId() );
+     }
+     int i = 0;
+     while ( _msa.getLength() > length ) {
+         // Fail with a meaningful message instead of an IndexOutOfBoundsException
+         // when the target length cannot be reached by removing sequences.
+         if ( i >= to_remove_ids.size() ) {
+             throw new IllegalStateException( "target MSA length (" + length
+                     + ") cannot be reached: all " + to_remove_ids.size()
+                     + " removal candidates have been used" );
+         }
+         final String id = to_remove_ids.get( i );
+         _msa = MsaMethods.removeSequence( _msa, id );
+         removeGapColumns();
+         if ( verbose ) {
+             System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
+             System.out.print( "\t" );
+             final StringBuilder sb = msaStatsAsSB();
+             System.out.print( sb );
+             System.out.print( "\t" );
+         }
+         // Realign/write on every step-th removal and on the removal that reaches the target.
+         if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) {
+             if ( realign ) {
+                 realignWithMafft();
+             }
+             final String s = writeOutfile();
+             if ( verbose ) {
+                 System.out.print( "-> " + s );
+             }
+         }
+         if ( verbose ) {
+             System.out.println();
+         }
+         ++i;
+     }
+ }
+
/**
 * Stores the given value in the {@code _path_to_mafft} field; no validation
 * is performed here.
 *
 * @param path_to_mafft value to store (presumably the location of the MAFFT
 *                      executable -- confirm with callers)
 */
private void setPathToMafft( final String path_to_mafft ) {
    this._path_to_mafft = path_to_mafft;
}
final int length,
final int step,
final boolean realign,
+ final boolean norm,
final String path_to_mafft,
final File out ) throws IOException, InterruptedException {
final MsaCompactor mc = new MsaCompactor( msa );
mc.setPathToMafft( path_to_mafft );
}
mc.setOutFileBase( out );
- mc.removeViaLength( length, step, realign );
+ mc.removeViaLength( length, step, realign, norm, true );
return mc;
}