worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION );
}
if ( cla.isOptionSet( AV_GAPINESS_OPTION ) ) {
+ if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) {
+ printHelp();
+ System.exit( 0 );
+ }
av = cla.getOptionValueAsDouble( AV_GAPINESS_OPTION );
}
if ( cla.isOptionSet( LENGTH_OPTION ) ) {
+ if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla.isOptionSet( AV_GAPINESS_OPTION ) ) {
+ printHelp();
+ System.exit( 0 );
+ }
length = cla.getOptionValueAsInt( LENGTH_OPTION );
}
if ( cla.isOptionSet( STEP_OPTION ) ) {
else {
msa = GeneralMsaParser.parse( is );
}
- MsaCompactor mc = null;
if ( worst_remove > 0 ) {
- mc = MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out );
+ MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out );
}
else if ( av > 0 ) {
- mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out );
+ MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out );
}
else if ( length > 0 ) {
if ( length >= msa.getLength() ) {
+ ") is greater than or equal to MSA original length (" + msa.getLength() + ")" );
}
// TODO if < shortest seq -> error
- mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
+ MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
}
}
catch ( final Exception e ) {
public class MsaCompactor {
- final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
- final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
+ final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
+ final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
private Msa _msa;
private File _out_file_base;
private String _path_to_mafft;
private final SortedSet<String> _removed_seq_ids;
+ private final String _maffts_opts = "--retree 1";
static {
NF_4.setRoundingMode( RoundingMode.HALF_UP );
NF_3.setRoundingMode( RoundingMode.HALF_UP );
m.write( ForesterUtil.createBufferedWriter( matrix_name ) );
}
catch ( final IOException e ) {
- // TODO Auto-generated catch block
e.printStackTrace();
}
}
sb.append( "\t" );
sb.append( _msa.getLength() );
sb.append( "\t" );
- sb.append( NF_3.format( MsaMethods.calcGapRatio( _msa ) ) );
+ sb.append( NF_4.format( MsaMethods.calcGapRatio( _msa ) ) );
sb.append( "\t" );
- sb.append( NF_3.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
+ sb.append( NF_4.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
return sb;
}
// .createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" );
final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
final List<String> opts = new ArrayList<String>();
- opts.add( "--maxiterate" );
- opts.add( "1000" );
- opts.add( "--localpair" );
- opts.add( "--quiet" );
+ for( final String o : _maffts_opts.split( "\\s" ) ) {
+ opts.add( o );
+ }
+ //opts.add( "--maxiterate" );
+ //opts.add( "1000" );
+ //opts.add( "--localpair" );
+ //opts.add( "--quiet" );
_msa = mafft.infer( _msa.asSequenceList(), opts );
}
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
+ if ( verbose ) {
+ printTableHeader();
+ }
int i = 0;
while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
final String id = to_remove_ids.get( i );
}
}
+ private final static void printTableHeader() { // Writes a single tab-separated header line (Id, Seqs, Length, Gaps, MSA qual) to System.out.
+ System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) ); // "Id" goes through ForesterUtil.pad( .., 20, ' ', false ) -- presumably padded to 20 chars with spaces; confirm against ForesterUtil
+ System.out.print( "\t" );
+ System.out.print( "Seqs" );
+ System.out.print( "\t" );
+ System.out.print( "Length" );
+ System.out.print( "\t" );
+ System.out.print( "Gaps" );
+ System.out.print( "\t" );
+ System.out.print( "MSA qual" );
+ System.out.print( "\t" ); // NOTE(review): a tab is emitted after the last column too -- presumably to mirror the data rows; confirm
+ System.out.println(); // ends the header line
+ }
+
final private void removeViaLength( final int length,
final int step,
final boolean realign,
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
+ if ( verbose ) {
+ printTableHeader();
+ }
int i = 0;
while ( _msa.getLength() > length ) {
final String id = to_remove_ids.get( i );
to_remove_ids.add( stats[ j ].getId() );
_removed_seq_ids.add( stats[ j ].getId() );
}
+ if ( verbose ) {
+ printTableHeader();
+ }
for( int i = 0; i < to_remove_ids.size(); ++i ) {
final String id = to_remove_ids.get( i );
_msa = MsaMethods.removeSequence( _msa, id );
return path;
}
}
+ path = "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft";
+ if ( MsaInferrer.isInstalled( path ) ) {
+ return path;
+ }
path = "/usr/local/bin/mafft";
if ( MsaInferrer.isInstalled( path ) ) {
return path;