final static private String PATH_TO_MAFFT_OPTION = "mafft";
final static private String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";
final static private String PRG_NAME = "msa_compactor";
- final static private String PRG_DESC = "multiple sequnce aligment compactor";
+ final static private String PRG_DESC = "multiple sequence alignment compactor";
final static private String PRG_VERSION = "0.01";
- final static private String PRG_DATE = "140314";
+ final static private String PRG_DATE = "140316";
final static private String E_MAIL = "phylosoft@gmail.com";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
if ( cla.isOptionSet( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ) ) {
norm = false;
}
- // else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) {
- // step = cla.getOptionValueAsInt( STEP_OPTION );
- // window = cla.getOptionValueAsInt( WINDOW_OPTION );
- // }
- // else {
- // printHelp();
- // System.exit( 0 );
- // }
if ( realign ) {
if ( ForesterUtil.isEmpty( path_to_mafft ) ) {
path_to_mafft = MsaCompactor.guessPathToMafft();
// TODO if < shortest seq -> error
mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
}
- //System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) );
- // for( final String id : mc.getRemovedSeqIds() ) {
- // System.out.println( id );
- //}
- //mc.writeMsa( out, MSA_FORMAT.PHYLIP, ".aln" );
}
catch ( final Exception e ) {
e.printStackTrace();
System.out.println( " -" + REMOVE_WORST_OFFENDERS_OPTION
+ "=<integer> number of worst offender sequences to remove" );
System.out.println( " -" + LENGTH_OPTION + "=<integer> target MSA length" );
- System.out.println( " -" + AV_GAPINESS_OPTION + "=<decimal> gap %" );
- System.out.println( " -" + STEP_OPTION + "=<decimal> step" );
+ System.out.println( " -" + AV_GAPINESS_OPTION + "=<decimal> target gap-ratio (0.0-1.0)" );
+ System.out.println( " -" + STEP_OPTION + "=<integer> step (for output and re-aligning)" );
System.out.println( " -" + REALIGN_OPTION + " to realign using MAFFT" + mafft_comment );
System.out.println();
System.out.println();
public class MsaCompactor {
- final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
- final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
- private static final boolean VERBOSE = false;
+ final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
+ final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
private Msa _msa;
private File _out_file_base;
private String _path_to_mafft;
/**
 * Obtains the gap contributions from {@code calcGapContribtions( norm )} and
 * returns them sorted.
 *
 * @param norm passed through unchanged to {@code calcGapContribtions};
 *             presumably selects normalization for effective length -- TODO confirm
 *             against calcGapContribtions
 * @return the same array that {@code calcGapContribtions} returned, sorted in place
 */
final private GapContribution[] calcGapContribtionsStats( final boolean norm ) {
final GapContribution stats[] = calcGapContribtions( norm );
// Arrays.sort( Object[] ) orders by the elements' natural ordering, so this
// relies on GapContribution being Comparable (a ClassCastException is thrown otherwise).
Arrays.sort( stats );
- // for( final GapContribution stat : stats ) {
- // final StringBuilder sb = new StringBuilder();
- // sb.append( stat.getId() );
- // sb.append( "\t" );
- // sb.append( NF_4.format( stat.getValue() ) );
- // sb.append( "\t" );
- // sb.append( NF_4.format( stat.median() ) );
- // sb.append( "\t" );
- // sb.append( NF_4.format( stat.getMin() ) );
- // sb.append( "\t" );
- // sb.append( NF_4.format( stat.getMax() ) );
- //sb.append( "\t" );
- //System.out.println( sb );
- // }
return stats;
}
}
}
- final private void removeViaGapAverageOLD( final double mean_gapiness,
- final int step,
- final boolean realign,
- final File outfile,
- final int minimal_effective_length ) throws IOException,
- InterruptedException {
- if ( step < 1 ) {
- throw new IllegalArgumentException( "step cannot be less than 1" );
- }
- if ( mean_gapiness < 0 ) {
- throw new IllegalArgumentException( "target average gap ratio cannot be less than 0" );
- }
- if ( VERBOSE ) {
- System.out.println( "orig: " + msaStatsAsSB() );
- }
- if ( minimal_effective_length > 1 ) {
- _msa = MsaMethods.removeSequencesByMinimalLength( _msa, minimal_effective_length );
- if ( VERBOSE ) {
- System.out.println( "short seq removal: " + msaStatsAsSB() );
- }
- }
- int counter = step;
- double gr;
- do {
- removeWorstOffenders( step, 1, false, false, false );
- if ( realign ) {
- realignWithMafft();
- }
- gr = MsaMethods.calcGapRatio( _msa );
- if ( VERBOSE ) {
- System.out.println( counter + ": " + msaStatsAsSB() );
- }
- // write( outfile, gr );
- counter += step;
- } while ( gr > mean_gapiness );
- if ( VERBOSE ) {
- System.out.println( "final: " + msaStatsAsSB() );
- }
- }
-
final private void removeViaLength( final int length,
final int step,
final boolean realign,