+ _removed_seq_ids = new TreeSet<String>();
+ }
+
+ /**
+  * Returns the identifiers of the sequences this compactor has removed so far.
+  *
+  * @return the sorted set of removed sequence identifiers; this is the
+  *         compactor's own set instance, not a copy
+  */
+ public final SortedSet<String> getRemovedSeqIds() {
+     return this._removed_seq_ids;
+ }
+
+ /**
+  * Returns the alignment currently held by this compactor.
+  *
+  * @return the current alignment
+  */
+ public final Msa getMsa() {
+     return this._msa;
+ }
+
+ /**
+  * Builds a compactor for the given alignment and removes a fixed number of
+  * sequences from it in a single pass.
+  *
+  * @param msa the alignment to compact
+  * @param worst_offenders_to_remove how many sequences to remove
+  * @param realign if {@code true}, the remaining sequences are realigned with MAFFT afterwards
+  * @return the compactor holding the resulting alignment and the removed identifiers
+  * @throws IOException propagated from the realignment step
+  * @throws InterruptedException propagated from the realignment step
+  */
+ public static final MsaCompactor removeWorstOffenders( final Msa msa,
+                                                        final int worst_offenders_to_remove,
+                                                        final boolean realign )
+         throws IOException, InterruptedException {
+     final MsaCompactor compactor = new MsaCompactor( msa );
+     compactor.removeWorstOffenders( worst_offenders_to_remove, 1, realign );
+     return compactor;
+ }
+
+ /**
+  * Builds a compactor for the given alignment and removes sequences, {@code step}
+  * at a time, until the mean gapiness of the alignment is no longer greater than
+  * {@code max_gap_average}.
+  *
+  * @param msa the alignment to compact
+  * @param max_gap_average target mean gap ratio; must not be negative
+  * @param step number of sequences removed per iteration; must be at least 1
+  * @param realign if {@code true}, the alignment is realigned with MAFFT after every iteration
+  * @return the compactor holding the resulting alignment and the removed identifiers
+  * @throws IOException propagated from the realignment step
+  * @throws InterruptedException propagated from the realignment step
+  * @throws IllegalArgumentException if {@code step} is less than 1 or {@code max_gap_average} is negative
+  */
+ public static final MsaCompactor reduceGapAverage( final Msa msa,
+                                                    final double max_gap_average,
+                                                    final int step,
+                                                    final boolean realign )
+         throws IOException, InterruptedException {
+     final MsaCompactor compactor = new MsaCompactor( msa );
+     compactor.removeViaGapAverage( max_gap_average, step, realign );
+     return compactor;
+ }
+
+ /**
+  * Builds a compactor for the given alignment and delegates to
+  * {@code removeViaLength} with the supplied arguments.
+  * <p>
+  * NOTE(review): presumably this removes sequences, {@code step} at a time, until
+  * the alignment is no longer than {@code length} -- confirm against
+  * {@code removeViaLength}.
+  *
+  * @param msa the alignment to compact
+  * @param length the target passed on to {@code removeViaLength}
+  * @param step the step size passed on to {@code removeViaLength}
+  * @param realign the realign flag passed on to {@code removeViaLength}
+  * @return the compactor holding the resulting alignment
+  * @throws IOException propagated from {@code removeViaLength}
+  * @throws InterruptedException propagated from {@code removeViaLength}
+  */
+ public static final MsaCompactor reduceLength( final Msa msa,
+                                                final int length,
+                                                final int step,
+                                                final boolean realign )
+         throws IOException, InterruptedException {
+     final MsaCompactor compactor = new MsaCompactor( msa );
+     compactor.removeViaLength( length, step, realign );
+     return compactor;
+ }
+
+ /**
+  * Replaces the current alignment with the result of
+  * {@code MsaMethods.removeGapColumns( 1, 0, ... )}.
+  * <p>
+  * NOTE(review): the arguments {@code 1, 0} presumably restrict the removal to
+  * columns that consist of gaps only -- confirm against {@code MsaMethods}.
+  */
+ private final void removeGapColumns() {
+     this._msa = MsaMethods.createInstance().removeGapColumns( 1, 0, this._msa );
+ }
+
+ /**
+  * Removes the first {@code to_remove} sequences reported by {@code calcStats()}
+  * from the alignment, records their identifiers, drops gap columns and
+  * optionally realigns the remainder.
+  * <p>
+  * NOTE(review): this relies on {@code calcStats()} returning its entries ordered
+  * worst-first -- confirm against {@code calcStats()}.
+  *
+  * @param to_remove number of sequences to remove; must not exceed the number of
+  *                  entries returned by {@code calcStats()}
+  * @param step currently unused; kept so existing callers keep compiling
+  * @param realign if {@code true}, the alignment is realigned with MAFFT afterwards
+  * @throws IOException propagated from the realignment step
+  * @throws InterruptedException propagated from the realignment step
+  * @throws IllegalArgumentException if {@code to_remove} exceeds the number of available sequences
+  */
+ final private void removeWorstOffenders( final int to_remove, final int step, final boolean realign )
+         throws IOException, InterruptedException {
+     final DescriptiveStatistics stats[] = calcStats();
+     // Fail fast, before any state is touched, instead of running off the end of the array.
+     if ( to_remove > stats.length ) {
+         throw new IllegalArgumentException( "cannot remove " + to_remove + " sequences, only " + stats.length
+                 + " available" );
+     }
+     final List<String> to_remove_ids = new ArrayList<String>();
+     for( int j = 0; j < to_remove; ++j ) {
+         to_remove_ids.add( stats[ j ].getDescription() );
+     }
+     _msa = MsaMethods.removeSequences( _msa, to_remove_ids );
+     // Record the identifiers only once the removal has actually succeeded.
+     _removed_seq_ids.addAll( to_remove_ids );
+     removeGapColumns();
+     if ( realign ) {
+         mafft();
+     }
+ }
+
+ /**
+  * Realigns the sequences of the current alignment with MAFFT (run with
+  * {@code --quiet}) and stores the result as the new current alignment.
+  * <p>
+  * The location of the MAFFT executable is taken from the {@code mafft.path}
+  * system property. When the property is not set, the previously hard-coded
+  * location {@code /home/czmasek/bin/mafft} is used, so default behaviour is
+  * unchanged.
+  *
+  * @throws IOException propagated from the MAFFT invocation
+  * @throws InterruptedException propagated from the MAFFT invocation
+  */
+ final private void mafft() throws IOException, InterruptedException {
+     // The default only exists on the original developer's machine; allow an override.
+     final String path_to_mafft = System.getProperty( "mafft.path", "/home/czmasek/bin/mafft" );
+     final MsaInferrer mafft = Mafft.createInstance( path_to_mafft );
+     final List<String> opts = new ArrayList<String>();
+     // Additional MAFFT options, currently disabled and kept for reference:
+     // opts.add( "--maxiterate" );
+     // opts.add( "1000" );
+     // opts.add( "--localpair" );
+     opts.add( "--quiet" );
+     _msa = mafft.infer( _msa.asSequenceList(), opts );
+ }
+
+ final private void removeViaGapAverage( final double mean_gapiness, final int step, final boolean realign )
+ throws IOException, InterruptedException {
+ if ( step < 1 ) {
+ throw new IllegalArgumentException( "step cannot be less than 1" );
+ }
+ if ( mean_gapiness < 0 ) {
+ throw new IllegalArgumentException( "target average gap ratio cannot be less than 0" );
+ }
+ if ( VERBOSE ) {
+ System.out.println( "start: " + _msa.getLength() + " "
+ + ForesterUtil.round( MsaMethods.calcBasicGapinessStatistics( _msa ).arithmeticMean(), 3 ) );
+ }
+ int counter = step;
+ while ( MsaMethods.calcBasicGapinessStatistics( _msa ).arithmeticMean() > mean_gapiness ) {
+ removeWorstOffenders( step, 1, false );
+ if ( realign ) {
+ mafft();
+ }
+ if ( VERBOSE ) {
+ System.out.println( counter + ": " + _msa.getLength() + " "
+ + ForesterUtil.round( MsaMethods.calcBasicGapinessStatistics( _msa ).arithmeticMean(), 3 ) );
+ }
+ counter += step;
+ }