From b7bc82a4c68e585b887014d214fe00a7daa5a788 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 16 Apr 2014 23:06:52 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/msa_compactor.java | 9 ++- .../org/forester/msa_compactor/MsaCompactor.java | 65 ++++++++++++-------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index 345539a..5239429 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -114,7 +114,12 @@ public class msa_compactor { mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, 50, path_to_mafft, out ); } else if ( length > 0 ) { - mc = MsaCompactor.reduceLength( msa, length, step, realign, path_to_mafft, out ); + if ( length >= msa.getLength() ) { + ForesterUtil.fatalError( PRG_NAME, "target MSA length (" + length + + ") is greater than or equal to MSA original length (" + msa.getLength() + ")" ); + } + // TODO if < shortest seq -> error + mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out ); } //System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) ); // for( final String id : mc.getRemovedSeqIds() ) { @@ -167,7 +172,7 @@ public class msa_compactor { System.out.println(); System.out.println( " -" + REMOVE_WORST_OFFENDERS_OPTION + "= number of worst offender sequences to remove" ); - System.out.println( " -" + LENGTH_OPTION + "= length" ); + System.out.println( " -" + LENGTH_OPTION + "= target MSA length" ); System.out.println( " -" + AV_GAPINESS_OPTION + "= gap %" ); System.out.println( " -" + STEP_OPTION + "= step" ); System.out.println( " -" + REALIGN_OPTION + " to realign using MAFFT" + mafft_comment ); diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index a7886f1..a9e9f8a 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -250,30 +250,6 @@ public class MsaCompactor { } } - final private void removeViaLength( final int length, final int step, final boolean realign ) throws IOException, - InterruptedException { - if ( step < 1 ) { - throw new IllegalArgumentException( "step cannot be less than 1" ); - } - if ( length < 11 ) { - throw new IllegalArgumentException( "target length cannot be less than 1" ); - } - if ( VERBOSE ) { - System.out.println( "orig: " + msaStatsAsSB() ); - } - int counter = step; - while ( _msa.getLength() > length ) { - removeWorstOffenders( step, 1, false, false, false ); - if ( realign ) { - realignWithMafft(); - } - if ( VERBOSE ) { - System.out.println( counter + ": " + msaStatsAsSB() ); - } - counter += step; - } - } - final private void removeWorstOffenders( final int to_remove, final int step, final boolean realign, @@ -311,6 +287,44 @@ public class MsaCompactor { } } + final private void removeViaLength( final int length, + final int step, + final boolean realign, + final boolean norm, + final boolean verbose ) throws IOException, InterruptedException { + final GapContribution stats[] = calcGapContribtionsStats( norm ); + final List to_remove_ids = new ArrayList(); + for( final GapContribution gap_gontribution : stats ) { + to_remove_ids.add( gap_gontribution.getId() ); + } + int i = 0; + while ( _msa.getLength() > length ) { + final String id = to_remove_ids.get( i ); + _msa = MsaMethods.removeSequence( _msa, id ); + removeGapColumns(); + if ( verbose ) { + System.out.print( ForesterUtil.pad( id, 20, ' ', false ) ); + System.out.print( "\t" ); + final StringBuilder sb = msaStatsAsSB(); + System.out.print( sb ); + System.out.print( "\t" ); + } + if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) { + if ( realign ) { + realignWithMafft(); + } + final String s = writeOutfile(); + if ( verbose ) { + System.out.print( "-> " + s ); + } + } + if ( verbose ) { + System.out.println(); + } + ++i; + } + } + private void setPathToMafft( final String path_to_mafft ) { _path_to_mafft = path_to_mafft; } @@ -375,6 +389,7 @@ public class MsaCompactor { final int length, final int step, final boolean realign, + final boolean norm, final String path_to_mafft, final File out ) throws IOException, InterruptedException { final MsaCompactor mc = new MsaCompactor( msa ); @@ -382,7 +397,7 @@ public class MsaCompactor { mc.setPathToMafft( path_to_mafft ); } mc.setOutFileBase( out ); - mc.removeViaLength( length, step, realign ); + mc.removeViaLength( length, step, realign, norm, true ); return mc; } -- 1.7.10.2