From 14375b2b2837d1ae5a2d0bb44f8aae9f1916281a Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 16 Apr 2014 23:22:25 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/msa_compactor.java | 2 +- .../org/forester/msa_compactor/MsaCompactor.java | 88 ++++++++++++++------ 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index 5239429..4230ce0 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -111,7 +111,7 @@ public class msa_compactor { mc = MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out ); } else if ( av > 0 ) { - mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, 50, path_to_mafft, out ); + mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out ); } else if ( length > 0 ) { if ( length >= msa.getLength() ) { diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index a9e9f8a..2a5c5b1 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -213,8 +213,46 @@ public class MsaCompactor { final private void removeViaGapAverage( final double mean_gapiness, final int step, final boolean realign, - final File outfile, - final int minimal_effective_length ) throws IOException, + final boolean norm, + final boolean verbose ) throws IOException, InterruptedException { + final GapContribution stats[] = calcGapContribtionsStats( norm ); + final List to_remove_ids = new ArrayList(); + for( final GapContribution gap_gontribution : stats ) { + to_remove_ids.add( gap_gontribution.getId() ); + } + int i = 0; + while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) { + final String id = to_remove_ids.get( i ); + _msa = MsaMethods.removeSequence( _msa, id ); + removeGapColumns(); + if ( verbose ) { + System.out.print( ForesterUtil.pad( id, 20, ' ', false ) ); + System.out.print( "\t" ); + final StringBuilder sb = msaStatsAsSB(); + System.out.print( sb ); + System.out.print( "\t" ); + } + if ( ( ( ( i + 1 ) % step ) == 0 ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) { + if ( realign ) { + realignWithMafft(); + } + final String s = writeOutfile(); + if ( verbose ) { + System.out.print( "-> " + s ); + } + } + if ( verbose ) { + System.out.println(); + } + ++i; + } + } + + final private void removeViaGapAverageOLD( final double mean_gapiness, + final int step, + final boolean realign, + final File outfile, + final int minimal_effective_length ) throws IOException, InterruptedException { if ( step < 1 ) { throw new IllegalArgumentException( "step cannot be less than 1" ); @@ -250,18 +288,18 @@ public class MsaCompactor { } } - final private void removeWorstOffenders( final int to_remove, - final int step, - final boolean realign, - final boolean norm, - final boolean verbose ) throws IOException, InterruptedException { + final private void removeViaLength( final int length, + final int step, + final boolean realign, + final boolean norm, + final boolean verbose ) throws IOException, InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( norm ); final List to_remove_ids = new ArrayList(); - for( int j = 0; j < to_remove; ++j ) { - to_remove_ids.add( stats[ j ].getId() ); - _removed_seq_ids.add( stats[ j ].getId() ); + for( final GapContribution gap_gontribution : stats ) { + to_remove_ids.add( gap_gontribution.getId() ); } - for( int i = 0; i < to_remove_ids.size(); ++i ) { + int i = 0; + while ( _msa.getLength() > length ) { final String id = to_remove_ids.get( i ); _msa = MsaMethods.removeSequence( _msa, id ); removeGapColumns(); @@ -272,7 +310,7 @@ public class MsaCompactor { System.out.print( sb ); System.out.print( "\t" ); } - if ( ( ( ( i + 1 ) % step ) == 0 ) || ( i == ( to_remove_ids.size() - 1 ) ) ) { + if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) { if ( realign ) { realignWithMafft(); } @@ -284,21 +322,22 @@ public class MsaCompactor { if ( verbose ) { System.out.println(); } + ++i; } } - final private void removeViaLength( final int length, - final int step, - final boolean realign, - final boolean norm, - final boolean verbose ) throws IOException, InterruptedException { + final private void removeWorstOffenders( final int to_remove, + final int step, + final boolean realign, + final boolean norm, + final boolean verbose ) throws IOException, InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( norm ); final List to_remove_ids = new ArrayList(); - for( final GapContribution gap_gontribution : stats ) { - to_remove_ids.add( gap_gontribution.getId() ); + for( int j = 0; j < to_remove; ++j ) { + to_remove_ids.add( stats[ j ].getId() ); + _removed_seq_ids.add( stats[ j ].getId() ); } - int i = 0; - while ( _msa.getLength() > length ) { + for( int i = 0; i < to_remove_ids.size(); ++i ) { final String id = to_remove_ids.get( i ); _msa = MsaMethods.removeSequence( _msa, id ); removeGapColumns(); @@ -309,7 +348,7 @@ public class MsaCompactor { System.out.print( sb ); System.out.print( "\t" ); } - if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) { + if ( ( ( ( i + 1 ) % step ) == 0 ) || ( i == ( to_remove_ids.size() - 1 ) ) ) { if ( realign ) { realignWithMafft(); } @@ -321,7 +360,6 @@ public class MsaCompactor { if ( verbose ) { System.out.println(); } - ++i; } } @@ -373,7 +411,7 @@ public class MsaCompactor { final double max_gap_average, final int step, final boolean realign, - final int minimal_effective_length, + final boolean norm, final String path_to_mafft, final File out ) throws IOException, InterruptedException { final MsaCompactor mc = new MsaCompactor( msa ); @@ -381,7 +419,7 @@ public class MsaCompactor { mc.setPathToMafft( path_to_mafft ); } mc.setOutFileBase( out ); - mc.removeViaGapAverage( max_gap_average, step, realign, out, minimal_effective_length ); + mc.removeViaGapAverage( max_gap_average, step, realign, norm, true ); return mc; } -- 1.7.10.2