inprogress
[jalview.git] / forester / java / src / org / forester / msa_compactor / MsaCompactor.java
index a7886f1..2a5c5b1 100644 (file)
@@ -213,8 +213,46 @@ public class MsaCompactor {
     final private void removeViaGapAverage( final double mean_gapiness,
                                             final int step,
                                             final boolean realign,
-                                            final File outfile,
-                                            final int minimal_effective_length ) throws IOException,
+                                            final boolean norm,
+                                            final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        for( final GapContribution gap_gontribution : stats ) {
+            to_remove_ids.add( gap_gontribution.getId() );
+        }
+        int i = 0;
+        while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
+            final String id = to_remove_ids.get( i );
+            _msa = MsaMethods.removeSequence( _msa, id );
+            removeGapColumns();
+            if ( verbose ) {
+                System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
+                System.out.print( "\t" );
+                final StringBuilder sb = msaStatsAsSB();
+                System.out.print( sb );
+                System.out.print( "\t" );
+            }
+            if ( ( ( ( i + 1 ) % step ) == 0 ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
+                if ( realign ) {
+                    realignWithMafft();
+                }
+                final String s = writeOutfile();
+                if ( verbose ) {
+                    System.out.print( "-> " + s );
+                }
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
+        }
+    }
+
+    final private void removeViaGapAverageOLD( final double mean_gapiness,
+                                               final int step,
+                                               final boolean realign,
+                                               final File outfile,
+                                               final int minimal_effective_length ) throws IOException,
             InterruptedException {
         if ( step < 1 ) {
             throw new IllegalArgumentException( "step cannot be less than 1" );
@@ -250,27 +288,41 @@ public class MsaCompactor {
         }
     }
 
-    final private void removeViaLength( final int length, final int step, final boolean realign ) throws IOException,
-            InterruptedException {
-        if ( step < 1 ) {
-            throw new IllegalArgumentException( "step cannot be less than 1" );
-        }
-        if ( length < 11 ) {
-            throw new IllegalArgumentException( "target length cannot be less than 1" );
-        }
-        if ( VERBOSE ) {
-            System.out.println( "orig: " + msaStatsAsSB() );
+    final private void removeViaLength( final int length,
+                                        final int step,
+                                        final boolean realign,
+                                        final boolean norm,
+                                        final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        for( final GapContribution gap_gontribution : stats ) {
+            to_remove_ids.add( gap_gontribution.getId() );
         }
-        int counter = step;
+        int i = 0;
         while ( _msa.getLength() > length ) {
-            removeWorstOffenders( step, 1, false, false, false );
-            if ( realign ) {
-                realignWithMafft();
+            final String id = to_remove_ids.get( i );
+            _msa = MsaMethods.removeSequence( _msa, id );
+            removeGapColumns();
+            if ( verbose ) {
+                System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
+                System.out.print( "\t" );
+                final StringBuilder sb = msaStatsAsSB();
+                System.out.print( sb );
+                System.out.print( "\t" );
             }
-            if ( VERBOSE ) {
-                System.out.println( counter + ": " + msaStatsAsSB() );
+            if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) {
+                if ( realign ) {
+                    realignWithMafft();
+                }
+                final String s = writeOutfile();
+                if ( verbose ) {
+                    System.out.print( "-> " + s );
+                }
             }
-            counter += step;
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
         }
     }
 
@@ -359,7 +411,7 @@ public class MsaCompactor {
                                                        final double max_gap_average,
                                                        final int step,
                                                        final boolean realign,
-                                                       final int minimal_effective_length,
+                                                       final boolean norm,
                                                        final String path_to_mafft,
                                                        final File out ) throws IOException, InterruptedException {
         final MsaCompactor mc = new MsaCompactor( msa );
@@ -367,7 +419,7 @@ public class MsaCompactor {
             mc.setPathToMafft( path_to_mafft );
         }
         mc.setOutFileBase( out );
-        mc.removeViaGapAverage( max_gap_average, step, realign, out, minimal_effective_length );
+        mc.removeViaGapAverage( max_gap_average, step, realign, norm, true );
         return mc;
     }
 
@@ -375,6 +427,7 @@ public class MsaCompactor {
                                                    final int length,
                                                    final int step,
                                                    final boolean realign,
+                                                   final boolean norm,
                                                    final String path_to_mafft,
                                                    final File out ) throws IOException, InterruptedException {
         final MsaCompactor mc = new MsaCompactor( msa );
@@ -382,7 +435,7 @@ public class MsaCompactor {
             mc.setPathToMafft( path_to_mafft );
         }
         mc.setOutFileBase( out );
-        mc.removeViaLength( length, step, realign );
+        mc.removeViaLength( length, step, realign, norm, true );
         return mc;
     }