inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 16 Apr 2014 23:06:52 +0000 (23:06 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 16 Apr 2014 23:06:52 +0000 (23:06 +0000)
forester/java/src/org/forester/application/msa_compactor.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java

index 345539a..5239429 100644 (file)
@@ -114,7 +114,12 @@ public class msa_compactor {
                 mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, 50, path_to_mafft, out );
             }
             else if ( length > 0 ) {
                 mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, 50, path_to_mafft, out );
             }
             else if ( length > 0 ) {
-                mc = MsaCompactor.reduceLength( msa, length, step, realign, path_to_mafft, out );
+                if ( length >= msa.getLength() ) {
+                    ForesterUtil.fatalError( PRG_NAME, "target MSA length (" + length
+                            + ") is greater than or equal to MSA original length (" + msa.getLength() + ")" );
+                }
+                // TODO if < shortest seq -> error
+                mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
             }
             //System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) );
             // for( final String id : mc.getRemovedSeqIds() ) {
             }
             //System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) );
             // for( final String id : mc.getRemovedSeqIds() ) {
@@ -167,7 +172,7 @@ public class msa_compactor {
         System.out.println();
         System.out.println( "   -" + REMOVE_WORST_OFFENDERS_OPTION
                 + "=<integer>  number of worst offender sequences to remove" );
         System.out.println();
         System.out.println( "   -" + REMOVE_WORST_OFFENDERS_OPTION
                 + "=<integer>  number of worst offender sequences to remove" );
-        System.out.println( "   -" + LENGTH_OPTION + "=<integer>  length" );
+        System.out.println( "   -" + LENGTH_OPTION + "=<integer>  target MSA length" );
         System.out.println( "   -" + AV_GAPINESS_OPTION + "=<decimal>  gap %" );
         System.out.println( "   -" + STEP_OPTION + "=<decimal>  step" );
         System.out.println( "   -" + REALIGN_OPTION + "            to realign using MAFFT" + mafft_comment );
         System.out.println( "   -" + AV_GAPINESS_OPTION + "=<decimal>  gap %" );
         System.out.println( "   -" + STEP_OPTION + "=<decimal>  step" );
         System.out.println( "   -" + REALIGN_OPTION + "            to realign using MAFFT" + mafft_comment );
index a7886f1..a9e9f8a 100644 (file)
@@ -250,30 +250,6 @@ public class MsaCompactor {
         }
     }
 
         }
     }
 
-    final private void removeViaLength( final int length, final int step, final boolean realign ) throws IOException,
-            InterruptedException {
-        if ( step < 1 ) {
-            throw new IllegalArgumentException( "step cannot be less than 1" );
-        }
-        if ( length < 11 ) {
-            throw new IllegalArgumentException( "target length cannot be less than 1" );
-        }
-        if ( VERBOSE ) {
-            System.out.println( "orig: " + msaStatsAsSB() );
-        }
-        int counter = step;
-        while ( _msa.getLength() > length ) {
-            removeWorstOffenders( step, 1, false, false, false );
-            if ( realign ) {
-                realignWithMafft();
-            }
-            if ( VERBOSE ) {
-                System.out.println( counter + ": " + msaStatsAsSB() );
-            }
-            counter += step;
-        }
-    }
-
     final private void removeWorstOffenders( final int to_remove,
                                              final int step,
                                              final boolean realign,
     final private void removeWorstOffenders( final int to_remove,
                                              final int step,
                                              final boolean realign,
@@ -311,6 +287,44 @@ public class MsaCompactor {
         }
     }
 
         }
     }
 
+    final private void removeViaLength( final int length,
+                                        final int step,
+                                        final boolean realign,
+                                        final boolean norm,
+                                        final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        for( final GapContribution gap_gontribution : stats ) {
+            to_remove_ids.add( gap_gontribution.getId() );
+        }
+        int i = 0;
+        while ( _msa.getLength() > length ) {
+            final String id = to_remove_ids.get( i );
+            _msa = MsaMethods.removeSequence( _msa, id );
+            removeGapColumns();
+            if ( verbose ) {
+                System.out.print( ForesterUtil.pad( id, 20, ' ', false ) );
+                System.out.print( "\t" );
+                final StringBuilder sb = msaStatsAsSB();
+                System.out.print( sb );
+                System.out.print( "\t" );
+            }
+            if ( ( ( ( i + 1 ) % step ) == 0 ) || ( _msa.getLength() <= length ) ) {
+                if ( realign ) {
+                    realignWithMafft();
+                }
+                final String s = writeOutfile();
+                if ( verbose ) {
+                    System.out.print( "-> " + s );
+                }
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
+        }
+    }
+
     private void setPathToMafft( final String path_to_mafft ) {
         _path_to_mafft = path_to_mafft;
     }
     private void setPathToMafft( final String path_to_mafft ) {
         _path_to_mafft = path_to_mafft;
     }
@@ -375,6 +389,7 @@ public class MsaCompactor {
                                                    final int length,
                                                    final int step,
                                                    final boolean realign,
                                                    final int length,
                                                    final int step,
                                                    final boolean realign,
+                                                   final boolean norm,
                                                    final String path_to_mafft,
                                                    final File out ) throws IOException, InterruptedException {
         final MsaCompactor mc = new MsaCompactor( msa );
                                                    final String path_to_mafft,
                                                    final File out ) throws IOException, InterruptedException {
         final MsaCompactor mc = new MsaCompactor( msa );
@@ -382,7 +397,7 @@ public class MsaCompactor {
             mc.setPathToMafft( path_to_mafft );
         }
         mc.setOutFileBase( out );
             mc.setPathToMafft( path_to_mafft );
         }
         mc.setOutFileBase( out );
-        mc.removeViaLength( length, step, realign );
+        mc.removeViaLength( length, step, realign, norm, true );
         return mc;
     }
 
         return mc;
     }