inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 17 Apr 2014 02:57:48 +0000 (02:57 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 17 Apr 2014 02:57:48 +0000 (02:57 +0000)
forester/java/src/org/forester/application/msa_compactor.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java

index 4230ce0..fd035fe 100644 (file)
@@ -26,9 +26,9 @@ public class msa_compactor {
     final static private String PATH_TO_MAFFT_OPTION                   = "mafft";
     final static private String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";
     final static private String PRG_NAME                               = "msa_compactor";
-    final static private String PRG_DESC                               = "multiple sequnce aligment compactor";
+    final static private String PRG_DESC                               = "multiple sequence aligment compactor";
     final static private String PRG_VERSION                            = "0.01";
-    final static private String PRG_DATE                               = "140314";
+    final static private String PRG_DATE                               = "140316";
     final static private String E_MAIL                                 = "phylosoft@gmail.com";
     final static private String WWW                                    = "https://sites.google.com/site/cmzmasek/home/software/forester";
 
@@ -84,14 +84,6 @@ public class msa_compactor {
             if ( cla.isOptionSet( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ) ) {
                 norm = false;
             }
-            //            else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) {
-            //                step = cla.getOptionValueAsInt( STEP_OPTION );
-            //                window = cla.getOptionValueAsInt( WINDOW_OPTION );
-            //            }
-            //            else {
-            //                printHelp();
-            //                System.exit( 0 );
-            //            }
             if ( realign ) {
                 if ( ForesterUtil.isEmpty( path_to_mafft ) ) {
                     path_to_mafft = MsaCompactor.guessPathToMafft();
@@ -121,11 +113,6 @@ public class msa_compactor {
                 // TODO if < shortest seq -> error
                 mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
             }
-            //System.out.println( MsaMethods.calcGapRatio( mc.getMsa() ) );
-            // for( final String id : mc.getRemovedSeqIds() ) {
-            //     System.out.println( id );
-            //}
-            //mc.writeMsa( out, MSA_FORMAT.PHYLIP, ".aln" );
         }
         catch ( final Exception e ) {
             e.printStackTrace();
@@ -173,8 +160,8 @@ public class msa_compactor {
         System.out.println( "   -" + REMOVE_WORST_OFFENDERS_OPTION
                 + "=<integer>  number of worst offender sequences to remove" );
         System.out.println( "   -" + LENGTH_OPTION + "=<integer>  target MSA length" );
-        System.out.println( "   -" + AV_GAPINESS_OPTION + "=<decimal>  gap %" );
-        System.out.println( "   -" + STEP_OPTION + "=<decimal>  step" );
+        System.out.println( "   -" + AV_GAPINESS_OPTION + "=<decimal>  target gap-ratio (0.0-1.0)" );
+        System.out.println( "   -" + STEP_OPTION + "=<integer>  step (for output and re-aligning)" );
         System.out.println( "   -" + REALIGN_OPTION + "            to realign using MAFFT" + mafft_comment );
         System.out.println();
         System.out.println();
index 2a5c5b1..3092060 100644 (file)
@@ -35,9 +35,8 @@ import org.forester.util.ForesterUtil;
 
 public class MsaCompactor {
 
-    final private static NumberFormat NF_3    = new DecimalFormat( "#.###" );
-    final private static NumberFormat NF_4    = new DecimalFormat( "#.####" );
-    private static final boolean      VERBOSE = false;
+    final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
+    final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
     private Msa                       _msa;
     private File                      _out_file_base;
     private String                    _path_to_mafft;
@@ -123,20 +122,6 @@ public class MsaCompactor {
     final private GapContribution[] calcGapContribtionsStats( final boolean norm ) {
         final GapContribution stats[] = calcGapContribtions( norm );
         Arrays.sort( stats );
-        // for( final GapContribution stat : stats ) {
-        //  final StringBuilder sb = new StringBuilder();
-        //  sb.append( stat.getId() );
-        //  sb.append( "\t" );
-        //  sb.append( NF_4.format( stat.getValue() ) );
-        //  sb.append( "\t" );
-        //            sb.append( NF_4.format( stat.median() ) );
-        //            sb.append( "\t" );
-        //            sb.append( NF_4.format( stat.getMin() ) );
-        //            sb.append( "\t" );
-        //            sb.append( NF_4.format( stat.getMax() ) );
-        //sb.append( "\t" );
-        //System.out.println( sb );
-        // }
         return stats;
     }
 
@@ -248,46 +233,6 @@ public class MsaCompactor {
         }
     }
 
-    final private void removeViaGapAverageOLD( final double mean_gapiness,
-                                               final int step,
-                                               final boolean realign,
-                                               final File outfile,
-                                               final int minimal_effective_length ) throws IOException,
-            InterruptedException {
-        if ( step < 1 ) {
-            throw new IllegalArgumentException( "step cannot be less than 1" );
-        }
-        if ( mean_gapiness < 0 ) {
-            throw new IllegalArgumentException( "target average gap ratio cannot be less than 0" );
-        }
-        if ( VERBOSE ) {
-            System.out.println( "orig: " + msaStatsAsSB() );
-        }
-        if ( minimal_effective_length > 1 ) {
-            _msa = MsaMethods.removeSequencesByMinimalLength( _msa, minimal_effective_length );
-            if ( VERBOSE ) {
-                System.out.println( "short seq removal: " + msaStatsAsSB() );
-            }
-        }
-        int counter = step;
-        double gr;
-        do {
-            removeWorstOffenders( step, 1, false, false, false );
-            if ( realign ) {
-                realignWithMafft();
-            }
-            gr = MsaMethods.calcGapRatio( _msa );
-            if ( VERBOSE ) {
-                System.out.println( counter + ": " + msaStatsAsSB() );
-            }
-            //   write( outfile, gr );
-            counter += step;
-        } while ( gr > mean_gapiness );
-        if ( VERBOSE ) {
-            System.out.println( "final: " + msaStatsAsSB() );
-        }
-    }
-
     final private void removeViaLength( final int length,
                                         final int step,
                                         final boolean realign,