inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 28 Apr 2014 19:49:11 +0000 (19:49 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 28 Apr 2014 19:49:11 +0000 (19:49 +0000)
forester/java/src/org/forester/application/msa_compactor.java
forester/java/src/org/forester/msa/DeleteableMsa.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java

index c0b0663..d5b694f 100644 (file)
@@ -126,6 +126,10 @@ public class msa_compactor {
             }
             final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa );
             System.out.println( initial_msa_stats.toString() );
+            if ( ( cla.isOptionSet( LENGTH_OPTION ) || cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla
+                    .isOptionSet( AV_GAPINESS_OPTION ) ) && ( out == null ) ) {
+                ForesterUtil.fatalError( PRG_NAME, "outfile file missing" );
+            }
             if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) {
                 worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION );
                 if ( ( worst_remove < 1 ) || ( worst_remove >= msa.getNumberOfSequences() - 1 ) ) {
@@ -216,6 +220,11 @@ public class msa_compactor {
                     }
                 }
             }
+            if ( ( !cla.isOptionSet( LENGTH_OPTION ) && !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) && !cla
+                    .isOptionSet( AV_GAPINESS_OPTION ) ) && ( ( out != null ) || ( removed_seqs_out_base != null ) ) ) {
+                ForesterUtil.fatalError( PRG_NAME,
+                                         "chart only, no outfile(s) produced (no need to indicate output file(s))" );
+            }
             ForesterUtil.printProgramInformation( PRG_NAME,
                                                   PRG_DESC,
                                                   PRG_VERSION,
@@ -254,7 +263,6 @@ public class msa_compactor {
             System.out.println( "Step for diagnostics reports         : "
                     + ( step_for_diagnostics > 1 ? step_for_diagnostics : 1 ) );
             System.out.println( "Calculate mean identity              : " + report_aln_mean_identity );
-            
             if ( !norm ) {
                 System.out.println( "Normalize                            : " + norm );
             }
@@ -279,6 +287,9 @@ public class msa_compactor {
                 }
                 mc.setNorm( norm );
                 mc.setOutFileBase( out );
+                if ( removed_seqs_out_base != null ) {
+                    mc.setRemovedSeqsOutBase( removed_seqs_out_base );
+                }
                 if ( step > 1 ) {
                     mc.setStep( step );
                 }
@@ -295,7 +306,15 @@ public class msa_compactor {
                 }
                 mc.setNorm( norm );
                 mc.setOutFileBase( out );
-                mc.setStep( step );
+                if ( removed_seqs_out_base != null ) {
+                    mc.setRemovedSeqsOutBase( removed_seqs_out_base );
+                }
+                if ( step > 1 ) {
+                    mc.setStep( step );
+                }
+                if ( step_for_diagnostics > 1 ) {
+                    mc.setStepForDiagnostics( step_for_diagnostics );
+                }
                 mc.removeViaGapAverage( av_gap );
             }
             else if ( length > 0 ) {
@@ -306,8 +325,15 @@ public class msa_compactor {
                     mc.setPathToMafft( path_to_mafft );
                 }
                 mc.setNorm( norm );
-                mc.setOutFileBase( out );
-                mc.setStep( step );
+                if ( removed_seqs_out_base != null ) {
+                    mc.setRemovedSeqsOutBase( removed_seqs_out_base );
+                }
+                if ( step > 1 ) {
+                    mc.setStep( step );
+                }
+                if ( step_for_diagnostics > 1 ) {
+                    mc.setStepForDiagnostics( step_for_diagnostics );
+                }
                 mc.removeViaLength( length );
             }
             else {
@@ -377,7 +403,7 @@ public class msa_compactor {
         }
         System.out.println( "Usage:" );
         System.out.println();
-        System.out.println( PRG_NAME + " <options> <msa input file> <output file>" );
+        System.out.println( PRG_NAME + " <options> <msa input file> <output file base>" );
         System.out.println();
         System.out.println( " options: " );
         System.out.println();
@@ -394,8 +420,10 @@ public class msa_compactor {
                 + "=<integer>  minimal effecive sequence length (for deleting of shorter sequences)" );
         System.out.println( "   -" + GAP_RATIO_LENGTH_OPTION
                 + "=<decimal>  maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
-        System.out.println( "   -" + REPORT_ALN_MEAN_IDENTITY
-                + "             to calculate mean MSA column identity (\"MSA quality\")  (not recommended for very large alignments)" );
+        System.out
+                .println( "   -"
+                        + REPORT_ALN_MEAN_IDENTITY
+                        + "             to calculate mean MSA column identity (\"MSA quality\")  (not recommended for very large alignments)" );
         System.out.println( "   -" + OUTPUT_FORMAT_PHYLIP_OPTION
                 + "             to write output alignments in phylip format instead of fasta" );
         System.out.println( "   -" + OUTPUT_REMOVED_SEQS_OPTION + "=<file>     to output the removed sequences" );
index 05bf074..a781bf6 100644 (file)
@@ -60,7 +60,7 @@ public final class DeleteableMsa extends BasicMsa {
         }
         return max;
     }
-    
+
     final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
         if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) {
             throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio );
index 30b66d9..374d3bb 100644 (file)
@@ -41,7 +41,8 @@ import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
 import org.forester.evoinference.tools.BootstrapResampler;
-import org.forester.msa.BasicMsa;
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
 import org.forester.msa.DeleteableMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
@@ -57,22 +58,26 @@ import org.forester.util.ForesterUtil;
 
 public class MsaCompactor {
 
-    final private static NumberFormat NF_3                      = new DecimalFormat( "#.###" );
-    final private static NumberFormat NF_4                      = new DecimalFormat( "#.####" );
-    private double                    _gap_ratio                = -1;
+    final private static NumberFormat NF_3                   = new DecimalFormat( "#.###" );
+    final private static NumberFormat NF_4                   = new DecimalFormat( "#.####" );
+    private double                    _gap_ratio             = -1;
     //
-    private final String              _maffts_opts              = "--auto";
-    private int                       _min_length               = -1;
+    private final String              _maffts_opts           = "--auto";
+    private int                       _min_length            = -1;
     //
-    private DeleteableMsa             _msa                      = null;
-    private boolean                   _norm                     = true;
-    private File                      _out_file_base            = null;
-    private MSA_FORMAT                _output_format            = MSA_FORMAT.FASTA;
-    private String                    _path_to_mafft            = null;
+    private DeleteableMsa             _msa                   = null;
+    private boolean                   _norm                  = true;
+    private File                      _out_file_base         = null;
+    private MSA_FORMAT                _output_format         = MSA_FORMAT.FASTA;
+    private String                    _path_to_mafft         = null;
     //
-    private boolean                   _realign                  = false;
+    private boolean                   _realign               = false;
     private final SortedSet<String>   _removed_seq_ids;
-    private final File                _removed_seqs_out_base    = null;
+    private File                      _removed_seqs_out_base = null;
+
+    public final void setRemovedSeqsOutBase( final File removed_seqs_out_base ) {
+        _removed_seqs_out_base = removed_seqs_out_base;
+    }
     private boolean                   _report_aln_mean_identity = false;
     private int                       _step                     = -1;
     private int                       _step_for_diagnostics     = -1;
@@ -123,8 +128,10 @@ public class MsaCompactor {
             System.out.println();
             ++i;
         }
-        final String msg = writeAndAlignRemovedSeqs();
-        System.out.println( msg );
+        if ( _removed_seqs_out_base != null ) {
+            final String msg = writeAndAlignRemovedSeqs();
+            System.out.println( msg );
+        }
     }
 
     public void removeViaLength( final int length ) throws IOException, InterruptedException {
@@ -149,8 +156,10 @@ public class MsaCompactor {
             System.out.println();
             ++i;
         }
-        final String msg = writeAndAlignRemovedSeqs();
-        System.out.println( msg );
+        if ( _removed_seqs_out_base != null ) {
+            final String msg = writeAndAlignRemovedSeqs();
+            System.out.println( msg );
+        }
     }
 
     public final void removeWorstOffenders( final int to_remove ) throws IOException, InterruptedException {
@@ -177,8 +186,10 @@ public class MsaCompactor {
                 System.out.println();
             }
         }
-        final String msg = writeAndAlignRemovedSeqs();
-        System.out.println( msg );
+        if ( _removed_seqs_out_base != null ) {
+            final String msg = writeAndAlignRemovedSeqs();
+            System.out.println( msg );
+        }
     }
 
     public final List<MsaProperties> chart( final int step, final boolean realign, final boolean norm )
@@ -278,10 +289,9 @@ public class MsaCompactor {
 
     final public String writeAndAlignRemovedSeqs() throws IOException, InterruptedException {
         final StringBuilder msg = new StringBuilder();
-        final Msa removed = BasicMsa.createInstance( _removed_seqs );
-        final String n = _removed_seqs_out_base + "_" + removed.getNumberOfSequences() + ".fasta";
-        writeMsa( removed, n, MSA_FORMAT.FASTA );
-        msg.append( "wrote " + removed.getNumberOfSequences() + " removed sequences to " + n );
+        final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta";
+        SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 );
+        msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + n );
         if ( _realign ) {
             final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
             final List<String> opts = new ArrayList<String>();
@@ -402,7 +412,7 @@ public class MsaCompactor {
     }
 
     private final void printMsaProperties( final String id, final MsaProperties msa_properties ) {
-        System.out.print( ForesterUtil.pad( _longest_id_length + 1, 20, ' ', false ) );
+        System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) );
         System.out.print( "\t" );
         final StringBuilder sb = msaPropertiesAsSB( msa_properties );
         System.out.print( sb );
@@ -493,7 +503,7 @@ public class MsaCompactor {
     }
 
     private final void printTableHeader() {
-        System.out.print( ForesterUtil.pad( "Id", _longest_id_length + 1, ' ', false ) );
+        System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) );
         System.out.print( "\t" );
         System.out.print( "Seqs" );
         System.out.print( "\t" );