Changed more concrete DistanceMatrix references to reference interface
[jalview.git] / forester / java / src / org / forester / archaeopteryx / tools / PhylogeneticInferrer.java
index c5fa04d..cc62751 100644 (file)
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.archaeopteryx.tools;
 
 import java.io.BufferedWriter;
-import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -35,34 +34,35 @@ import java.util.List;
 import javax.swing.JOptionPane;
 
 import org.forester.archaeopteryx.MainFrameApplication;
-import org.forester.evoinference.distance.NeighborJoining;
+import org.forester.evoinference.distance.NeighborJoiningF;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
+import org.forester.evoinference.matrix.distance.DistanceMatrix;
 import org.forester.evoinference.tools.BootstrapResampler;
-import org.forester.io.parsers.FastaParser;
-import org.forester.io.writers.SequenceWriter;
-import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
 import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
+import org.forester.msa.Msa.MSA_FORMAT;
 import org.forester.msa.MsaInferrer;
-import org.forester.msa.MsaTools;
+import org.forester.msa.MsaMethods;
 import org.forester.msa.ResampleableMsa;
 import org.forester.phylogeny.Phylogeny;
-import org.forester.sequence.Sequence;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.sequence.MolecularSequence;
 import org.forester.tools.ConfidenceAssessor;
 import org.forester.util.ForesterUtil;
 
-public class PhylogeneticInferrer implements Runnable {
+public class PhylogeneticInferrer extends RunnableProcess {
 
     private Msa                                _msa;
     private final MainFrameApplication         _mf;
     private final PhylogeneticInferenceOptions _options;
-    private final List<Sequence>               _seqs;
+    private final List<MolecularSequence>      _seqs;
+    private final boolean                      DEBUG           = true;
     public final static String                 MSA_FILE_SUFFIX = ".aln";
     public final static String                 PWD_FILE_SUFFIX = ".pwd";
 
-    public PhylogeneticInferrer( final List<Sequence> seqs,
+    public PhylogeneticInferrer( final List<MolecularSequence> seqs,
                                  final PhylogeneticInferenceOptions options,
                                  final MainFrameApplication mf ) {
         _msa = null;
@@ -80,25 +80,23 @@ public class PhylogeneticInferrer implements Runnable {
         _options = options;
     }
 
-    private Msa inferMsa() throws IOException {
-        final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
-        System.out.println( "temp file: " + temp_seqs_file );
-        //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
-        final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
-        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
-        writer.close();
-        final List<String> opts = processMafftOptions();
-        Msa msa = null;
-        try {
-            msa = runMAFFT( temp_seqs_file, opts );
-        }
-        catch ( final InterruptedException e ) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
+    private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
+        //        final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
+        //        if ( DEBUG ) {
+        //            System.out.println();
+        //            System.out.println( "temp file: " + temp_seqs_file );
+        //            System.out.println();
+        //        }
+        //        //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
+        //        final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
+        //        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
+        //        writer.close();
+        switch ( msa_prg ) {
+            case MAFFT:
+                return runMAFFT( _seqs, processMafftOptions() );
+            default:
+                return null;
         }
-        // copy aln file to intermediate dir file
-        // delete temp seqs file
-        return msa;
     }
 
     private List<String> processMafftOptions() {
@@ -119,7 +117,7 @@ public class PhylogeneticInferrer implements Runnable {
     }
 
     private Phylogeny inferPhylogeny( final Msa msa ) {
-        BasicSymmetricalDistanceMatrix m = null;
+        DistanceMatrix m = null;
         switch ( _options.getPwdDistanceMethod() ) {
             case KIMURA_DISTANCE:
                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
@@ -145,61 +143,87 @@ public class PhylogeneticInferrer implements Runnable {
                 e.printStackTrace();
             }
         }
-        final NeighborJoining nj = new NeighborJoining();
+        final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
         final Phylogeny phy = nj.execute( m );
-        FastaParser.extractFastaInformation( phy );
+        PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
+        PhylogenyMethods.extractFastaInformation( phy );
         return phy;
     }
 
-    private void infer() {
+    private void infer() throws InterruptedException {
         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
         if ( ( _msa == null ) && ( _seqs == null ) ) {
             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
         }
+        start( _mf, "phylogenetic inference" );
         if ( _msa == null ) {
             Msa msa = null;
             try {
-                msa = inferMsa();
+                msa = inferMsa( MSA_PRG.MAFFT );
             }
             catch ( final IOException e ) {
+                end( _mf );
                 JOptionPane.showMessageDialog( _mf,
-                                               "Could not create multiple sequence alignment with "
-                                                       + _options.getMsaPrg() + "\nand the following parameters:\n\""
-                                                       + _options.getMsaPrgParameters() + "\"\nError:"
+                                               "Could not create multiple sequence alignment with \""
+                                                       + _options.getMsaPrg() + "\" and the following parameters:\n\""
+                                                       + _options.getMsaPrgParameters() + "\"\nError: "
                                                        + e.getLocalizedMessage(),
-                                               "Failed to Calculate MSA",
-                                               JOptionPane.ERROR_MESSAGE );
+                                                       "Failed to Calculate MSA",
+                                                       JOptionPane.ERROR_MESSAGE );
+                if ( DEBUG ) {
+                    e.printStackTrace();
+                }
+                return;
+            }
+            catch ( final Exception e ) {
+                end( _mf );
+                JOptionPane.showMessageDialog( _mf,
+                                               "Could not create multiple sequence alignment with \""
+                                                       + _options.getMsaPrg() + "\" and the following parameters:\n\""
+                                                       + _options.getMsaPrgParameters() + "\"\nError: "
+                                                       + e.getLocalizedMessage(),
+                                                       "Unexpected Exception During MSA Calculation",
+                                                       JOptionPane.ERROR_MESSAGE );
+                if ( DEBUG ) {
+                    e.printStackTrace();
+                }
                 return;
             }
             if ( msa == null ) {
+                end( _mf );
                 JOptionPane.showMessageDialog( _mf,
                                                "Could not create multiple sequence alignment with "
                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
                                                        + _options.getMsaPrgParameters() + "\"",
-                                               "Failed to Calculate MSA",
-                                               JOptionPane.ERROR_MESSAGE );
+                                                       "Failed to Calculate MSA",
+                                                       JOptionPane.ERROR_MESSAGE );
                 return;
             }
-            System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
-            final MsaTools msa_tools = MsaTools.createInstance();
+            if ( DEBUG ) {
+                System.out.println( msa.toString() );
+                System.out.println( MsaMethods.calcGapRatio( msa ) );
+            }
+            final MsaMethods msa_tools = MsaMethods.createInstance();
             if ( _options.isExecuteMsaProcessing() ) {
-                msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
+                msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
                                                   _options.getMsaProcessingMinAllowedLength(),
                                                   msa );
                 if ( msa == null ) {
+                    end( _mf );
                     JOptionPane.showMessageDialog( _mf,
                                                    "Less than two sequences longer than "
                                                            + _options.getMsaProcessingMinAllowedLength()
                                                            + " residues left after MSA processing",
-                                                   "MSA Processing Settings Too Stringent",
-                                                   JOptionPane.ERROR_MESSAGE );
+                                                           "MSA Processing Settings Too Stringent",
+                                                           JOptionPane.ERROR_MESSAGE );
                     return;
                 }
             }
-            System.out.println( msa_tools.getIgnoredSequenceIds() );
-            System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
+            if ( DEBUG ) {
+                System.out.println( msa_tools.getIgnoredSequenceIds() );
+                System.out.println( msa.toString() );
+                System.out.println( MsaMethods.calcGapRatio( msa ) );
+            }
             _msa = msa;
         }
         final int n = _options.getBootstrapSamples();
@@ -208,7 +232,7 @@ public class PhylogeneticInferrer implements Runnable {
         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
-                    .getLength(), n, seed );
+                                                                                                          .getLength(), n, seed );
             final Phylogeny[] eval_phys = new Phylogeny[ n ];
             for( int i = 0; i < n; ++i ) {
                 resampleable_msa.resample( resampled_column_positions[ i ] );
@@ -217,7 +241,8 @@ public class PhylogeneticInferrer implements Runnable {
             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
         }
         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
-        _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+        //  _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+        end( _mf );
         JOptionPane.showMessageDialog( _mf,
                                        "Inference successfully completed",
                                        "Inference Completed",
@@ -226,14 +251,23 @@ public class PhylogeneticInferrer implements Runnable {
 
     @Override
     public void run() {
-        infer();
+        try {
+            infer();
+        }
+        catch ( final InterruptedException e ) {
+            // TODO need to handle this exception SOMEHOW!
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
     }
 
-    private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
+    private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+    InterruptedException {
         Msa msa = null;
-        final MsaInferrer mafft = Mafft.createInstance();
+        final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
+                                                        .getCanonicalPath() );
         try {
-            msa = mafft.infer( input_seqs, opts );
+            msa = mafft.infer( seqs, opts );
         }
         catch ( final IOException e ) {
             System.out.println( mafft.getErrorDescription() );
@@ -241,15 +275,15 @@ public class PhylogeneticInferrer implements Runnable {
         return msa;
     }
 
-    private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
+    private void writeToFiles( final DistanceMatrix m ) {
         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
             try {
                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
-                        + MSA_FILE_SUFFIX ) );
-                _msa.write( msa_writer );
+                                                                                      + MSA_FILE_SUFFIX ) );
+                _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
                 msa_writer.close();
                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
-                        + PWD_FILE_SUFFIX ) );
+                                                                                      + PWD_FILE_SUFFIX ) );
                 m.write( pwd_writer );
                 pwd_writer.close();
             }
@@ -258,4 +292,8 @@ public class PhylogeneticInferrer implements Runnable {
             }
         }
     }
+
+    public enum MSA_PRG {
+        MAFFT;
+    }
 }