cc627519c3625f4c51ce32aa4940589874eb4ea5
[jalview.git] / forester / java / src / org / forester / archaeopteryx / tools / PhylogeneticInferrer.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.archaeopteryx.tools;
27
28 import java.io.BufferedWriter;
29 import java.io.FileWriter;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.List;
33
34 import javax.swing.JOptionPane;
35
36 import org.forester.archaeopteryx.MainFrameApplication;
37 import org.forester.evoinference.distance.NeighborJoiningF;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.matrix.distance.DistanceMatrix;
41 import org.forester.evoinference.tools.BootstrapResampler;
42 import org.forester.msa.BasicMsa;
43 import org.forester.msa.Mafft;
44 import org.forester.msa.Msa;
45 import org.forester.msa.Msa.MSA_FORMAT;
46 import org.forester.msa.MsaInferrer;
47 import org.forester.msa.MsaMethods;
48 import org.forester.msa.ResampleableMsa;
49 import org.forester.phylogeny.Phylogeny;
50 import org.forester.phylogeny.PhylogenyMethods;
51 import org.forester.sequence.MolecularSequence;
52 import org.forester.tools.ConfidenceAssessor;
53 import org.forester.util.ForesterUtil;
54
55 public class PhylogeneticInferrer extends RunnableProcess {
56
57     private Msa                                _msa;
58     private final MainFrameApplication         _mf;
59     private final PhylogeneticInferenceOptions _options;
60     private final List<MolecularSequence>      _seqs;
61     private final boolean                      DEBUG           = true;
62     public final static String                 MSA_FILE_SUFFIX = ".aln";
63     public final static String                 PWD_FILE_SUFFIX = ".pwd";
64
65     public PhylogeneticInferrer( final List<MolecularSequence> seqs,
66                                  final PhylogeneticInferenceOptions options,
67                                  final MainFrameApplication mf ) {
68         _msa = null;
69         _seqs = seqs;
70         _mf = mf;
71         _options = options;
72     }
73
74     public PhylogeneticInferrer( final Msa msa,
75                                  final PhylogeneticInferenceOptions options,
76                                  final MainFrameApplication mf ) {
77         _msa = msa;
78         _seqs = null;
79         _mf = mf;
80         _options = options;
81     }
82
83     private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
84         //        final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
85         //        if ( DEBUG ) {
86         //            System.out.println();
87         //            System.out.println( "temp file: " + temp_seqs_file );
88         //            System.out.println();
89         //        }
90         //        //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
91         //        final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
92         //        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
93         //        writer.close();
94         switch ( msa_prg ) {
95             case MAFFT:
96                 return runMAFFT( _seqs, processMafftOptions() );
97             default:
98                 return null;
99         }
100     }
101
102     private List<String> processMafftOptions() {
103         final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
104         final String[] opts_ary = opts_str.split( " " );
105         final List<String> opts = new ArrayList<String>();
106         boolean saw_quiet = false;
107         for( final String opt : opts_ary ) {
108             opts.add( opt );
109             if ( opt.equals( "--quiet" ) ) {
110                 saw_quiet = true;
111             }
112         }
113         if ( !saw_quiet ) {
114             opts.add( "--quiet" );
115         }
116         return opts;
117     }
118
119     private Phylogeny inferPhylogeny( final Msa msa ) {
120         DistanceMatrix m = null;
121         switch ( _options.getPwdDistanceMethod() ) {
122             case KIMURA_DISTANCE:
123                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
124                 break;
125             case POISSON_DISTANCE:
126                 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
127                 break;
128             case FRACTIONAL_DISSIMILARITY:
129                 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
130                 break;
131             default:
132                 throw new RuntimeException( "invalid pwd method" );
133         }
134         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
135             BufferedWriter pwd_writer;
136             try {
137                 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
138                 m.write( pwd_writer );
139                 pwd_writer.close();
140             }
141             catch ( final IOException e ) {
142                 // TODO Auto-generated catch block
143                 e.printStackTrace();
144             }
145         }
146         final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
147         final Phylogeny phy = nj.execute( m );
148         PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
149         PhylogenyMethods.extractFastaInformation( phy );
150         return phy;
151     }
152
153     private void infer() throws InterruptedException {
154         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
155         if ( ( _msa == null ) && ( _seqs == null ) ) {
156             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
157         }
158         start( _mf, "phylogenetic inference" );
159         if ( _msa == null ) {
160             Msa msa = null;
161             try {
162                 msa = inferMsa( MSA_PRG.MAFFT );
163             }
164             catch ( final IOException e ) {
165                 end( _mf );
166                 JOptionPane.showMessageDialog( _mf,
167                                                "Could not create multiple sequence alignment with \""
168                                                        + _options.getMsaPrg() + "\" and the following parameters:\n\""
169                                                        + _options.getMsaPrgParameters() + "\"\nError: "
170                                                        + e.getLocalizedMessage(),
171                                                        "Failed to Calculate MSA",
172                                                        JOptionPane.ERROR_MESSAGE );
173                 if ( DEBUG ) {
174                     e.printStackTrace();
175                 }
176                 return;
177             }
178             catch ( final Exception e ) {
179                 end( _mf );
180                 JOptionPane.showMessageDialog( _mf,
181                                                "Could not create multiple sequence alignment with \""
182                                                        + _options.getMsaPrg() + "\" and the following parameters:\n\""
183                                                        + _options.getMsaPrgParameters() + "\"\nError: "
184                                                        + e.getLocalizedMessage(),
185                                                        "Unexpected Exception During MSA Calculation",
186                                                        JOptionPane.ERROR_MESSAGE );
187                 if ( DEBUG ) {
188                     e.printStackTrace();
189                 }
190                 return;
191             }
192             if ( msa == null ) {
193                 end( _mf );
194                 JOptionPane.showMessageDialog( _mf,
195                                                "Could not create multiple sequence alignment with "
196                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
197                                                        + _options.getMsaPrgParameters() + "\"",
198                                                        "Failed to Calculate MSA",
199                                                        JOptionPane.ERROR_MESSAGE );
200                 return;
201             }
202             if ( DEBUG ) {
203                 System.out.println( msa.toString() );
204                 System.out.println( MsaMethods.calcGapRatio( msa ) );
205             }
206             final MsaMethods msa_tools = MsaMethods.createInstance();
207             if ( _options.isExecuteMsaProcessing() ) {
208                 msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
209                                                   _options.getMsaProcessingMinAllowedLength(),
210                                                   msa );
211                 if ( msa == null ) {
212                     end( _mf );
213                     JOptionPane.showMessageDialog( _mf,
214                                                    "Less than two sequences longer than "
215                                                            + _options.getMsaProcessingMinAllowedLength()
216                                                            + " residues left after MSA processing",
217                                                            "MSA Processing Settings Too Stringent",
218                                                            JOptionPane.ERROR_MESSAGE );
219                     return;
220                 }
221             }
222             if ( DEBUG ) {
223                 System.out.println( msa_tools.getIgnoredSequenceIds() );
224                 System.out.println( msa.toString() );
225                 System.out.println( MsaMethods.calcGapRatio( msa ) );
226             }
227             _msa = msa;
228         }
229         final int n = _options.getBootstrapSamples();
230         final long seed = _options.getRandomNumberGeneratorSeed();
231         final Phylogeny master_phy = inferPhylogeny( _msa );
232         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
233             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
234             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
235                                                                                                           .getLength(), n, seed );
236             final Phylogeny[] eval_phys = new Phylogeny[ n ];
237             for( int i = 0; i < n; ++i ) {
238                 resampleable_msa.resample( resampled_column_positions[ i ] );
239                 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
240             }
241             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
242         }
243         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
244         //  _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
245         end( _mf );
246         JOptionPane.showMessageDialog( _mf,
247                                        "Inference successfully completed",
248                                        "Inference Completed",
249                                        JOptionPane.INFORMATION_MESSAGE );
250     }
251
252     @Override
253     public void run() {
254         try {
255             infer();
256         }
257         catch ( final InterruptedException e ) {
258             // TODO need to handle this exception SOMEHOW!
259             // TODO Auto-generated catch block
260             e.printStackTrace();
261         }
262     }
263
264     private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
265     InterruptedException {
266         Msa msa = null;
267         final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
268                                                         .getCanonicalPath() );
269         try {
270             msa = mafft.infer( seqs, opts );
271         }
272         catch ( final IOException e ) {
273             System.out.println( mafft.getErrorDescription() );
274         }
275         return msa;
276     }
277
278     private void writeToFiles( final DistanceMatrix m ) {
279         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
280             try {
281                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
282                                                                                       + MSA_FILE_SUFFIX ) );
283                 _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
284                 msa_writer.close();
285                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
286                                                                                       + PWD_FILE_SUFFIX ) );
287                 m.write( pwd_writer );
288                 pwd_writer.close();
289             }
290             catch ( final Exception e ) {
291                 System.out.println( "Error: " + e.getMessage() );
292             }
293         }
294     }
295
296     public enum MSA_PRG {
297         MAFFT;
298     }
299 }