inprogress
[jalview.git] / forester / java / src / org / forester / archaeopteryx / tools / PhylogeneticInferrer.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.archaeopteryx.tools;
27
28 import java.io.BufferedWriter;
29 import java.io.FileWriter;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.List;
33
34 import javax.swing.JOptionPane;
35
36 import org.forester.archaeopteryx.MainFrameApplication;
37 import org.forester.evoinference.distance.NeighborJoiningF;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.tools.BootstrapResampler;
41 import org.forester.msa.BasicMsa;
42 import org.forester.msa.Mafft;
43 import org.forester.msa.Msa;
44 import org.forester.msa.Msa.MSA_FORMAT;
45 import org.forester.msa.MsaInferrer;
46 import org.forester.msa.MsaMethods;
47 import org.forester.msa.ResampleableMsa;
48 import org.forester.phylogeny.Phylogeny;
49 import org.forester.phylogeny.PhylogenyMethods;
50 import org.forester.sequence.Sequence;
51 import org.forester.tools.ConfidenceAssessor;
52 import org.forester.util.ForesterUtil;
53
54 public class PhylogeneticInferrer extends RunnableProcess {
55
56     private Msa                                _msa;
57     private final MainFrameApplication         _mf;
58     private final PhylogeneticInferenceOptions _options;
59     private final List<Sequence>               _seqs;
60     private final boolean                      DEBUG           = true;
61     public final static String                 MSA_FILE_SUFFIX = ".aln";
62     public final static String                 PWD_FILE_SUFFIX = ".pwd";
63
64     public PhylogeneticInferrer( final List<Sequence> seqs,
65                                  final PhylogeneticInferenceOptions options,
66                                  final MainFrameApplication mf ) {
67         _msa = null;
68         _seqs = seqs;
69         _mf = mf;
70         _options = options;
71     }
72
73     public PhylogeneticInferrer( final Msa msa,
74                                  final PhylogeneticInferenceOptions options,
75                                  final MainFrameApplication mf ) {
76         _msa = msa;
77         _seqs = null;
78         _mf = mf;
79         _options = options;
80     }
81
82     private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
83         //        final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
84         //        if ( DEBUG ) {
85         //            System.out.println();
86         //            System.out.println( "temp file: " + temp_seqs_file );
87         //            System.out.println();
88         //        }
89         //        //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
90         //        final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
91         //        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
92         //        writer.close();
93         switch ( msa_prg ) {
94             case MAFFT:
95                 return runMAFFT( _seqs, processMafftOptions() );
96             default:
97                 return null;
98         }
99     }
100
101     private List<String> processMafftOptions() {
102         final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
103         final String[] opts_ary = opts_str.split( " " );
104         final List<String> opts = new ArrayList<String>();
105         boolean saw_quiet = false;
106         for( final String opt : opts_ary ) {
107             opts.add( opt );
108             if ( opt.equals( "--quiet" ) ) {
109                 saw_quiet = true;
110             }
111         }
112         if ( !saw_quiet ) {
113             opts.add( "--quiet" );
114         }
115         return opts;
116     }
117
118     private Phylogeny inferPhylogeny( final Msa msa ) {
119         BasicSymmetricalDistanceMatrix m = null;
120         switch ( _options.getPwdDistanceMethod() ) {
121             case KIMURA_DISTANCE:
122                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
123                 break;
124             case POISSON_DISTANCE:
125                 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
126                 break;
127             case FRACTIONAL_DISSIMILARITY:
128                 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
129                 break;
130             default:
131                 throw new RuntimeException( "invalid pwd method" );
132         }
133         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
134             BufferedWriter pwd_writer;
135             try {
136                 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
137                 m.write( pwd_writer );
138                 pwd_writer.close();
139             }
140             catch ( final IOException e ) {
141                 // TODO Auto-generated catch block
142                 e.printStackTrace();
143             }
144         }
145         final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
146         final Phylogeny phy = nj.execute( m );
147         PhylogenyMethods.extractFastaInformation( phy );
148         return phy;
149     }
150
151     private void infer() throws InterruptedException {
152         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
153         if ( ( _msa == null ) && ( _seqs == null ) ) {
154             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
155         }
156         start( _mf, "phylogenetic inference" );
157         if ( _msa == null ) {
158             Msa msa = null;
159             try {
160                 msa = inferMsa( MSA_PRG.MAFFT );
161             }
162             catch ( final IOException e ) {
163                 end( _mf );
164                 JOptionPane.showMessageDialog( _mf,
165                                                "Could not create multiple sequence alignment with \""
166                                                        + _options.getMsaPrg() + "\" and the following parameters:\n\""
167                                                        + _options.getMsaPrgParameters() + "\"\nError: "
168                                                        + e.getLocalizedMessage(),
169                                                "Failed to Calculate MSA",
170                                                JOptionPane.ERROR_MESSAGE );
171                 if ( DEBUG ) {
172                     e.printStackTrace();
173                 }
174                 return;
175             }
176             catch ( final Exception e ) {
177                 end( _mf );
178                 JOptionPane.showMessageDialog( _mf,
179                                                "Could not create multiple sequence alignment with \""
180                                                        + _options.getMsaPrg() + "\" and the following parameters:\n\""
181                                                        + _options.getMsaPrgParameters() + "\"\nError: "
182                                                        + e.getLocalizedMessage(),
183                                                "Unexpected Exception During MSA Calculation",
184                                                JOptionPane.ERROR_MESSAGE );
185                 if ( DEBUG ) {
186                     e.printStackTrace();
187                 }
188                 return;
189             }
190             if ( msa == null ) {
191                 end( _mf );
192                 JOptionPane.showMessageDialog( _mf,
193                                                "Could not create multiple sequence alignment with "
194                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
195                                                        + _options.getMsaPrgParameters() + "\"",
196                                                "Failed to Calculate MSA",
197                                                JOptionPane.ERROR_MESSAGE );
198                 return;
199             }
200             if ( DEBUG ) {
201                 System.out.println( msa.toString() );
202                 System.out.println( MsaMethods.calcGapRatio( msa ) );
203             }
204             final MsaMethods msa_tools = MsaMethods.createInstance();
205             if ( _options.isExecuteMsaProcessing() ) {
206                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
207                                                   _options.getMsaProcessingMinAllowedLength(),
208                                                   msa );
209                 if ( msa == null ) {
210                     end( _mf );
211                     JOptionPane.showMessageDialog( _mf,
212                                                    "Less than two sequences longer than "
213                                                            + _options.getMsaProcessingMinAllowedLength()
214                                                            + " residues left after MSA processing",
215                                                    "MSA Processing Settings Too Stringent",
216                                                    JOptionPane.ERROR_MESSAGE );
217                     return;
218                 }
219             }
220             if ( DEBUG ) {
221                 System.out.println( msa_tools.getIgnoredSequenceIds() );
222                 System.out.println( msa.toString() );
223                 System.out.println( MsaMethods.calcGapRatio( msa ) );
224             }
225             _msa = msa;
226         }
227         final int n = _options.getBootstrapSamples();
228         final long seed = _options.getRandomNumberGeneratorSeed();
229         final Phylogeny master_phy = inferPhylogeny( _msa );
230         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
231             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
232             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
233                     .getLength(), n, seed );
234             final Phylogeny[] eval_phys = new Phylogeny[ n ];
235             for( int i = 0; i < n; ++i ) {
236                 resampleable_msa.resample( resampled_column_positions[ i ] );
237                 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
238             }
239             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
240         }
241         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
242         //  _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
243         end( _mf );
244         JOptionPane.showMessageDialog( _mf,
245                                        "Inference successfully completed",
246                                        "Inference Completed",
247                                        JOptionPane.INFORMATION_MESSAGE );
248     }
249
250     @Override
251     public void run() {
252         try {
253             infer();
254         }
255         catch ( final InterruptedException e ) {
256             // TODO need to handle this exception SOMEHOW!
257             // TODO Auto-generated catch block
258             e.printStackTrace();
259         }
260     }
261
262     private Msa runMAFFT( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
263         Msa msa = null;
264         final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
265                 .getCanonicalPath() );
266         try {
267             msa = mafft.infer( seqs, opts );
268         }
269         catch ( final IOException e ) {
270             System.out.println( mafft.getErrorDescription() );
271         }
272         return msa;
273     }
274
275     private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
276         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
277             try {
278                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
279                         + MSA_FILE_SUFFIX ) );
280                 _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
281                 msa_writer.close();
282                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
283                         + PWD_FILE_SUFFIX ) );
284                 m.write( pwd_writer );
285                 pwd_writer.close();
286             }
287             catch ( final Exception e ) {
288                 System.out.println( "Error: " + e.getMessage() );
289             }
290         }
291     }
292
293     public enum MSA_PRG {
294         MAFFT;
295     }
296 }