in progress...
[jalview.git] / forester / java / src / org / forester / archaeopteryx / PhylogeneticInferrer.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.archaeopteryx;
27
28 import java.io.BufferedWriter;
29 import java.io.File;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.List;
34
35 import javax.swing.JOptionPane;
36
37 import org.forester.evoinference.distance.NeighborJoining;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.tools.BootstrapResampler;
41 import org.forester.io.parsers.FastaParser;
42 import org.forester.io.writers.SequenceWriter;
43 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
44 import org.forester.msa.BasicMsa;
45 import org.forester.msa.Mafft;
46 import org.forester.msa.Msa;
47 import org.forester.msa.MsaInferrer;
48 import org.forester.msa.MsaTools;
49 import org.forester.msa.ResampleableMsa;
50 import org.forester.phylogeny.Phylogeny;
51 import org.forester.sequence.Sequence;
52 import org.forester.tools.ConfidenceAssessor;
53 import org.forester.util.ForesterUtil;
54
55 public class PhylogeneticInferrer implements Runnable {
56
57     private Msa                                _msa;
58     private final MainFrameApplication         _mf;
59     private final PhylogeneticInferenceOptions _options;
60     private final List<Sequence>               _seqs;
61     public final static String                 MSA_FILE_SUFFIX = ".aln";
62     public final static String                 PWD_FILE_SUFFIX = ".pwd";
63
64     public PhylogeneticInferrer( final List<Sequence> seqs,
65                                  final PhylogeneticInferenceOptions options,
66                                  final MainFrameApplication mf ) {
67         _msa = null;
68         _seqs = seqs;
69         _mf = mf;
70         _options = options;
71     }
72
73     public PhylogeneticInferrer( final Msa msa,
74                                  final PhylogeneticInferenceOptions options,
75                                  final MainFrameApplication mf ) {
76         _msa = msa;
77         _seqs = null;
78         _mf = mf;
79         _options = options;
80     }
81
82     private Msa inferMsa() throws IOException {
83         final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
84         System.out.println( "temp file: " + temp_seqs_file );
85         //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
86         final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
87         SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
88         writer.close();
89         final List<String> opts = processMafftOptions();
90         Msa msa = null;
91         try {
92             msa = runMAFFT( temp_seqs_file, opts );
93         }
94         catch ( final InterruptedException e ) {
95             // TODO Auto-generated catch block
96             e.printStackTrace();
97         }
98         // copy aln file to intermediate dir file
99         // delete temp seqs file
100         return msa;
101     }
102
103     private List<String> processMafftOptions() {
104         final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
105         final String[] opts_ary = opts_str.split( " " );
106         final List<String> opts = new ArrayList<String>();
107         boolean saw_quiet = false;
108         for( final String opt : opts_ary ) {
109             opts.add( opt );
110             if ( opt.equals( "--quiet" ) ) {
111                 saw_quiet = true;
112             }
113         }
114         if ( !saw_quiet ) {
115             opts.add( "--quiet" );
116         }
117         return opts;
118     }
119
120     private Phylogeny inferPhylogeny( final Msa msa ) {
121         BasicSymmetricalDistanceMatrix m = null;
122         switch ( _options.getPwdDistanceMethod() ) {
123             case KIMURA_DISTANCE:
124                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
125                 break;
126             case POISSON_DISTANCE:
127                 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
128                 break;
129             case FRACTIONAL_DISSIMILARITY:
130                 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
131                 break;
132             default:
133                 throw new RuntimeException( "invalid pwd method" );
134         }
135         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
136             BufferedWriter pwd_writer;
137             try {
138                 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
139                 m.write( pwd_writer );
140                 pwd_writer.close();
141             }
142             catch ( final IOException e ) {
143                 // TODO Auto-generated catch block
144                 e.printStackTrace();
145             }
146         }
147         final NeighborJoining nj = new NeighborJoining();
148         final Phylogeny phy = nj.execute( m );
149         FastaParser.extractFastaInformation( phy );
150         return phy;
151     }
152
153     private void infer() {
154         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
155         if ( ( _msa == null ) && ( _seqs == null ) ) {
156             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
157         }
158         if ( _msa == null ) {
159             Msa msa = null;
160             try {
161                 msa = inferMsa();
162             }
163             catch ( final IOException e ) {
164                 JOptionPane.showMessageDialog( _mf,
165                                                "Could not create multiple sequence alignment with "
166                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
167                                                        + _options.getMsaPrgParameters() + "\"\nError:"
168                                                        + e.getLocalizedMessage(),
169                                                "Failed to Calculate MSA",
170                                                JOptionPane.ERROR_MESSAGE );
171                 return;
172             }
173             if ( msa == null ) {
174                 JOptionPane.showMessageDialog( _mf,
175                                                "Could not create multiple sequence alignment with "
176                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
177                                                        + _options.getMsaPrgParameters() + "\"",
178                                                "Failed to Calculate MSA",
179                                                JOptionPane.ERROR_MESSAGE );
180                 return;
181             }
182             System.out.println( msa.toString() );
183             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
184             final MsaTools msa_tools = MsaTools.createInstance();
185             if ( _options.isExecuteMsaProcessing() ) {
186                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
187                                                   _options.getMsaProcessingMinAllowedLength(),
188                                                   msa );
189                 if ( msa == null ) {
190                     JOptionPane.showMessageDialog( _mf,
191                                                    "Less than two sequences longer than "
192                                                            + _options.getMsaProcessingMinAllowedLength()
193                                                            + " residues left after MSA processing",
194                                                    "MSA Processing Settings Too Stringent",
195                                                    JOptionPane.ERROR_MESSAGE );
196                     return;
197                 }
198             }
199             System.out.println( msa_tools.getIgnoredSequenceIds() );
200             System.out.println( msa.toString() );
201             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
202             _msa = msa;
203         }
204         final int n = _options.getBootstrapSamples();
205         final long seed = _options.getRandomNumberGeneratorSeed();
206         final Phylogeny master_phy = inferPhylogeny( _msa );
207         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
208             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
209             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
210                     .getLength(), n, seed );
211             final Phylogeny[] eval_phys = new Phylogeny[ n ];
212             for( int i = 0; i < n; ++i ) {
213                 resampleable_msa.resample( resampled_column_positions[ i ] );
214                 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
215             }
216             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
217         }
218         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
219         _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
220         JOptionPane.showMessageDialog( _mf,
221                                        "Inference successfully completed",
222                                        "Inference Completed",
223                                        JOptionPane.INFORMATION_MESSAGE );
224     }
225
226     @Override
227     public void run() {
228         infer();
229     }
230
231     private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
232         Msa msa = null;
233         final MsaInferrer mafft = Mafft.createInstance();
234         try {
235             msa = mafft.infer( input_seqs, opts );
236         }
237         catch ( final IOException e ) {
238             System.out.println( mafft.getErrorDescription() );
239         }
240         return msa;
241     }
242
243     private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
244         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
245             try {
246                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
247                         + MSA_FILE_SUFFIX ) );
248                 _msa.write( msa_writer );
249                 msa_writer.close();
250                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
251                         + PWD_FILE_SUFFIX ) );
252                 m.write( pwd_writer );
253                 pwd_writer.close();
254             }
255             catch ( final Exception e ) {
256                 System.out.println( "Error: " + e.getMessage() );
257             }
258         }
259     }
260 }