initial commit
[jalview.git] / forester / java / src / org / forester / archaeopteryx / PhylogeneticInferrer.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 // 
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 // 
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.archaeopteryx;
27
28 import java.io.BufferedWriter;
29 import java.io.File;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.List;
34
35 import javax.swing.JOptionPane;
36
37 import org.forester.evoinference.distance.NeighborJoining;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.tools.BootstrapResampler;
41 import org.forester.io.parsers.FastaParser;
42 import org.forester.io.writers.SequenceWriter;
43 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
44 import org.forester.msa.BasicMsa;
45 import org.forester.msa.Mafft;
46 import org.forester.msa.Msa;
47 import org.forester.msa.MsaInferrer;
48 import org.forester.msa.MsaTools;
49 import org.forester.msa.ResampleableMsa;
50 import org.forester.phylogeny.Phylogeny;
51 import org.forester.sequence.Sequence;
52 import org.forester.tools.ConfidenceAssessor;
53 import org.forester.util.ForesterUtil;
54
55 public class PhylogeneticInferrer implements Runnable {
56
57     private Msa                                _msa;
58     private final MainFrameApplication         _mf;
59     private final PhylogeneticInferenceOptions _options;
60     private final List<Sequence>               _seqs;
61     public final static String                 MSA_FILE_SUFFIX = ".aln";
62     public final static String                 PWD_FILE_SUFFIX = ".pwd";
63
64     public PhylogeneticInferrer( final List<Sequence> seqs,
65                                  final PhylogeneticInferenceOptions options,
66                                  final MainFrameApplication mf ) {
67         _msa = null;
68         _seqs = seqs;
69         _mf = mf;
70         _options = options;
71     }
72
73     public PhylogeneticInferrer( final Msa msa,
74                                  final PhylogeneticInferenceOptions options,
75                                  final MainFrameApplication mf ) {
76         _msa = msa;
77         _seqs = null;
78         _mf = mf;
79         _options = options;
80     }
81
82     private Msa inferMsa() throws IOException {
83         final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
84         System.out.println( "temp file: " + temp_seqs_file );
85         //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
86         final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
87         SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
88         writer.close();
89         final List<String> opts = processMafftOptions();
90         Msa msa = null;
91         try {
92             msa = runMAFFT( temp_seqs_file, opts );
93         }
94         catch ( final InterruptedException e ) {
95             // TODO Auto-generated catch block
96             e.printStackTrace();
97         }
98         // copy aln file to intermediate dir file
99         // delete temp seqs file
100         return msa;
101     }
102
103     private List<String> processMafftOptions() {
104         final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
105         final String[] opts_ary = opts_str.split( " " );
106         final List<String> opts = new ArrayList<String>();
107         boolean saw_quiet = false;
108         for( final String opt : opts_ary ) {
109             opts.add( opt );
110             if ( opt.equals( "--quiet" ) ) {
111                 saw_quiet = true;
112             }
113         }
114         if ( !saw_quiet ) {
115             opts.add( "--quiet" );
116         }
117         return opts;
118     }
119
120     private Phylogeny inferPhylogeny( final Msa msa ) {
121         BasicSymmetricalDistanceMatrix m = null;
122         switch ( _options.getPwdDistanceMethod() ) {
123             case KIMURA_DISTANCE:
124                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
125                 break;
126             case POISSON_DISTANCE:
127                 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
128                 break;
129             case FRACTIONAL_DISSIMILARITY:
130                 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
131                 break;
132             default:
133                 throw new RuntimeException( "invalid pwd method" );
134         }
135         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
136             BufferedWriter pwd_writer;
137             try {
138                 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
139                 m.write( pwd_writer );
140                 pwd_writer.close();
141             }
142             catch ( final IOException e ) {
143                 // TODO Auto-generated catch block
144                 e.printStackTrace();
145             }
146         }
147         final NeighborJoining nj = new NeighborJoining();
148         final Phylogeny phy = nj.execute( m );
149         FastaParser.extractFastaInformation( phy );
150         return phy;
151     }
152
153     private void infer() {
154         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
155         if ( ( _msa == null ) && ( _seqs == null ) ) {
156             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
157         }
158         if ( _msa == null ) {
159             Msa msa = null;
160             try {
161                 msa = inferMsa();
162             }
163             catch ( final IOException e ) {
164                 JOptionPane.showMessageDialog( _mf, "Could not create multiple sequence alignment with "
165                         + _options.getMsaPrg() + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters()
166                         + "\"\nError:" + e.getLocalizedMessage(), "Failed to Calculate MSA", JOptionPane.ERROR_MESSAGE );
167                 return;
168             }
169             if ( msa == null ) {
170                 JOptionPane.showMessageDialog( _mf, "Could not create multiple sequence alignment with "
171                         + _options.getMsaPrg() + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters()
172                         + "\"", "Failed to Calculate MSA", JOptionPane.ERROR_MESSAGE );
173                 return;
174             }
175             System.out.println( msa.toString() );
176             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
177             final MsaTools msa_tools = MsaTools.createInstance();
178             if ( _options.isExecuteMsaProcessing() ) {
179                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(), _options
180                         .getMsaProcessingMinAllowedLength(), msa );
181                 if ( msa == null ) {
182                     JOptionPane.showMessageDialog( _mf,
183                                                    "Less than two sequences longer than "
184                                                            + _options.getMsaProcessingMinAllowedLength()
185                                                            + " residues left after MSA processing",
186                                                    "MSA Processing Settings Too Stringent",
187                                                    JOptionPane.ERROR_MESSAGE );
188                     return;
189                 }
190             }
191             System.out.println( msa_tools.getIgnoredSequenceIds() );
192             System.out.println( msa.toString() );
193             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
194             _msa = msa;
195         }
196         final int n = _options.getBootstrapSamples();
197         final long seed = _options.getRandomNumberGeneratorSeed();
198         final Phylogeny master_phy = inferPhylogeny( _msa );
199         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
200             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
201             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
202                     .getLength(), n, seed );
203             final Phylogeny[] eval_phys = new Phylogeny[ n ];
204             for( int i = 0; i < n; ++i ) {
205                 resampleable_msa.resample( resampled_column_positions[ i ] );
206                 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
207             }
208             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
209         }
210         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
211         _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
212         JOptionPane.showMessageDialog( _mf,
213                                        "Inference successfully completed",
214                                        "Inference Completed",
215                                        JOptionPane.INFORMATION_MESSAGE );
216     }
217
218     @Override
219     public void run() {
220         infer();
221     }
222
223     private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
224         Msa msa = null;
225         final MsaInferrer mafft = Mafft.createInstance();
226         try {
227             msa = mafft.infer( input_seqs, opts );
228         }
229         catch ( final IOException e ) {
230             System.out.println( mafft.getErrorDescription() );
231         }
232         return msa;
233     }
234
235     private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
236         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
237             try {
238                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options
239                         .getIntermediateFilesBase()
240                         + MSA_FILE_SUFFIX ) );
241                 _msa.write( msa_writer );
242                 msa_writer.close();
243                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options
244                         .getIntermediateFilesBase()
245                         + PWD_FILE_SUFFIX ) );
246                 m.write( pwd_writer );
247                 pwd_writer.close();
248             }
249             catch ( final Exception e ) {
250                 System.out.println( "Error: " + e.getMessage() );
251             }
252         }
253     }
254 }