refactored
[jalview.git] / forester / java / src / org / forester / archaeopteryx / tools / PhylogeneticInferrer.java
1 // $Id:
2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
4 //
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.archaeopteryx.tools;
27
28 import java.io.BufferedWriter;
29 import java.io.File;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.List;
34
35 import javax.swing.JOptionPane;
36
37 import org.forester.archaeopteryx.MainFrameApplication;
38 import org.forester.evoinference.distance.NeighborJoining;
39 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
40 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
41 import org.forester.evoinference.tools.BootstrapResampler;
42 import org.forester.io.parsers.FastaParser;
43 import org.forester.io.writers.SequenceWriter;
44 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
45 import org.forester.msa.BasicMsa;
46 import org.forester.msa.Mafft;
47 import org.forester.msa.Msa;
48 import org.forester.msa.MsaInferrer;
49 import org.forester.msa.MsaTools;
50 import org.forester.msa.ResampleableMsa;
51 import org.forester.phylogeny.Phylogeny;
52 import org.forester.sequence.Sequence;
53 import org.forester.tools.ConfidenceAssessor;
54 import org.forester.util.ForesterUtil;
55
56 public class PhylogeneticInferrer implements Runnable {
57
58     private Msa                                _msa;
59     private final MainFrameApplication         _mf;
60     private final PhylogeneticInferenceOptions _options;
61     private final List<Sequence>               _seqs;
62     public final static String                 MSA_FILE_SUFFIX = ".aln";
63     public final static String                 PWD_FILE_SUFFIX = ".pwd";
64
65     public PhylogeneticInferrer( final List<Sequence> seqs,
66                                  final PhylogeneticInferenceOptions options,
67                                  final MainFrameApplication mf ) {
68         _msa = null;
69         _seqs = seqs;
70         _mf = mf;
71         _options = options;
72     }
73
74     public PhylogeneticInferrer( final Msa msa,
75                                  final PhylogeneticInferenceOptions options,
76                                  final MainFrameApplication mf ) {
77         _msa = msa;
78         _seqs = null;
79         _mf = mf;
80         _options = options;
81     }
82
83     private Msa inferMsa() throws IOException {
84         final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
85         System.out.println( "temp file: " + temp_seqs_file );
86         //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
87         final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
88         SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
89         writer.close();
90         final List<String> opts = processMafftOptions();
91         Msa msa = null;
92         try {
93             msa = runMAFFT( temp_seqs_file, opts );
94         }
95         catch ( final InterruptedException e ) {
96             // TODO Auto-generated catch block
97             e.printStackTrace();
98         }
99         // copy aln file to intermediate dir file
100         // delete temp seqs file
101         return msa;
102     }
103
104     private List<String> processMafftOptions() {
105         final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
106         final String[] opts_ary = opts_str.split( " " );
107         final List<String> opts = new ArrayList<String>();
108         boolean saw_quiet = false;
109         for( final String opt : opts_ary ) {
110             opts.add( opt );
111             if ( opt.equals( "--quiet" ) ) {
112                 saw_quiet = true;
113             }
114         }
115         if ( !saw_quiet ) {
116             opts.add( "--quiet" );
117         }
118         return opts;
119     }
120
121     private Phylogeny inferPhylogeny( final Msa msa ) {
122         BasicSymmetricalDistanceMatrix m = null;
123         switch ( _options.getPwdDistanceMethod() ) {
124             case KIMURA_DISTANCE:
125                 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
126                 break;
127             case POISSON_DISTANCE:
128                 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
129                 break;
130             case FRACTIONAL_DISSIMILARITY:
131                 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
132                 break;
133             default:
134                 throw new RuntimeException( "invalid pwd method" );
135         }
136         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
137             BufferedWriter pwd_writer;
138             try {
139                 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
140                 m.write( pwd_writer );
141                 pwd_writer.close();
142             }
143             catch ( final IOException e ) {
144                 // TODO Auto-generated catch block
145                 e.printStackTrace();
146             }
147         }
148         final NeighborJoining nj = new NeighborJoining();
149         final Phylogeny phy = nj.execute( m );
150         FastaParser.extractFastaInformation( phy );
151         return phy;
152     }
153
154     private void infer() {
155         //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
156         if ( ( _msa == null ) && ( _seqs == null ) ) {
157             throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
158         }
159         if ( _msa == null ) {
160             Msa msa = null;
161             try {
162                 msa = inferMsa();
163             }
164             catch ( final IOException e ) {
165                 JOptionPane.showMessageDialog( _mf,
166                                                "Could not create multiple sequence alignment with "
167                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
168                                                        + _options.getMsaPrgParameters() + "\"\nError:"
169                                                        + e.getLocalizedMessage(),
170                                                "Failed to Calculate MSA",
171                                                JOptionPane.ERROR_MESSAGE );
172                 return;
173             }
174             if ( msa == null ) {
175                 JOptionPane.showMessageDialog( _mf,
176                                                "Could not create multiple sequence alignment with "
177                                                        + _options.getMsaPrg() + "\nand the following parameters:\n\""
178                                                        + _options.getMsaPrgParameters() + "\"",
179                                                "Failed to Calculate MSA",
180                                                JOptionPane.ERROR_MESSAGE );
181                 return;
182             }
183             System.out.println( msa.toString() );
184             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
185             final MsaTools msa_tools = MsaTools.createInstance();
186             if ( _options.isExecuteMsaProcessing() ) {
187                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
188                                                   _options.getMsaProcessingMinAllowedLength(),
189                                                   msa );
190                 if ( msa == null ) {
191                     JOptionPane.showMessageDialog( _mf,
192                                                    "Less than two sequences longer than "
193                                                            + _options.getMsaProcessingMinAllowedLength()
194                                                            + " residues left after MSA processing",
195                                                    "MSA Processing Settings Too Stringent",
196                                                    JOptionPane.ERROR_MESSAGE );
197                     return;
198                 }
199             }
200             System.out.println( msa_tools.getIgnoredSequenceIds() );
201             System.out.println( msa.toString() );
202             System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
203             _msa = msa;
204         }
205         final int n = _options.getBootstrapSamples();
206         final long seed = _options.getRandomNumberGeneratorSeed();
207         final Phylogeny master_phy = inferPhylogeny( _msa );
208         if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
209             final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
210             final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
211                     .getLength(), n, seed );
212             final Phylogeny[] eval_phys = new Phylogeny[ n ];
213             for( int i = 0; i < n; ++i ) {
214                 resampleable_msa.resample( resampled_column_positions[ i ] );
215                 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
216             }
217             ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
218         }
219         _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
220         _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
221         JOptionPane.showMessageDialog( _mf,
222                                        "Inference successfully completed",
223                                        "Inference Completed",
224                                        JOptionPane.INFORMATION_MESSAGE );
225     }
226
227     @Override
228     public void run() {
229         infer();
230     }
231
232     private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
233         Msa msa = null;
234         final MsaInferrer mafft = Mafft.createInstance();
235         try {
236             msa = mafft.infer( input_seqs, opts );
237         }
238         catch ( final IOException e ) {
239             System.out.println( mafft.getErrorDescription() );
240         }
241         return msa;
242     }
243
244     private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
245         if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
246             try {
247                 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
248                         + MSA_FILE_SUFFIX ) );
249                 _msa.write( msa_writer );
250                 msa_writer.close();
251                 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
252                         + PWD_FILE_SUFFIX ) );
253                 m.write( pwd_writer );
254                 pwd_writer.close();
255             }
256             catch ( final Exception e ) {
257                 System.out.println( "Error: " + e.getMessage() );
258             }
259         }
260     }
261 }