2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.archaeopteryx.tools;
28 import java.io.BufferedWriter;
29 import java.io.FileWriter;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.List;
34 import javax.swing.JOptionPane;
36 import org.forester.archaeopteryx.MainFrameApplication;
37 import org.forester.evoinference.distance.NeighborJoiningF;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.matrix.distance.DistanceMatrix;
41 import org.forester.evoinference.tools.BootstrapResampler;
42 import org.forester.msa.BasicMsa;
43 import org.forester.msa.Mafft;
44 import org.forester.msa.Msa;
45 import org.forester.msa.Msa.MSA_FORMAT;
46 import org.forester.msa.MsaInferrer;
47 import org.forester.msa.MsaMethods;
48 import org.forester.msa.ResampleableMsa;
49 import org.forester.phylogeny.Phylogeny;
50 import org.forester.phylogeny.PhylogenyMethods;
51 import org.forester.sequence.MolecularSequence;
52 import org.forester.tools.ConfidenceAssessor;
53 import org.forester.util.ForesterUtil;
55 public class PhylogeneticInferrer extends RunnableProcess {
58 private final MainFrameApplication _mf;
59 private final PhylogeneticInferenceOptions _options;
60 private final List<MolecularSequence> _seqs;
61 private final boolean DEBUG = true;
62 public final static String MSA_FILE_SUFFIX = ".aln";
63 public final static String PWD_FILE_SUFFIX = ".pwd";
65 public PhylogeneticInferrer( final List<MolecularSequence> seqs,
66 final PhylogeneticInferenceOptions options,
67 final MainFrameApplication mf ) {
74 public PhylogeneticInferrer( final Msa msa,
75 final PhylogeneticInferenceOptions options,
76 final MainFrameApplication mf ) {
83 private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
84 // final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
86 // System.out.println();
87 // System.out.println( "temp file: " + temp_seqs_file );
88 // System.out.println();
90 // //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
91 // final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
92 // SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
96 return runMAFFT( _seqs, processMafftOptions() );
102 private List<String> processMafftOptions() {
103 final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
104 final String[] opts_ary = opts_str.split( " " );
105 final List<String> opts = new ArrayList<String>();
106 boolean saw_quiet = false;
107 for( final String opt : opts_ary ) {
109 if ( opt.equals( "--quiet" ) ) {
114 opts.add( "--quiet" );
119 private Phylogeny inferPhylogeny( final Msa msa ) {
120 DistanceMatrix m = null;
121 switch ( _options.getPwdDistanceMethod() ) {
122 case KIMURA_DISTANCE:
123 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
125 case POISSON_DISTANCE:
126 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
128 case FRACTIONAL_DISSIMILARITY:
129 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
132 throw new RuntimeException( "invalid pwd method" );
134 if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
135 BufferedWriter pwd_writer;
137 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
138 m.write( pwd_writer );
141 catch ( final IOException e ) {
142 // TODO Auto-generated catch block
146 final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
147 final Phylogeny phy = nj.execute( m );
148 PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
149 PhylogenyMethods.extractFastaInformation( phy );
153 private void infer() throws InterruptedException {
154 //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
155 if ( ( _msa == null ) && ( _seqs == null ) ) {
156 throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
158 start( _mf, "phylogenetic inference" );
159 if ( _msa == null ) {
162 msa = inferMsa( MSA_PRG.MAFFT );
164 catch ( final IOException e ) {
166 JOptionPane.showMessageDialog( _mf,
167 "Could not create multiple sequence alignment with \""
168 + _options.getMsaPrg() + "\" and the following parameters:\n\""
169 + _options.getMsaPrgParameters() + "\"\nError: "
170 + e.getLocalizedMessage(),
171 "Failed to Calculate MSA",
172 JOptionPane.ERROR_MESSAGE );
178 catch ( final Exception e ) {
180 JOptionPane.showMessageDialog( _mf,
181 "Could not create multiple sequence alignment with \""
182 + _options.getMsaPrg() + "\" and the following parameters:\n\""
183 + _options.getMsaPrgParameters() + "\"\nError: "
184 + e.getLocalizedMessage(),
185 "Unexpected Exception During MSA Calculation",
186 JOptionPane.ERROR_MESSAGE );
194 JOptionPane.showMessageDialog( _mf,
195 "Could not create multiple sequence alignment with "
196 + _options.getMsaPrg() + "\nand the following parameters:\n\""
197 + _options.getMsaPrgParameters() + "\"",
198 "Failed to Calculate MSA",
199 JOptionPane.ERROR_MESSAGE );
203 System.out.println( msa.toString() );
204 System.out.println( MsaMethods.calcGapRatio( msa ) );
206 final MsaMethods msa_tools = MsaMethods.createInstance();
207 if ( _options.isExecuteMsaProcessing() ) {
208 msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
209 _options.getMsaProcessingMinAllowedLength(),
213 JOptionPane.showMessageDialog( _mf,
214 "Less than two sequences longer than "
215 + _options.getMsaProcessingMinAllowedLength()
216 + " residues left after MSA processing",
217 "MSA Processing Settings Too Stringent",
218 JOptionPane.ERROR_MESSAGE );
223 System.out.println( msa_tools.getIgnoredSequenceIds() );
224 System.out.println( msa.toString() );
225 System.out.println( MsaMethods.calcGapRatio( msa ) );
229 final int n = _options.getBootstrapSamples();
230 final long seed = _options.getRandomNumberGeneratorSeed();
231 final Phylogeny master_phy = inferPhylogeny( _msa );
232 if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
233 final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
234 final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
235 .getLength(), n, seed );
236 final Phylogeny[] eval_phys = new Phylogeny[ n ];
237 for( int i = 0; i < n; ++i ) {
238 resampleable_msa.resample( resampled_column_positions[ i ] );
239 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
241 ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
243 _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
244 // _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
246 JOptionPane.showMessageDialog( _mf,
247 "Inference successfully completed",
248 "Inference Completed",
249 JOptionPane.INFORMATION_MESSAGE );
257 catch ( final InterruptedException e ) {
258 // TODO need to handle this exception SOMEHOW!
259 // TODO Auto-generated catch block
264 private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
265 InterruptedException {
267 final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
268 .getCanonicalPath() );
270 msa = mafft.infer( seqs, opts );
272 catch ( final IOException e ) {
273 System.out.println( mafft.getErrorDescription() );
278 private void writeToFiles( final DistanceMatrix m ) {
279 if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
281 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
282 + MSA_FILE_SUFFIX ) );
283 _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
285 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
286 + PWD_FILE_SUFFIX ) );
287 m.write( pwd_writer );
290 catch ( final Exception e ) {
291 System.out.println( "Error: " + e.getMessage() );
296 public enum MSA_PRG {