2 // forester -- software libraries and applications
3 // for genomics and evolutionary biology research.
5 // Copyright (C) 2010 Christian M Zmasek
6 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.archaeopteryx;
28 import java.io.BufferedWriter;
30 import java.io.FileWriter;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.List;
35 import javax.swing.JOptionPane;
37 import org.forester.evoinference.distance.NeighborJoining;
38 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
39 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
40 import org.forester.evoinference.tools.BootstrapResampler;
41 import org.forester.io.parsers.FastaParser;
42 import org.forester.io.writers.SequenceWriter;
43 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
44 import org.forester.msa.BasicMsa;
45 import org.forester.msa.Mafft;
46 import org.forester.msa.Msa;
47 import org.forester.msa.MsaInferrer;
48 import org.forester.msa.MsaTools;
49 import org.forester.msa.ResampleableMsa;
50 import org.forester.phylogeny.Phylogeny;
51 import org.forester.sequence.Sequence;
52 import org.forester.tools.ConfidenceAssessor;
53 import org.forester.util.ForesterUtil;
55 public class PhylogeneticInferrer implements Runnable {
58 private final MainFrameApplication _mf;
59 private final PhylogeneticInferenceOptions _options;
60 private final List<Sequence> _seqs;
61 public final static String MSA_FILE_SUFFIX = ".aln";
62 public final static String PWD_FILE_SUFFIX = ".pwd";
64 public PhylogeneticInferrer( final List<Sequence> seqs,
65 final PhylogeneticInferenceOptions options,
66 final MainFrameApplication mf ) {
73 public PhylogeneticInferrer( final Msa msa,
74 final PhylogeneticInferenceOptions options,
75 final MainFrameApplication mf ) {
82 private Msa inferMsa() throws IOException {
83 final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
84 System.out.println( "temp file: " + temp_seqs_file );
85 //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
86 final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
87 SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
89 final List<String> opts = processMafftOptions();
92 msa = runMAFFT( temp_seqs_file, opts );
94 catch ( final InterruptedException e ) {
95 // TODO Auto-generated catch block
98 // copy aln file to intermediate dir file
99 // delete temp seqs file
103 private List<String> processMafftOptions() {
104 final String opts_str = _options.getMsaPrgParameters().trim().toLowerCase();
105 final String[] opts_ary = opts_str.split( " " );
106 final List<String> opts = new ArrayList<String>();
107 boolean saw_quiet = false;
108 for( final String opt : opts_ary ) {
110 if ( opt.equals( "--quiet" ) ) {
115 opts.add( "--quiet" );
120 private Phylogeny inferPhylogeny( final Msa msa ) {
121 BasicSymmetricalDistanceMatrix m = null;
122 switch ( _options.getPwdDistanceMethod() ) {
123 case KIMURA_DISTANCE:
124 m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
126 case POISSON_DISTANCE:
127 m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
129 case FRACTIONAL_DISSIMILARITY:
130 m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
133 throw new RuntimeException( "invalid pwd method" );
135 if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
136 BufferedWriter pwd_writer;
138 pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() + PWD_FILE_SUFFIX ) );
139 m.write( pwd_writer );
142 catch ( final IOException e ) {
143 // TODO Auto-generated catch block
147 final NeighborJoining nj = new NeighborJoining();
148 final Phylogeny phy = nj.execute( m );
149 FastaParser.extractFastaInformation( phy );
153 private void infer() {
154 //_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
155 if ( ( _msa == null ) && ( _seqs == null ) ) {
156 throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
158 if ( _msa == null ) {
163 catch ( final IOException e ) {
164 JOptionPane.showMessageDialog( _mf, "Could not create multiple sequence alignment with "
165 + _options.getMsaPrg() + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters()
166 + "\"\nError:" + e.getLocalizedMessage(), "Failed to Calculate MSA", JOptionPane.ERROR_MESSAGE );
170 JOptionPane.showMessageDialog( _mf, "Could not create multiple sequence alignment with "
171 + _options.getMsaPrg() + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters()
172 + "\"", "Failed to Calculate MSA", JOptionPane.ERROR_MESSAGE );
175 System.out.println( msa.toString() );
176 System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
177 final MsaTools msa_tools = MsaTools.createInstance();
178 if ( _options.isExecuteMsaProcessing() ) {
179 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(), _options
180 .getMsaProcessingMinAllowedLength(), msa );
182 JOptionPane.showMessageDialog( _mf,
183 "Less than two sequences longer than "
184 + _options.getMsaProcessingMinAllowedLength()
185 + " residues left after MSA processing",
186 "MSA Processing Settings Too Stringent",
187 JOptionPane.ERROR_MESSAGE );
191 System.out.println( msa_tools.getIgnoredSequenceIds() );
192 System.out.println( msa.toString() );
193 System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
196 final int n = _options.getBootstrapSamples();
197 final long seed = _options.getRandomNumberGeneratorSeed();
198 final Phylogeny master_phy = inferPhylogeny( _msa );
199 if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
200 final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
201 final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
202 .getLength(), n, seed );
203 final Phylogeny[] eval_phys = new Phylogeny[ n ];
204 for( int i = 0; i < n; ++i ) {
205 resampleable_msa.resample( resampled_column_positions[ i ] );
206 eval_phys[ i ] = inferPhylogeny( resampleable_msa );
208 ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
210 _mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
211 _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
212 JOptionPane.showMessageDialog( _mf,
213 "Inference successfully completed",
214 "Inference Completed",
215 JOptionPane.INFORMATION_MESSAGE );
223 private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
225 final MsaInferrer mafft = Mafft.createInstance();
227 msa = mafft.infer( input_seqs, opts );
229 catch ( final IOException e ) {
230 System.out.println( mafft.getErrorDescription() );
235 private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
236 if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
238 final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options
239 .getIntermediateFilesBase()
240 + MSA_FILE_SUFFIX ) );
241 _msa.write( msa_writer );
243 final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options
244 .getIntermediateFilesBase()
245 + PWD_FILE_SUFFIX ) );
246 m.write( pwd_writer );
249 catch ( final Exception e ) {
250 System.out.println( "Error: " + e.getMessage() );