package org.forester.archaeopteryx.tools;
import java.io.BufferedWriter;
-import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Matcher;
import javax.swing.JOptionPane;
+import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
import org.forester.io.parsers.FastaParser;
-import org.forester.io.writers.SequenceWriter;
-import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.msa.BasicMsa;
+import org.forester.msa.ClustalOmega;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
+import org.forester.msa.Msa.MSA_FORMAT;
import org.forester.msa.MsaInferrer;
-import org.forester.msa.MsaTools;
+import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.Sequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
-public class PhylogeneticInferrer implements Runnable {
+public class PhylogeneticInferrer extends RunnableProcess {
private Msa _msa;
private final MainFrameApplication _mf;
private final PhylogeneticInferenceOptions _options;
private final List<Sequence> _seqs;
+ private final boolean DEBUG = true;
public final static String MSA_FILE_SUFFIX = ".aln";
public final static String PWD_FILE_SUFFIX = ".pwd";
_options = options;
}
- private Msa inferMsa() throws IOException {
- final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
- System.out.println( "temp file: " + temp_seqs_file );
- //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
- final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
- SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
- writer.close();
- final List<String> opts = processMafftOptions();
- Msa msa = null;
- try {
- msa = runMAFFT( temp_seqs_file, opts );
- }
- catch ( final InterruptedException e ) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
+ // final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
+ // if ( DEBUG ) {
+ // System.out.println();
+ // System.out.println( "temp file: " + temp_seqs_file );
+ // System.out.println();
+ // }
+ // //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
+ // final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
+ // SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
+ // writer.close();
+ switch ( msa_prg ) {
+ case MAFFT:
+ return runMAFFT( _seqs, processMafftOptions() );
+ case CLUSTAL_O:
+ return runClustalOmega( _seqs, processMafftOptions() );
+ default:
+ return null;
}
- // copy aln file to intermediate dir file
- // delete temp seqs file
- return msa;
}
private List<String> processMafftOptions() {
e.printStackTrace();
}
}
- final NeighborJoining nj = new NeighborJoining();
+ final NeighborJoining nj = NeighborJoining.createInstance();
final Phylogeny phy = nj.execute( m );
- FastaParser.extractFastaInformation( phy );
+ PhylogeneticInferrer.extractFastaInformation( phy );
return phy;
}
- private void infer() {
+ private void infer() throws InterruptedException {
//_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
if ( ( _msa == null ) && ( _seqs == null ) ) {
throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
}
+ start( _mf, "phylogenetic inference" );
if ( _msa == null ) {
Msa msa = null;
try {
- msa = inferMsa();
+ msa = inferMsa( MSA_PRG.MAFFT );
}
catch ( final IOException e ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
- "Could not create multiple sequence alignment with "
- + _options.getMsaPrg() + "\nand the following parameters:\n\""
- + _options.getMsaPrgParameters() + "\"\nError:"
+ "Could not create multiple sequence alignment with \""
+ + _options.getMsaPrg() + "\" and the following parameters:\n\""
+ + _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
"Failed to Calculate MSA",
JOptionPane.ERROR_MESSAGE );
+ if ( DEBUG ) {
+ e.printStackTrace();
+ }
+ return;
+ }
+ catch ( final Exception e ) {
+ end( _mf );
+ JOptionPane.showMessageDialog( _mf,
+ "Could not create multiple sequence alignment with \""
+ + _options.getMsaPrg() + "\" and the following parameters:\n\""
+ + _options.getMsaPrgParameters() + "\"\nError: "
+ + e.getLocalizedMessage(),
+ "Unexpected Exception During MSA Calculation",
+ JOptionPane.ERROR_MESSAGE );
+ if ( DEBUG ) {
+ e.printStackTrace();
+ }
return;
}
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Could not create multiple sequence alignment with "
+ _options.getMsaPrg() + "\nand the following parameters:\n\""
JOptionPane.ERROR_MESSAGE );
return;
}
- System.out.println( msa.toString() );
- System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
- final MsaTools msa_tools = MsaTools.createInstance();
+ if ( DEBUG ) {
+ System.out.println( msa.toString() );
+ System.out.println( MsaMethods.calcGapRatio( msa ) );
+ }
+ final MsaMethods msa_tools = MsaMethods.createInstance();
if ( _options.isExecuteMsaProcessing() ) {
msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
_options.getMsaProcessingMinAllowedLength(),
msa );
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Less than two sequences longer than "
+ _options.getMsaProcessingMinAllowedLength()
return;
}
}
- System.out.println( msa_tools.getIgnoredSequenceIds() );
- System.out.println( msa.toString() );
- System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
+ if ( DEBUG ) {
+ System.out.println( msa_tools.getIgnoredSequenceIds() );
+ System.out.println( msa.toString() );
+ System.out.println( MsaMethods.calcGapRatio( msa ) );
+ }
_msa = msa;
}
final int n = _options.getBootstrapSamples();
ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
}
_mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
- _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ // _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Inference successfully completed",
"Inference Completed",
@Override
public void run() {
- infer();
+ try {
+ infer();
+ }
+ catch ( final InterruptedException e ) {
+ // TODO need to handle this exception SOMEHOW!
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
}
- private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
+ private Msa runMAFFT( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
Msa msa = null;
- final MsaInferrer mafft = Mafft.createInstance();
+ final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
+ .getCanonicalPath() );
try {
- msa = mafft.infer( input_seqs, opts );
+ msa = mafft.infer( seqs, opts );
}
catch ( final IOException e ) {
System.out.println( mafft.getErrorDescription() );
return msa;
}
+ private Msa runClustalOmega( final List<Sequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
+ Msa msa = null;
+ final MsaInferrer clustalo = ClustalOmega.createInstance( _mf.getInferenceManager().getPathToLocalClustalo()
+ .getCanonicalPath() );
+ try {
+ msa = clustalo.infer( seqs, opts );
+ }
+ catch ( final IOException e ) {
+ System.out.println( clustalo.getErrorDescription() );
+ }
+ return msa;
+ }
+
private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
try {
final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
+ MSA_FILE_SUFFIX ) );
- _msa.write( msa_writer );
+ _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
msa_writer.close();
final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
+ PWD_FILE_SUFFIX ) );
}
}
}
+
+ public static void extractFastaInformation( final Phylogeny phy ) {
+ for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+ final PhylogenyNode node = iter.next();
+ if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+ final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+ if ( name_m.lookingAt() ) {
+ System.out.println();
+ // System.out.println( name_m.group( 1 ) );
+ // System.out.println( name_m.group( 2 ) );
+ // System.out.println( name_m.group( 3 ) );
+ // System.out.println( name_m.group( 4 ) );
+ final String acc_source = name_m.group( 1 );
+ final String acc = name_m.group( 2 );
+ final String seq_name = name_m.group( 3 );
+ final String tax_sn = name_m.group( 4 );
+ if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+ AptxUtil.ensurePresenceOfSequence( node );
+ node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+ }
+ if ( !ForesterUtil.isEmpty( seq_name ) ) {
+ AptxUtil.ensurePresenceOfSequence( node );
+ node.getNodeData().getSequence( 0 ).setName( seq_name );
+ }
+ if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+ AptxUtil.ensurePresenceOfTaxonomy( node );
+ node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+ }
+ }
+ }
+ }
+ }
+
+ public enum MSA_PRG {
+ MAFFT, CLUSTAL_O;
+ }
}