import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Matcher;
import javax.swing.JOptionPane;
+import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
-import org.forester.msa.MsaTools;
+import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.Sequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
-public class PhylogeneticInferrer implements Runnable {
+public class PhylogeneticInferrer extends RunnableProcess {
private Msa _msa;
private final MainFrameApplication _mf;
private final PhylogeneticInferenceOptions _options;
private final List<Sequence> _seqs;
+ private final boolean DEBUG = true; // hard-wired to true: every "if ( DEBUG )" diagnostic branch in this class is always taken
public final static String MSA_FILE_SUFFIX = ".aln";
public final static String PWD_FILE_SUFFIX = ".pwd";
_options = options;
}
- private Msa inferMsa() throws IOException {
- final File temp_seqs_file = File.createTempFile( "aptx", ".fasta" );
- System.out.println( "temp file: " + temp_seqs_file );
+ private Msa inferMsa() throws IOException, InterruptedException { // writes _seqs as FASTA to a temp file and returns what runMAFFT produces from it
+ final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" ); // input file handed to runMAFFT below; not deleted anywhere in this method
+ if ( DEBUG ) { // diagnostic only: echo the temp file location
+ System.out.println();
+ System.out.println( "temp file: " + temp_seqs_file );
+ System.out.println();
+ }
//final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
writer.close();
final List<String> opts = processMafftOptions();
- Msa msa = null;
- try {
- msa = runMAFFT( temp_seqs_file, opts );
- }
- catch ( final InterruptedException e ) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- // copy aln file to intermediate dir file
- // delete temp seqs file
- return msa;
+ return runMAFFT( temp_seqs_file, opts ); // InterruptedException is no longer swallowed here; it propagates to the caller together with IOException
}
private List<String> processMafftOptions() {
e.printStackTrace();
}
}
- final NeighborJoining nj = new NeighborJoining();
+ final NeighborJoining nj = NeighborJoining.createInstance();
final Phylogeny phy = nj.execute( m );
- FastaParser.extractFastaInformation( phy );
+ PhylogeneticInferrer.extractFastaInformation( phy );
return phy;
}
- private void infer() {
+ private void infer() throws InterruptedException {
//_mf.getMainPanel().getCurrentTreePanel().setWaitCursor();
if ( ( _msa == null ) && ( _seqs == null ) ) {
throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
}
+ start( _mf, "phylogenetic inference" );
if ( _msa == null ) {
Msa msa = null;
try {
msa = inferMsa();
}
catch ( final IOException e ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
- "Could not create multiple sequence alignment with "
- + _options.getMsaPrg() + "\nand the following parameters:\n\""
- + _options.getMsaPrgParameters() + "\"\nError:"
+ "Could not create multiple sequence alignment with \""
+ + _options.getMsaPrg() + "\" and the following parameters:\n\""
+ + _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
"Failed to Calculate MSA",
JOptionPane.ERROR_MESSAGE );
+ if ( DEBUG ) {
+ e.printStackTrace();
+ }
+ return;
+ }
+ catch ( final Exception e ) {
+ end( _mf );
+ JOptionPane.showMessageDialog( _mf,
+ "Could not create multiple sequence alignment with \""
+ + _options.getMsaPrg() + "\" and the following parameters:\n\""
+ + _options.getMsaPrgParameters() + "\"\nError: "
+ + e.getLocalizedMessage(),
+ "Unexpected Exception During MSA Calculation",
+ JOptionPane.ERROR_MESSAGE );
+ if ( DEBUG ) {
+ e.printStackTrace();
+ }
return;
}
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Could not create multiple sequence alignment with "
+ _options.getMsaPrg() + "\nand the following parameters:\n\""
JOptionPane.ERROR_MESSAGE );
return;
}
- System.out.println( msa.toString() );
- System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
- final MsaTools msa_tools = MsaTools.createInstance();
+ if ( DEBUG ) {
+ System.out.println( msa.toString() );
+ System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+ }
+ final MsaMethods msa_tools = MsaMethods.createInstance();
if ( _options.isExecuteMsaProcessing() ) {
msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
_options.getMsaProcessingMinAllowedLength(),
msa );
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Less than two sequences longer than "
+ _options.getMsaProcessingMinAllowedLength()
return;
}
}
- System.out.println( msa_tools.getIgnoredSequenceIds() );
- System.out.println( msa.toString() );
- System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
+ if ( DEBUG ) {
+ System.out.println( msa_tools.getIgnoredSequenceIds() );
+ System.out.println( msa.toString() );
+ System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+ }
_msa = msa;
}
final int n = _options.getBootstrapSamples();
ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
}
_mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
- _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ // _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Inference successfully completed",
"Inference Completed",
@Override
public void run() {
- infer();
+ try {
+ infer();
+ }
+ catch ( final InterruptedException e ) {
+ // TODO need to handle this exception SOMEHOW!
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
}
private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
}
}
}
+
+    /**
+     * Interprets the names of the external nodes of a phylogeny as FASTA
+     * description lines and copies the recognized parts onto the node data.
+     * <p>
+     * For every external node whose non-empty name is matched from its start
+     * by {@code FastaParser.FASTA_DESC_LINE}:
+     * <ul>
+     * <li>if capture groups 1 and 2 are both non-empty, the first sequence of
+     * the node receives {@code new Accession( group2, group1 )};</li>
+     * <li>if capture group 3 is non-empty, it becomes the name of the first
+     * sequence of the node;</li>
+     * <li>if capture group 4 is non-empty, it becomes the scientific name of
+     * the first taxonomy of the node.</li>
+     * </ul>
+     * Missing sequence/taxonomy objects are created on demand through
+     * {@code AptxUtil}. Nodes with an empty or non-matching name are left
+     * untouched. Nothing is written to standard output.
+     *
+     * @param phy the phylogeny whose external nodes are annotated in place
+     */
+    public static void extractFastaInformation( final Phylogeny phy ) {
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( ForesterUtil.isEmpty( node.getName() ) ) {
+                continue;
+            }
+            final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+            // lookingAt() anchors the match at the start of the name only.
+            if ( !name_m.lookingAt() ) {
+                continue;
+            }
+            final String acc_source = name_m.group( 1 );
+            final String acc = name_m.group( 2 );
+            final String seq_name = name_m.group( 3 );
+            final String tax_sn = name_m.group( 4 );
+            // An accession is only set when both its value and its source are present.
+            if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                AptxUtil.ensurePresenceOfSequence( node );
+                node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+            }
+            if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                AptxUtil.ensurePresenceOfSequence( node );
+                node.getNodeData().getSequence( 0 ).setName( seq_name );
+            }
+            if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                AptxUtil.ensurePresenceOfTaxonomy( node );
+                node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+            }
+        }
+    }
}