// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.archaeopteryx.tools;
import java.io.BufferedWriter;
-import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.regex.Matcher;
import javax.swing.JOptionPane;
-import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.MainFrameApplication;
-import org.forester.evoinference.distance.NeighborJoining;
+import org.forester.evoinference.distance.NeighborJoiningF;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
-import org.forester.io.parsers.FastaParser;
-import org.forester.io.writers.SequenceWriter;
-import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
+import org.forester.msa.Msa.MSA_FORMAT;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.sequence.Sequence;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
-public class PhylogeneticInferrer implements Runnable {
+public class PhylogeneticInferrer extends RunnableProcess {
private Msa _msa;
private final MainFrameApplication _mf;
private final PhylogeneticInferenceOptions _options;
- private final List<Sequence> _seqs;
+ private final List<MolecularSequence> _seqs;
private final boolean DEBUG = true;
public final static String MSA_FILE_SUFFIX = ".aln";
public final static String PWD_FILE_SUFFIX = ".pwd";
- public PhylogeneticInferrer( final List<Sequence> seqs,
+ public PhylogeneticInferrer( final List<MolecularSequence> seqs,
final PhylogeneticInferenceOptions options,
final MainFrameApplication mf ) {
_msa = null;
_options = options;
}
- private Msa inferMsa() throws IOException, InterruptedException {
- final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
- if ( DEBUG ) {
- System.out.println();
- System.out.println( "temp file: " + temp_seqs_file );
- System.out.println();
+ private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException {
+ // final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" );
+ // if ( DEBUG ) {
+ // System.out.println();
+ // System.out.println( "temp file: " + temp_seqs_file );
+ // System.out.println();
+ // }
+ // //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
+ // final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
+ // SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
+ // writer.close();
+ switch ( msa_prg ) {
+ case MAFFT:
+ return runMAFFT( _seqs, processMafftOptions() );
+ default:
+ return null;
}
- //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" );
- final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) );
- SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
- writer.close();
- final List<String> opts = processMafftOptions();
- return runMAFFT( temp_seqs_file, opts );
}
private List<String> processMafftOptions() {
e.printStackTrace();
}
}
- final NeighborJoining nj = NeighborJoining.createInstance();
+ final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
final Phylogeny phy = nj.execute( m );
- PhylogeneticInferrer.extractFastaInformation( phy );
+ PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
+ PhylogenyMethods.extractFastaInformation( phy );
return phy;
}
if ( ( _msa == null ) && ( _seqs == null ) ) {
throw new IllegalArgumentException( "cannot run phylogenetic analysis with null msa and seq array" );
}
+ start( _mf, "phylogenetic inference" );
if ( _msa == null ) {
Msa msa = null;
try {
- msa = inferMsa();
+ msa = inferMsa( MSA_PRG.MAFFT );
}
catch ( final IOException e ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Could not create multiple sequence alignment with \""
+ _options.getMsaPrg() + "\" and the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
- "Failed to Calculate MSA",
- JOptionPane.ERROR_MESSAGE );
+ "Failed to Calculate MSA",
+ JOptionPane.ERROR_MESSAGE );
if ( DEBUG ) {
e.printStackTrace();
}
return;
}
catch ( final Exception e ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Could not create multiple sequence alignment with \""
+ _options.getMsaPrg() + "\" and the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
- "Unexpected Exception During MSA Calculation",
- JOptionPane.ERROR_MESSAGE );
+ "Unexpected Exception During MSA Calculation",
+ JOptionPane.ERROR_MESSAGE );
if ( DEBUG ) {
e.printStackTrace();
}
return;
}
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Could not create multiple sequence alignment with "
+ _options.getMsaPrg() + "\nand the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"",
- "Failed to Calculate MSA",
- JOptionPane.ERROR_MESSAGE );
+ "Failed to Calculate MSA",
+ JOptionPane.ERROR_MESSAGE );
return;
}
if ( DEBUG ) {
System.out.println( msa.toString() );
- System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+ System.out.println( MsaMethods.calcGapRatio( msa ) );
}
final MsaMethods msa_tools = MsaMethods.createInstance();
if ( _options.isExecuteMsaProcessing() ) {
- msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
+ msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
_options.getMsaProcessingMinAllowedLength(),
msa );
if ( msa == null ) {
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Less than two sequences longer than "
+ _options.getMsaProcessingMinAllowedLength()
+ " residues left after MSA processing",
- "MSA Processing Settings Too Stringent",
- JOptionPane.ERROR_MESSAGE );
+ "MSA Processing Settings Too Stringent",
+ JOptionPane.ERROR_MESSAGE );
return;
}
}
if ( DEBUG ) {
System.out.println( msa_tools.getIgnoredSequenceIds() );
System.out.println( msa.toString() );
- System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+ System.out.println( MsaMethods.calcGapRatio( msa ) );
}
_msa = msa;
}
if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
- .getLength(), n, seed );
+ .getLength(), n, seed );
final Phylogeny[] eval_phys = new Phylogeny[ n ];
for( int i = 0; i < n; ++i ) {
resampleable_msa.resample( resampled_column_positions[ i ] );
ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
}
_mf.getMainPanel().addPhylogenyInNewTab( master_phy, _mf.getConfiguration(), "nj", "njpath" );
- _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ // _mf.getMainPanel().getCurrentTreePanel().setArrowCursor();
+ end( _mf );
JOptionPane.showMessageDialog( _mf,
"Inference successfully completed",
"Inference Completed",
}
}
- private Msa runMAFFT( final File input_seqs, final List<String> opts ) throws IOException, InterruptedException {
+ private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
Msa msa = null;
- final MsaInferrer mafft = Mafft.createInstance();
+ final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
+ .getCanonicalPath() );
try {
- msa = mafft.infer( input_seqs, opts );
+ msa = mafft.infer( seqs, opts );
}
catch ( final IOException e ) {
System.out.println( mafft.getErrorDescription() );
if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
try {
final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
- + MSA_FILE_SUFFIX ) );
- _msa.write( msa_writer );
+ + MSA_FILE_SUFFIX ) );
+ _msa.write( msa_writer, MSA_FORMAT.PHYLIP );
msa_writer.close();
final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
- + PWD_FILE_SUFFIX ) );
+ + PWD_FILE_SUFFIX ) );
m.write( pwd_writer );
pwd_writer.close();
}
}
}
- public static void extractFastaInformation( final Phylogeny phy ) {
- for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
- if ( name_m.lookingAt() ) {
- System.out.println();
- // System.out.println( name_m.group( 1 ) );
- // System.out.println( name_m.group( 2 ) );
- // System.out.println( name_m.group( 3 ) );
- // System.out.println( name_m.group( 4 ) );
- final String acc_source = name_m.group( 1 );
- final String acc = name_m.group( 2 );
- final String seq_name = name_m.group( 3 );
- final String tax_sn = name_m.group( 4 );
- if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
- node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
- }
- if ( !ForesterUtil.isEmpty( seq_name ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
- node.getNodeData().getSequence( 0 ).setName( seq_name );
- }
- if ( !ForesterUtil.isEmpty( tax_sn ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
- node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
- }
- }
- }
- }
+ public enum MSA_PRG {
+ MAFFT;
}
}