// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.archaeopteryx.tools;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.regex.Matcher;
import javax.swing.JOptionPane;
-import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.MainFrameApplication;
-import org.forester.evoinference.distance.NeighborJoining;
+import org.forester.evoinference.distance.NeighborJoiningF;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
-import org.forester.io.parsers.FastaParser;
import org.forester.msa.BasicMsa;
-import org.forester.msa.ClustalOmega;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.Msa.MSA_FORMAT;
import org.forester.msa.MsaMethods;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.sequence.Sequence;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
// Alignment worked on by this class; the constructor starts it out as null.
private Msa _msa;
// Application frame; used to reach the inference manager (e.g. for the local MAFFT path).
private final MainFrameApplication _mf;
// Settings consulted for the MSA program and its parameters, MSA processing,
// bootstrap resampling and the intermediate-files base name.
private final PhylogeneticInferenceOptions _options;
// Input sequences handed to the MSA program; this diff switches the element
// type from Sequence to MolecularSequence.
- private final List<Sequence> _seqs;
+ private final List<MolecularSequence> _seqs;
// Gates the e.printStackTrace() calls in the error handlers.
// NOTE(review): named like a constant but declared as a per-instance field (not static).
private final boolean DEBUG = true;
// Appended to the intermediate-files base name for the PHYLIP-format alignment file.
public final static String MSA_FILE_SUFFIX = ".aln";
// Appended to the intermediate-files base name for the distance-matrix file.
public final static String PWD_FILE_SUFFIX = ".pwd";
- public PhylogeneticInferrer( final List<Sequence> seqs,
+ public PhylogeneticInferrer( final List<MolecularSequence> seqs,
final PhylogeneticInferenceOptions options,
final MainFrameApplication mf ) {
_msa = null;
switch ( msa_prg ) {
case MAFFT:
return runMAFFT( _seqs, processMafftOptions() );
- case CLUSTAL_O:
- return runClustalOmega( _seqs, processMafftOptions() );
default:
return null;
}
e.printStackTrace();
}
}
- final NeighborJoining nj = NeighborJoining.createInstance();
+ final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 );
final Phylogeny phy = nj.execute( m );
- PhylogeneticInferrer.extractFastaInformation( phy );
+ PhylogenyMethods.addMolecularSeqsToTree( phy, msa );
+ PhylogenyMethods.extractFastaInformation( phy );
return phy;
}
+ _options.getMsaPrg() + "\" and the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
- "Failed to Calculate MSA",
- JOptionPane.ERROR_MESSAGE );
+ "Failed to Calculate MSA",
+ JOptionPane.ERROR_MESSAGE );
if ( DEBUG ) {
e.printStackTrace();
}
+ _options.getMsaPrg() + "\" and the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"\nError: "
+ e.getLocalizedMessage(),
- "Unexpected Exception During MSA Calculation",
- JOptionPane.ERROR_MESSAGE );
+ "Unexpected Exception During MSA Calculation",
+ JOptionPane.ERROR_MESSAGE );
if ( DEBUG ) {
e.printStackTrace();
}
"Could not create multiple sequence alignment with "
+ _options.getMsaPrg() + "\nand the following parameters:\n\""
+ _options.getMsaPrgParameters() + "\"",
- "Failed to Calculate MSA",
- JOptionPane.ERROR_MESSAGE );
+ "Failed to Calculate MSA",
+ JOptionPane.ERROR_MESSAGE );
return;
}
if ( DEBUG ) {
}
final MsaMethods msa_tools = MsaMethods.createInstance();
if ( _options.isExecuteMsaProcessing() ) {
- msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
+ msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
_options.getMsaProcessingMinAllowedLength(),
msa );
if ( msa == null ) {
"Less than two sequences longer than "
+ _options.getMsaProcessingMinAllowedLength()
+ " residues left after MSA processing",
- "MSA Processing Settings Too Stringent",
- JOptionPane.ERROR_MESSAGE );
+ "MSA Processing Settings Too Stringent",
+ JOptionPane.ERROR_MESSAGE );
return;
}
}
if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) {
final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa
- .getLength(), n, seed );
+ .getLength(), n, seed );
final Phylogeny[] eval_phys = new Phylogeny[ n ];
for( int i = 0; i < n; ++i ) {
resampleable_msa.resample( resampled_column_positions[ i ] );
}
}
/**
 * Aligns the given sequences with MAFFT.
 * <p>
 * The inferrer is created from the canonical form of the path that the main
 * frame's inference manager reports via getPathToLocalMafft(). This diff
 * changes the element type of {@code seqs} from Sequence to MolecularSequence
 * and re-wraps the signature.
 * <p>
 * NOTE(review): the handler belonging to the try block is not part of this
 * excerpt, so what happens when infer() fails cannot be confirmed from here.
 *
 * @param seqs sequences to align
 * @param opts options forwarded unchanged to the inferrer
 * @return whatever infer() produced; the local starts as null, so null comes
 *         back if no alignment was assigned
 * @throws IOException declared by the signature (presumably raised by
 *         getCanonicalPath() -- confirm against the full file)
 * @throws InterruptedException declared by the signature
 */
- private Msa runMAFFT( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
+ private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
Msa msa = null;
final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
- .getCanonicalPath() );
+ .getCanonicalPath() );
try {
msa = mafft.infer( seqs, opts );
}
return msa;
}
// Review note: the method below is deleted by this diff (all lines are removals).
// It was the Clustal Omega counterpart of runMAFFT: it built the inferrer from the
// canonical path reported by getPathToLocalClustalo(), and on an IOException from
// infer() it only printed the inferrer's error description to standard output and
// then returned the still-null alignment.
- private Msa runClustalOmega( final List<Sequence> seqs, final List<String> opts ) throws IOException,
- InterruptedException {
- Msa msa = null;
- final MsaInferrer clustalo = ClustalOmega.createInstance( _mf.getInferenceManager().getPathToLocalClustalo()
- .getCanonicalPath() );
- try {
- msa = clustalo.infer( seqs, opts );
- }
- catch ( final IOException e ) {
- System.out.println( clustalo.getErrorDescription() );
- }
- return msa;
- }
-
/**
 * Writes the intermediate results to disk when an intermediate-files base
 * name is configured; does nothing when that base name is empty.
 * <p>
 * Two files are produced: the current alignment ({@code _msa}) in PHYLIP
 * format at base + MSA_FILE_SUFFIX, and the distance matrix {@code m} at
 * base + PWD_FILE_SUFFIX.
 * <p>
 * NOTE(review): the handler belonging to the try block is not part of this
 * excerpt. In the visible code each writer is closed only after its write
 * call returns, so a failing write would skip close() unless the unseen
 * handler takes care of it -- confirm against the full file.
 *
 * @param m distance matrix to write next to the alignment
 */
private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) {
if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) {
try {
// Alignment file: <base>.aln, PHYLIP format.
final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
- + MSA_FILE_SUFFIX ) );
+ + MSA_FILE_SUFFIX ) );
_msa.write( msa_writer, MSA_FORMAT.PHYLIP );
msa_writer.close();
// Distance-matrix file: <base>.pwd.
final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase()
- + PWD_FILE_SUFFIX ) );
+ + PWD_FILE_SUFFIX ) );
m.write( pwd_writer );
pwd_writer.close();
}
}
}
// Review note: the method below is deleted by this diff (all lines are removals); the
// tree-building code now calls PhylogenyMethods.extractFastaInformation( phy ) instead.
// What the removed version did: for every external node whose non-empty name matches
// FastaParser.FASTA_DESC_LINE at its start (lookingAt), it
//   - set the accession of sequence 0 to new Accession( group 2, group 1 ) when both
//     groups are non-empty,
//   - set the name of sequence 0 to group 3 when non-empty,
//   - set the scientific name of taxonomy 0 to group 4 when non-empty,
// creating the sequence/taxonomy entry first via AptxUtil.ensurePresenceOf...().
// It also printed one empty line to standard output per matching node, which looks
// like leftover debugging next to the commented-out println calls.
- public static void extractFastaInformation( final Phylogeny phy ) {
- for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
- if ( name_m.lookingAt() ) {
- System.out.println();
- // System.out.println( name_m.group( 1 ) );
- // System.out.println( name_m.group( 2 ) );
- // System.out.println( name_m.group( 3 ) );
- // System.out.println( name_m.group( 4 ) );
- final String acc_source = name_m.group( 1 );
- final String acc = name_m.group( 2 );
- final String seq_name = name_m.group( 3 );
- final String tax_sn = name_m.group( 4 );
- if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
- node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
- }
- if ( !ForesterUtil.isEmpty( seq_name ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
- node.getNodeData().getSequence( 0 ).setName( seq_name );
- }
- if ( !ForesterUtil.isEmpty( tax_sn ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
- node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
- }
- }
- }
- }
- }
-
/**
 * Multiple-sequence-alignment programs this class can dispatch to.
 * This diff drops CLUSTAL_O, leaving MAFFT as the only constant; the
 * dispatching switch returns null for any value it has no case for.
 */
public enum MSA_PRG {
- MAFFT, CLUSTAL_O;
+ MAFFT;
}
}