X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Farchaeopteryx%2Ftools%2FPhylogeneticInferrer.java;h=cc627519c3625f4c51ce32aa4940589874eb4ea5;hb=3087ea10ac51cf1a8b2eb8d8abe010513ce9998f;hp=4cac65922df0aeca064205514ac11559d3e88fe8;hpb=e86d89ccaf293b3e50d16db81d8e151c37c5fdb1;p=jalview.git diff --git a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java index 4cac659..cc62751 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java @@ -21,41 +21,34 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.archaeopteryx.tools; import java.io.BufferedWriter; -import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.regex.Matcher; import javax.swing.JOptionPane; -import org.forester.archaeopteryx.AptxUtil; import org.forester.archaeopteryx.MainFrameApplication; -import org.forester.evoinference.distance.NeighborJoining; +import org.forester.evoinference.distance.NeighborJoiningF; import org.forester.evoinference.distance.PairwiseDistanceCalculator; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; +import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.evoinference.tools.BootstrapResampler; -import org.forester.io.parsers.FastaParser; -import org.forester.io.writers.SequenceWriter; -import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.msa.BasicMsa; -import org.forester.msa.ClustalOmega; import org.forester.msa.Mafft; import org.forester.msa.Msa; +import org.forester.msa.Msa.MSA_FORMAT; import org.forester.msa.MsaInferrer; import org.forester.msa.MsaMethods; import org.forester.msa.ResampleableMsa; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Accession; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.sequence.Sequence; +import org.forester.phylogeny.PhylogenyMethods; +import org.forester.sequence.MolecularSequence; import org.forester.tools.ConfidenceAssessor; import org.forester.util.ForesterUtil; @@ -64,12 +57,12 @@ public class PhylogeneticInferrer extends RunnableProcess { private Msa _msa; private final MainFrameApplication _mf; private final PhylogeneticInferenceOptions _options; - private final List _seqs; + private final List _seqs; private final boolean DEBUG = true; public final static String MSA_FILE_SUFFIX = ".aln"; public final static String PWD_FILE_SUFFIX = ".pwd"; - public PhylogeneticInferrer( final List seqs, + public PhylogeneticInferrer( final List seqs, final PhylogeneticInferenceOptions options, final MainFrameApplication mf ) { _msa = null; @@ -87,19 +80,23 @@ public class PhylogeneticInferrer extends RunnableProcess { _options = options; } - private Msa inferMsa() throws IOException, InterruptedException { - final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" ); - if ( DEBUG ) { - System.out.println(); - System.out.println( "temp file: " + temp_seqs_file ); - System.out.println(); + private Msa inferMsa( final MSA_PRG msa_prg ) throws IOException, InterruptedException { + // final File temp_seqs_file = File.createTempFile( "__msa__temp__", ".fasta" ); + // if ( DEBUG ) { + // System.out.println(); + // System.out.println( "temp file: " + temp_seqs_file ); + // System.out.println(); + // } + // //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" ); + // final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) ); + // SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 ); + // writer.close(); + switch ( msa_prg ) { + case MAFFT: + return runMAFFT( _seqs, processMafftOptions() ); + default: + return null; } - //final File temp_seqs_file = new File( _options.getTempDir() + ForesterUtil.FILE_SEPARATOR + "s.fasta" ); - final BufferedWriter writer = new BufferedWriter( new FileWriter( temp_seqs_file ) ); - SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 ); - writer.close(); - final List opts = processMafftOptions(); - return runMAFFT( temp_seqs_file, opts ); } private List processMafftOptions() { @@ -120,7 +117,7 @@ public class PhylogeneticInferrer extends RunnableProcess { } private Phylogeny inferPhylogeny( final Msa msa ) { - BasicSymmetricalDistanceMatrix m = null; + DistanceMatrix m = null; switch ( _options.getPwdDistanceMethod() ) { case KIMURA_DISTANCE: m = PairwiseDistanceCalculator.calcKimuraDistances( msa ); @@ -146,9 +143,10 @@ public class PhylogeneticInferrer extends RunnableProcess { e.printStackTrace(); } } - final NeighborJoining nj = NeighborJoining.createInstance(); + final NeighborJoiningF nj = NeighborJoiningF.createInstance( false, 5 ); final Phylogeny phy = nj.execute( m ); - PhylogeneticInferrer.extractFastaInformation( phy ); + PhylogenyMethods.addMolecularSeqsToTree( phy, msa ); + PhylogenyMethods.extractFastaInformation( phy ); return phy; } @@ -161,7 +159,7 @@ public class PhylogeneticInferrer extends RunnableProcess { if ( _msa == null ) { Msa msa = null; try { - msa = inferMsa(); + msa = inferMsa( MSA_PRG.MAFFT ); } catch ( final IOException e ) { end( _mf ); @@ -170,8 +168,8 @@ public class PhylogeneticInferrer extends RunnableProcess { + _options.getMsaPrg() + "\" and the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"\nError: " + e.getLocalizedMessage(), - "Failed to Calculate MSA", - JOptionPane.ERROR_MESSAGE ); + "Failed to Calculate MSA", + JOptionPane.ERROR_MESSAGE ); if ( DEBUG ) { e.printStackTrace(); } @@ -184,8 +182,8 @@ public class PhylogeneticInferrer extends RunnableProcess { + _options.getMsaPrg() + "\" and the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"\nError: " + e.getLocalizedMessage(), - "Unexpected Exception During MSA Calculation", - JOptionPane.ERROR_MESSAGE ); + "Unexpected Exception During MSA Calculation", + JOptionPane.ERROR_MESSAGE ); if ( DEBUG ) { e.printStackTrace(); } @@ -197,17 +195,17 @@ public class PhylogeneticInferrer extends RunnableProcess { "Could not create multiple sequence alignment with " + _options.getMsaPrg() + "\nand the following parameters:\n\"" + _options.getMsaPrgParameters() + "\"", - "Failed to Calculate MSA", - JOptionPane.ERROR_MESSAGE ); + "Failed to Calculate MSA", + JOptionPane.ERROR_MESSAGE ); return; } if ( DEBUG ) { System.out.println( msa.toString() ); - System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() ); + System.out.println( MsaMethods.calcGapRatio( msa ) ); } final MsaMethods msa_tools = MsaMethods.createInstance(); if ( _options.isExecuteMsaProcessing() ) { - msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(), + msa = msa_tools.deleteGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(), _options.getMsaProcessingMinAllowedLength(), msa ); if ( msa == null ) { @@ -216,15 +214,15 @@ public class PhylogeneticInferrer extends RunnableProcess { "Less than two sequences longer than " + _options.getMsaProcessingMinAllowedLength() + " residues left after MSA processing", - "MSA Processing Settings Too Stringent", - JOptionPane.ERROR_MESSAGE ); + "MSA Processing Settings Too Stringent", + JOptionPane.ERROR_MESSAGE ); return; } } if ( DEBUG ) { System.out.println( msa_tools.getIgnoredSequenceIds() ); System.out.println( msa.toString() ); - System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() ); + System.out.println( MsaMethods.calcGapRatio( msa ) ); } _msa = msa; } @@ -234,7 +232,7 @@ public class PhylogeneticInferrer extends RunnableProcess { if ( _options.isPerformBootstrapResampling() && ( n > 0 ) ) { final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa ); final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa - .getLength(), n, seed ); + .getLength(), n, seed ); final Phylogeny[] eval_phys = new Phylogeny[ n ]; for( int i = 0; i < n; ++i ) { resampleable_msa.resample( resampled_column_positions[ i ] ); @@ -263,39 +261,29 @@ public class PhylogeneticInferrer extends RunnableProcess { } } - private Msa runMAFFT( final File input_seqs, final List opts ) throws IOException, InterruptedException { + private Msa runMAFFT( final List seqs, final List opts ) throws IOException, + InterruptedException { Msa msa = null; - final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft().getCanonicalPath()); + final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft() + .getCanonicalPath() ); try { - msa = mafft.infer( input_seqs, opts ); + msa = mafft.infer( seqs, opts ); } catch ( final IOException e ) { System.out.println( mafft.getErrorDescription() ); } return msa; } - - private Msa runClustalOmega( final File input_seqs, final List opts ) throws IOException, InterruptedException { - Msa msa = null; - final MsaInferrer clustalo = ClustalOmega.createInstance(_mf.getInferenceManager().getPathToLocalClustalo().getCanonicalPath()); - try { - msa = clustalo.infer( input_seqs, opts ); - } - catch ( final IOException e ) { - System.out.println( clustalo.getErrorDescription() ); - } - return msa; - } - private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) { + private void writeToFiles( final DistanceMatrix m ) { if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) { try { final BufferedWriter msa_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() - + MSA_FILE_SUFFIX ) ); - _msa.write( msa_writer ); + + MSA_FILE_SUFFIX ) ); + _msa.write( msa_writer, MSA_FORMAT.PHYLIP ); msa_writer.close(); final BufferedWriter pwd_writer = new BufferedWriter( new FileWriter( _options.getIntermediateFilesBase() - + PWD_FILE_SUFFIX ) ); + + PWD_FILE_SUFFIX ) ); m.write( pwd_writer ); pwd_writer.close(); } @@ -305,35 +293,7 @@ public class PhylogeneticInferrer extends RunnableProcess { } } - public static void extractFastaInformation( final Phylogeny phy ) { - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() ); - if ( name_m.lookingAt() ) { - System.out.println(); - // System.out.println( name_m.group( 1 ) ); - // System.out.println( name_m.group( 2 ) ); - // System.out.println( name_m.group( 3 ) ); - // System.out.println( name_m.group( 4 ) ); - final String acc_source = name_m.group( 1 ); - final String acc = name_m.group( 2 ); - final String seq_name = name_m.group( 3 ); - final String tax_sn = name_m.group( 4 ); - if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - AptxUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); - } - if ( !ForesterUtil.isEmpty( seq_name ) ) { - AptxUtil.ensurePresenceOfSequence( node ); - node.getNodeData().getSequence( 0 ).setName( seq_name ); - } - if ( !ForesterUtil.isEmpty( tax_sn ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); - } - } - } - } + public enum MSA_PRG { + MAFFT; } }