From e86d89ccaf293b3e50d16db81d8e151c37c5fdb1 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Mon, 2 Jul 2012 07:04:03 +0000 Subject: [PATCH] inference --- .../src/org/forester/archaeopteryx/MainFrame.java | 2 +- .../archaeopteryx/tools/InferenceManager.java | 61 ++++++----- .../archaeopteryx/tools/PhylogeneticInferrer.java | 15 ++- .../java/src/org/forester/msa/ClustalOmega.java | 108 ++++++++++++++++++++ forester/java/src/org/forester/msa/Mafft.java | 26 +---- forester/java/src/org/forester/test/Test.java | 16 ++- 6 files changed, 175 insertions(+), 53 deletions(-) create mode 100644 forester/java/src/org/forester/msa/ClustalOmega.java diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index ff3f6a9..0912f9a 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -907,7 +907,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { _inference_manager = i; } - InferenceManager getInferenceManager() { + public InferenceManager getInferenceManager() { return _inference_manager; } diff --git a/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java b/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java index b648038..6886243 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java @@ -2,18 +2,18 @@ package org.forester.archaeopteryx.tools; import java.io.File; -import java.io.IOException; import org.forester.archaeopteryx.Configuration; -import org.forester.util.ForesterUtil; public final class InferenceManager { - private final File _path_to_local_mafft; - private final File _path_to_local_kalign; - private final File _path_to_local_fastme; - private final File _path_to_local_raxml; - private final File _path_to_local_clustalo; + private 
final static String DEFAULT_PATHS[] = { "C:\\Program Files\\", "C:\\Program Files (x86)\\", "/bin/", + "/usr/local/bin/", "/usr/bin/" }; + private final File _path_to_local_mafft; + private final File _path_to_local_kalign; + private final File _path_to_local_fastme; + private final File _path_to_local_raxml; + private final File _path_to_local_clustalo; public static InferenceManager createInstance( final Configuration c ) { return new InferenceManager( c.getpathToLocalMafft(), @@ -48,17 +48,31 @@ public final class InferenceManager { return _path_to_local_clustalo; } - private final static File createLocalPath( final File path ) { - if ( path == null ) { - return null; + private final static File createLocalPath( final File path, final String name ) { + if ( ( path != null ) && path.canExecute() && !path.isDirectory() ) { + return path; } - try { - if ( path.getCanonicalFile().canExecute() && !path.getCanonicalFile().isDirectory() ) { - return new File( path.getCanonicalFile().toString() ); - } + final File p1 = new File( name ); + if ( p1.canExecute() && !p1.isDirectory() ) { + return p1; } - catch ( final IOException e ) { - return null; + for( final String path_str : DEFAULT_PATHS ) { + try { + final File p2 = new File( path_str + name ); + if ( p2.canExecute() && !p2.isDirectory() ) { + return p2; + } + final File p3 = new File( path_str + name + ".exe" ); + if ( p3.canExecute() && !p3.isDirectory() ) { + return p3; + } + final File p4 = new File( path_str + name + ".bat" ); + if ( p4.canExecute() && !p4.isDirectory() ) { + return p4; + } + } + catch ( final Exception ignored ) { // best-effort probe: a default location that cannot be checked counts as "not found", try the next one + } } return null; } @@ -68,15 +82,10 @@ public final class InferenceManager { final File path_to_local_fastme, final File path_to_local_raxml, final File path_to_local_clustalo ) { - _path_to_local_mafft = createLocalPath( path_to_local_mafft ) != null ? 
createLocalPath( path_to_local_mafft ) - : createLocalPath( new File( "mafft" ) ); - _path_to_local_kalign = createLocalPath( path_to_local_kalign ) != null ? createLocalPath( path_to_local_kalign ) - : createLocalPath( new File( "kalign" ) ); - _path_to_local_fastme = createLocalPath( path_to_local_fastme ) != null ? createLocalPath( path_to_local_fastme ) - : createLocalPath( new File( "fastme" ) ); - _path_to_local_raxml = createLocalPath( path_to_local_raxml ) != null ? createLocalPath( path_to_local_raxml ) - : createLocalPath( new File( "raxml" ) ); - _path_to_local_clustalo = createLocalPath( path_to_local_clustalo ) != null ? createLocalPath( path_to_local_clustalo ) - : createLocalPath( new File( ForesterUtil.isWindowns() ? "clustalo.exe" : "clustalo" ) ); + _path_to_local_mafft = createLocalPath( path_to_local_mafft, "mafft" ); + _path_to_local_kalign = createLocalPath( path_to_local_kalign, "kalign" ); + _path_to_local_fastme = createLocalPath( path_to_local_fastme, "fastme" ); + _path_to_local_raxml = createLocalPath( path_to_local_raxml, "raxml" ); + _path_to_local_clustalo = createLocalPath( path_to_local_clustalo, "clustalo" ); } } diff --git a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java index 6cf6614..4cac659 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java @@ -45,6 +45,7 @@ import org.forester.io.parsers.FastaParser; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.msa.BasicMsa; +import org.forester.msa.ClustalOmega; import org.forester.msa.Mafft; import org.forester.msa.Msa; import org.forester.msa.MsaInferrer; @@ -264,7 +265,7 @@ public class PhylogeneticInferrer extends RunnableProcess { private Msa runMAFFT( final File input_seqs, final 
List opts ) throws IOException, InterruptedException { Msa msa = null; - final MsaInferrer mafft = Mafft.createInstance(); + final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft().getCanonicalPath()); // NOTE(review): the path may be null when createLocalPath finds no executable -> NPE here; confirm callers guard try { msa = mafft.infer( input_seqs, opts ); } @@ -273,6 +274,18 @@ public class PhylogeneticInferrer extends RunnableProcess { } return msa; } + + private Msa runClustalOmega( final File input_seqs, final List opts ) throws IOException, InterruptedException { + Msa msa = null; + final MsaInferrer clustalo = ClustalOmega.createInstance(_mf.getInferenceManager().getPathToLocalClustalo().getCanonicalPath()); // NOTE(review): the path may be null when createLocalPath finds no executable -> NPE here; confirm callers guard + try { + msa = clustalo.infer( input_seqs, opts ); + } + catch ( final IOException e ) { + System.out.println( clustalo.getErrorDescription() ); + } + return msa; + } private void writeToFiles( final BasicSymmetricalDistanceMatrix m ) { if ( !ForesterUtil.isEmpty( _options.getIntermediateFilesBase() ) ) { diff --git a/forester/java/src/org/forester/msa/ClustalOmega.java b/forester/java/src/org/forester/msa/ClustalOmega.java new file mode 100644 index 0000000..80e73b7 --- /dev/null +++ b/forester/java/src/org/forester/msa/ClustalOmega.java @@ -0,0 +1,108 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.msa; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.forester.io.parsers.FastaParser; +import org.forester.util.SystemCommandExecutor; + +public final class ClustalOmega implements MsaInferrer { + + private final static String DEFAULT_PARAMETERS = ""; + private String _error; + private int _exit_code; + private final String _path_to_prg; + + + + public static MsaInferrer createInstance( final String path_to_prg ) throws IOException { + return new ClustalOmega( path_to_prg ); + } + + + + private ClustalOmega( final String path_to_prg ) throws IOException { + if ( !SystemCommandExecutor.isExecuteableFile( new File( path_to_prg ) ) ) { + throw new IOException( "cannot execute Clustal Omega with \"" + path_to_prg + "\"" ); + } + _path_to_prg = path_to_prg; + init(); + } + + public static String getDefaultParameters() { + return DEFAULT_PARAMETERS; + } + + @Override + public Object clone() { + throw new NoSuchMethodError(); + } + + @Override + public String getErrorDescription() { + return _error; + } + + @Override + public int getExitCode() { + return _exit_code; + } + + @Override + public Msa infer( final File path_to_input_seqs, final List opts ) throws IOException, InterruptedException { + init(); + final List my_opts = new ArrayList(); + my_opts.add( _path_to_prg ); + for( int i = 0; i < opts.size(); i++ ) { + my_opts.add( opts.get( i ) ); + } + my_opts.add( path_to_input_seqs.getAbsolutePath() ); + final SystemCommandExecutor command_executor = new SystemCommandExecutor( my_opts ); + _exit_code = command_executor.executeCommand(); // assign the field, not a shadowing local, so getExitCode() reports the real exit code + final StringBuilder 
stderr = command_executor.getStandardErrorFromCommand(); + _error = stderr.toString(); + if ( _exit_code != 0 ) { + throw new IOException( "Clustal Omega program failed, exit code: " + _exit_code + "\nCommand:\n" + my_opts + + "\nError:\n" + stderr ); + } + final StringBuilder stdout = command_executor.getStandardOutputFromCommand(); + if ( ( stdout == null ) || ( stdout.length() < 2 ) ) { + throw new IOException( "Clustal Omega program did not produce any output\nCommand:\n" + my_opts + "\nError:\n" + + stderr ); + } + final Msa msa = FastaParser.parseMsa( stdout.toString() ); + return msa; + } + + private void init() { + _error = null; + _exit_code = -100; + } +} diff --git a/forester/java/src/org/forester/msa/Mafft.java b/forester/java/src/org/forester/msa/Mafft.java index 6fc1675..7e4e4c0 100644 --- a/forester/java/src/org/forester/msa/Mafft.java +++ b/forester/java/src/org/forester/msa/Mafft.java @@ -41,35 +41,13 @@ public final class Mafft implements MsaInferrer { private int _exit_code; private final String _path_to_prg; - public static MsaInferrer createInstance() throws IOException { - return createInstance( getPathToCmd() ); - } + public static MsaInferrer createInstance( final String path_to_prg ) throws IOException { return new Mafft( path_to_prg ); } - private static String getPathToCmd() { - //TODO this needs to come from config file!! - //FIXME .. - //should not be in this class! 
- String path = ""; - final String os = ForesterUtil.OS_NAME.toLowerCase(); - if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { - path = "/usr/local/bin/mafft"; - } - else if ( os.indexOf( "win" ) >= 0 ) { - path = "C:\\Program Files\\mafft-win\\mafft.bat"; - } - else { - path = "/home/czmasek/bin/mafft"; - } - return path; - } - - public static boolean isInstalled() { - return SystemCommandExecutor.isExecuteableFile( new File( getPathToCmd() ) ); - } + + private Mafft( final String path_to_prg ) throws IOException { if ( !SystemCommandExecutor.isExecuteableFile( new File( path_to_prg ) ) ) { diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index db5e28d..55030a2 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -7951,8 +7951,22 @@ public final class Test { opts.add( "1000" ); opts.add( "--localpair" ); opts.add( "--quiet" ); + + + String path = ""; + final String os = ForesterUtil.OS_NAME.toLowerCase(); + if ( ( os.indexOf( "mac" ) >= 0 ) && ( os.indexOf( "os" ) > 0 ) ) { + path = "/usr/local/bin/mafft"; + } + else if ( os.indexOf( "win" ) >= 0 ) { + path = "C:\\Program Files\\mafft-win\\mafft.bat"; + } + else { + path = "/home/czmasek/bin/mafft"; + } + Msa msa = null; - final MsaInferrer mafft = Mafft.createInstance(); + final MsaInferrer mafft = Mafft.createInstance( path ); msa = mafft.infer( new File( PATH_TO_TEST_DATA + "ncbi_sn.fasta" ), opts ); if ( ( msa == null ) || ( msa.getLength() < 20 ) || ( msa.getNumberOfSequences() != 19 ) ) { return false; -- 1.7.10.2