From 0ac19dbd836236ac1c4da82415702836b7c54843 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 22 Nov 2013 23:53:39 +0000 Subject: [PATCH] new tool --- .../src/org/forester/application/annotator.java | 132 ++++++++++++++++++++ .../org/forester/ws/seqdb/SequenceDbWsTools.java | 6 +- 2 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 forester/java/src/org/forester/application/annotator.java diff --git a/forester/java/src/org/forester/application/annotator.java b/forester/java/src/org/forester/application/annotator.java new file mode 100644 index 0000000..8ecc9a4 --- /dev/null +++ b/forester/java/src/org/forester/application/annotator.java @@ -0,0 +1,132 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester + +package org.forester.application; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.forester.analysis.AncestralTaxonomyInference; +import org.forester.analysis.AncestralTaxonomyInferenceException; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.writers.PhylogenyWriter; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.util.CommandLineArguments; +import org.forester.util.ForesterUtil; +import org.forester.ws.seqdb.SequenceDbWsTools; + +public final class annotator { + + final static private String PRG_NAME = "annotator"; + final static private String PRG_VERSION = "1.00"; + final static private String PRG_DATE = "131122"; + + public static void main( final String args[] ) { + ForesterUtil.printProgramInformation( annotator.PRG_NAME, annotator.PRG_VERSION, annotator.PRG_DATE ); + System.out.println(); + if ( ( args.length != 2 ) ) { + annotator.argumentsError(); + } + CommandLineArguments cla = null; + try { + cla = new CommandLineArguments( args ); + } + catch ( final Exception e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + final File indir = cla.getFile( 0 ); + final File outdir = cla.getFile( 1 ); + if ( !indir.isDirectory() ) { + ForesterUtil.fatalError( PRG_NAME, indir + " is not a directory" ); + } + if ( !outdir.isDirectory() ) { + ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" ); + } + final File[] list_of_files = indir.listFiles(); + final List infiles = new ArrayList(); + for( final File file : list_of_files ) { + if ( file.isFile() && file.canRead() && file.toString().toLowerCase().endsWith( ".xml" ) ) { + infiles.add( file ); + } + } + int c = 0; + for( final File infile : infiles ) { + System.out.println( ++c + "/" + infiles.size() + ": " + infile ); + Phylogeny phy = null; + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] phylogenies = factory.create( infile, + PhyloXmlParser.createPhyloXmlParserXsdValidating() ); + phy = phylogenies[ 0 ]; + } + catch ( final Exception e ) { + ForesterUtil.fatalError( PRG_NAME, "failed to read phylgenies from [" + infile + "] [" + e.getMessage() + + "]" ); + } + try { + obtainSeqInformation( phy ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + try { + inferTaxonomyFromDescendents( phy ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + catch ( final AncestralTaxonomyInferenceException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + final File outfile = new File( outdir.getAbsolutePath().toString() + "/" + infile.getName() ); + try { + final PhylogenyWriter w = new PhylogenyWriter(); + w.toPhyloXML( phy, 0, outfile ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, "failed to write output [" + e.getMessage() + "]" ); + } + } + } + + private static void obtainSeqInformation( final Phylogeny phy ) throws IOException { + SequenceDbWsTools.obtainSeqInformation( phy, true, true, SequenceDbWsTools.DEFAULT_LINES_TO_RETURN ); + } + + private static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException, + AncestralTaxonomyInferenceException { + AncestralTaxonomyInference.inferTaxonomyFromDescendents( phy ); + } + + private static void argumentsError() { + System.out.println( annotator.PRG_NAME + " " ); + System.out.println(); + System.exit( -1 ); + } +} diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index d7cafd1..ed6387e 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -56,7 +56,6 @@ public final class SequenceDbWsTools { public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; public final static int DEFAULT_LINES_TO_RETURN = 4000; - //public final static String EMBL_DBS_EMBL = "embl"; public final static String EMBL_DBS_REFSEQ_N = "refseqn"; public final static String EMBL_DBS_REFSEQ_P = "refseqp"; public final static String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id="; @@ -64,6 +63,7 @@ public final class SequenceDbWsTools { public final static String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id="; private final static boolean DEBUG = true; private final static String URL_ENC = "UTF-8"; + private final static int SLEEP = 200; public static List getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) @@ -250,7 +250,7 @@ public final class SequenceDbWsTools { in.close(); try { // To prevent accessing online dbs in too quick succession. - Thread.sleep( 20 ); + Thread.sleep( SLEEP ); } catch ( final InterruptedException e ) { e.printStackTrace(); @@ -417,7 +417,7 @@ public final class SequenceDbWsTools { } } try { - Thread.sleep( 10 );// Sleep for 10 ms + Thread.sleep( SLEEP ); } catch ( final InterruptedException ie ) { } -- 1.7.10.2