From 299c529fea070487998aee60ed7810af57f3a071 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 3 Oct 2013 23:57:29 +0000 Subject: [PATCH] inprogress --- .../org/forester/archaeopteryx/tools/Blast.java | 13 +- .../archaeopteryx/tools/SequenceDataRetriver.java | 5 +- .../io/parsers/phyloxml/PhyloXmlMapping.java | 5 - .../src/org/forester/phylogeny/PhylogenyNode.java | 342 +++++++++----------- .../src/org/forester/phylogeny/data/NodeData.java | 36 +-- .../org/forester/ws/seqdb/SequenceDbWsTools.java | 281 ++++++++-------- 6 files changed, 312 insertions(+), 370 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java index 72c23c1..cddfba3 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/Blast.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/Blast.java @@ -81,27 +81,30 @@ public final class Blast { } if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) { - final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getAccession() - .getValue() ); + final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence() + .getAccession().getValue() ); if ( id != null ) { query = id.getValue(); } } if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { - final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getName() ); + final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence() + .getName() ); if ( id != null ) { query = id.getValue(); } } if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { - final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getSymbol() ); + final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence() + .getSymbol() ); if ( id != null ) { query = id.getValue(); } } if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { - final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getGeneName() ); + final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence() + .getGeneName() ); if ( id != null ) { query = id.getValue(); } diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index 9f3bcb1..a805a43 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -91,11 +91,10 @@ public final class SequenceDataRetriver extends RunnableProcess { } final StringBuffer sb = new StringBuffer(); if ( not_found.size() == 1 ) { - sb.append( "Data for the following sequence identifier was not found:\n" ); + sb.append( "For the following node no data was found:\n" ); } else { - sb.append( "Data for the following sequence identifiers was not found (total: " + not_found.size() - + "):\n" ); + sb.append( "For the following nodes no data was found: (total: " + not_found.size() + "):\n" ); } int i = 0; for( final String string : not_found ) { diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java index 643a401..ab19a1c 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java @@ -27,10 +27,6 @@ package org.forester.io.parsers.phyloxml; -/* - * @author Christian Zmasek TODO To change the template for this generated type - * comment go to Window - Preferences - Java - Code Style - Code Templates - */ public final class PhyloXmlMapping { public static final String ACCESSION = "accession"; @@ -74,7 +70,6 @@ public final class PhyloXmlMapping { public final static String EVENT_TYPE = "type"; public final static String EVENTS = "events"; public static final String ID_REF = "id_ref"; - // public final static String NODE_IDENTIFIER = "node_id"; public final static String IDENTIFIER = "id"; public final static String IDENTIFIER_PROVIDER_ATTR = "provider"; public static final String NODE_COLLAPSE = "collapse"; diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 01416eb..2b7c2cf 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -49,23 +49,20 @@ import org.forester.util.ForesterUtil; */ public final class PhylogenyNode implements Comparable { - public enum NH_CONVERSION_SUPPORT_VALUE_STYLE { - NONE, IN_SQUARE_BRACKETS, AS_INTERNAL_NODE_NAMES; - } private static long NODE_COUNT = 0; - private byte _indicator; - private long _id; - private int _sum_ext_nodes; - private float _x; - private float _y; - private double _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + private BranchData _branch_data; private boolean _collapse; - private PhylogenyNode _parent; - private PhylogenyNode _link; private ArrayList _descendants; + private double _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; + private long _id; + private byte _indicator; + private PhylogenyNode _link; private NodeData _node_data; - private BranchData _branch_data; + private PhylogenyNode _parent; + private int _sum_ext_nodes; + private float _x; private float _x_secondary; + private float _y; private float _y_secondary; /** @@ -77,13 +74,13 @@ public final class PhylogenyNode implements Comparable { setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!) } - public void removeConnections() { - _parent = null; - _link = null; - _descendants = null; - } - - public boolean isEmpty() { + private PhylogenyNode( final String nhx, + final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, + final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { + NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores ); + setId( PhylogenyNode.getNodeCount() ); + PhylogenyNode.increaseNodeCount(); + setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!). } /** @@ -99,16 +96,26 @@ public final class PhylogenyNode implements Comparable { n.setParent( this ); } - /** - * Adds PhylogenyNode n to the list of child nodes. But does NOT set the - * _parent of n to this. - * - * @see addAsChild( PhylogenyNode n ) - * @param n - * the PhylogenyNode to add - */ - final private void addChildNode( final PhylogenyNode child ) { - getDescendants().add( child ); + public final int calculateDepth() { + PhylogenyNode n = this; + int steps = 0; + while ( n._parent != null ) { + steps++; + n = n._parent; + } + return steps; + } + + public final double calculateDistanceToRoot() { + PhylogenyNode n = this; + double d = 0.0; + while ( n._parent != null ) { + if ( n._distance_parent > 0.0 ) { + d += n._distance_parent; + } + n = n._parent; + } + return d; } @Override @@ -121,9 +128,6 @@ public final class PhylogenyNode implements Comparable { return getName().compareTo( n.getName() ); } - // --------------------------------------------------------- - // Copy and delete Nodes, copy subtress - // --------------------------------------------------------- /** * Returns a new PhylogenyNode which has its data copied from this * PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT @@ -219,9 +223,10 @@ public final class PhylogenyNode implements Comparable { } } - // --------------------------------------------------------- - // Obtaining of Nodes - // --------------------------------------------------------- + final public List getAllDescendants() { + return _descendants; + } + /** * Returns a List containing references to all external children of this * PhylogenyNode. @@ -272,10 +277,6 @@ public final class PhylogenyNode implements Comparable { return _branch_data; } - final BranchData getBranchDataDirectly() { - return _branch_data; - } - /** * This return child node n of this node. * @@ -375,6 +376,13 @@ public final class PhylogenyNode implements Comparable { } /** + * Returns the ID (int) of this PhylogenyNode. + */ + final public long getId() { + return _id; + } + + /** * Returns the _indicator value of this PhylogenyNode. */ public final byte getIndicator() { @@ -401,6 +409,10 @@ public final class PhylogenyNode implements Comparable { return _link; } + final public String getName() { + return getNodeData().getNodeName(); + } + /** * Returns a refernce to the next external PhylogenyNode of this * PhylogenyNode. TODO should be in Phylogeny. Returns null if no next @@ -479,28 +491,6 @@ public final class PhylogenyNode implements Comparable { return _node_data; } - final NodeData getNodeDataDirectly() { - return _node_data; - } - - // --------------------------------------------------------- - // Set and get methods for Nodes - // --------------------------------------------------------- - /** - * Returns the ID (int) of this PhylogenyNode. - */ - final public long getId() { - return _id; - } - - final public String getName() { - return getNodeData().getNodeName(); - } - - final public List getAllDescendants() { - return _descendants; - } - final public int getNumberOfDescendants() { if ( _descendants == null ) { return 0; @@ -596,30 +586,6 @@ public final class PhylogenyNode implements Comparable { return result; } - // final private void init() { - //_descendants = new ArrayList(); - // _parent = null; //TODO not needed? - // _id = 0; //TODO not needed? - //initializeData(); //TODO not needed? - //} - /** - * Deletes data of this PhylogenyNode. Links to the other Nodes in the - * Phylogeny, the ID and the sum of external nodes are NOT deleted. Field - * "_link" (_link to Nodes in other Phylogeny) IS deleted. - * - * @see #getLink() (Last modified: 12/20/03) - */ - // final private void initializeData() { - // _indicator = 0; - // _x = 0; - // _y = 0; - // //_node_name = ""; - // _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT; - // _collapse = false; - // _link = null; - // _branch_data = null; - // _node_data = null; - // } /** * Returns whether this PhylogenyNode should be drawn as collapsed. */ @@ -635,6 +601,10 @@ public final class PhylogenyNode implements Comparable { return getNodeData().isHasEvent() && getNodeData().getEvent().isDuplication(); } + public boolean isEmpty() { + return ( ( _node_data == null ) || _node_data.isEmpty() ); + } + /** * Checks whether this PhylogenyNode is external (tip). * @@ -647,11 +617,6 @@ public final class PhylogenyNode implements Comparable { return ( getNumberOfDescendants() < 1 ); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ final public boolean isFirstChildNode() { if ( isRoot() /* and tree is rooted TODO */) { throw new UnsupportedOperationException( "Cannot determine whether the root is the first child node of its _parent." ); @@ -659,11 +624,6 @@ public final class PhylogenyNode implements Comparable { return ( getChildNodeIndex() == 0 ); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ final public boolean isFirstExternalNode() { if ( isInternal() ) { return false; @@ -716,11 +676,6 @@ public final class PhylogenyNode implements Comparable { return ( getChildNodeIndex() == ( getParent().getNumberOfDescendants() - 1 ) ); } - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ final public boolean isLastExternalNode() { if ( isInternal() ) { return false; @@ -735,28 +690,6 @@ public final class PhylogenyNode implements Comparable { return true; } - public final int calculateDepth() { - PhylogenyNode n = this; - int steps = 0; - while ( n._parent != null ) { - steps++; - n = n._parent; - } - return steps; - } - - public final double calculateDistanceToRoot() { - PhylogenyNode n = this; - double d = 0.0; - while ( n._parent != null ) { - if ( n._distance_parent > 0.0 ) { - d += n._distance_parent; - } - n = n._parent; - } - return d; - } - /** * Checks whether this PhylogenyNode is a root. * @@ -801,6 +734,12 @@ public final class PhylogenyNode implements Comparable { removeChildNode( remove_me.getChildNodeIndex() ); } + public void removeConnections() { + _parent = null; + _link = null; + _descendants = null; + } + final public void setBranchData( final BranchData branch_data ) { _branch_data = branch_data; } @@ -840,15 +779,6 @@ public final class PhylogenyNode implements Comparable { } } - final void setChildNodeOnly( final int i, final PhylogenyNode node ) { - if ( getNumberOfDescendants() <= i ) { - addChildNode( node ); - } - else { - getDescendants().set( i, node ); - } - } - /** * Sets whether this PhylogenyNode should be drawn as collapsed. */ @@ -871,19 +801,6 @@ public final class PhylogenyNode implements Comparable { _indicator = i; } - // -------------------------------------------------------------------- - // Adjust methods (related to Phylogeny construction and - // Phylogeny modification) - // -------------------------------------------------------------------- - /** - * Sets the indicators of all the children of this PhylogenyNode to zero. - */ - final void setIndicatorsToZero() { - for( final PreorderTreeIterator it = new PreorderTreeIterator( this ); it.hasNext(); ) { - it.next().setIndicator( ( byte ) 0 ); - } - } - /** * Sets the linked PhylogenyNode of this PhylogenyNode to n. Currently, this * method is only used for the speciation-_duplication assignment @@ -901,18 +818,6 @@ public final class PhylogenyNode implements Comparable { } /** - * Sets the Id of this PhylogenyNode to i. In most cases, this number - * should not be set to values lower than getNodeCount() -- which this method - * does not allow. - */ - synchronized final protected void setId( final long i ) { - if ( i < getNodeCount() ) { - throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" ); - } - _id = i; - } - - /** * Sets the _parent PhylogenyNode of this PhylogenyNode to n. */ final public void setParent( final PhylogenyNode n ) { @@ -953,6 +858,23 @@ public final class PhylogenyNode implements Comparable { _y_secondary = y_secondary; } + /** + * Swaps the the two childern of a PhylogenyNode node of this Phylogeny. + */ + public final void swapChildren() throws RuntimeException { + if ( isExternal() ) { + throw new RuntimeException( "attempt to swap descendants of external node" ); + } + if ( getNumberOfDescendants() != 2 ) { + throw new RuntimeException( "attempt to swap descendants of node with " + getNumberOfDescendants() + + " descendants" ); + } + final PhylogenyNode a = getChildNode( 0 ); + final PhylogenyNode b = getChildNode( 1 ); + setChildNode( 0, b ); + setChildNode( 1, a ); + } + // --------------------------------------------------------- // Writing of Nodes to Strings // --------------------------------------------------------- @@ -1022,23 +944,6 @@ public final class PhylogenyNode implements Comparable { } /** - * Swaps the the two childern of a PhylogenyNode node of this Phylogeny. - */ - public final void swapChildren() throws RuntimeException { - if ( isExternal() ) { - throw new RuntimeException( "attempt to swap descendants of external node" ); - } - if ( getNumberOfDescendants() != 2 ) { - throw new RuntimeException( "attempt to swap descendants of node with " + getNumberOfDescendants() - + " descendants" ); - } - final PhylogenyNode a = getChildNode( 0 ); - final PhylogenyNode b = getChildNode( 1 ); - setChildNode( 0, b ); - setChildNode( 1, a ); - } - - /** * Converts this PhylogenyNode to a New Hampshire X (NHX) String * representation. */ @@ -1122,33 +1027,53 @@ public final class PhylogenyNode implements Comparable { } /** - * Decreases the total number of all Nodes created so far by one. + * Sets the Id of this PhylogenyNode to i. In most cases, this number + * should not be set to values lower than getNodeCount() -- which this method + * does not allow. */ - final static synchronized void decreaseNodeCount() { - --NODE_COUNT; + synchronized final protected void setId( final long i ) { + if ( i < getNodeCount() ) { + throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" ); + } + _id = i; } - /** - * Returns the total number of all Nodes created so far. - * - * @return total number of Nodes (long) - */ - synchronized final public static long getNodeCount() { - return NODE_COUNT; + final BranchData getBranchDataDirectly() { + return _branch_data; + } + + final NodeData getNodeDataDirectly() { + return _node_data; + } + + final void setChildNodeOnly( final int i, final PhylogenyNode node ) { + if ( getNumberOfDescendants() <= i ) { + addChildNode( node ); + } + else { + getDescendants().set( i, node ); + } } /** - * Increases the total number of all Nodes created so far by one. + * Sets the indicators of all the children of this PhylogenyNode to zero. */ - synchronized final private static void increaseNodeCount() { - ++NODE_COUNT; + final void setIndicatorsToZero() { + for( final PreorderTreeIterator it = new PreorderTreeIterator( this ); it.hasNext(); ) { + it.next().setIndicator( ( byte ) 0 ); + } } /** - * Sets the total number of all Nodes created so far to i. + * Adds PhylogenyNode n to the list of child nodes. But does NOT set the + * _parent of n to this. + * + * @see addAsChild( PhylogenyNode n ) + * @param n + * the PhylogenyNode to add */ - synchronized final static void setNodeCount( final long i ) { - PhylogenyNode.NODE_COUNT = i; + final private void addChildNode( final PhylogenyNode child ) { + getDescendants().add( child ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx ) throws NHXFormatException, @@ -1169,12 +1094,37 @@ public final class PhylogenyNode implements Comparable { return new PhylogenyNode( nhx, taxonomy_extraction, replace_underscores ); } - private PhylogenyNode( final String nhx, - final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction, - final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException { - NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores ); - setId( PhylogenyNode.getNodeCount() ); - PhylogenyNode.increaseNodeCount(); - setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!). + /** + * Returns the total number of all Nodes created so far. + * + * @return total number of Nodes (long) + */ + synchronized final public static long getNodeCount() { + return NODE_COUNT; + } + + /** + * Decreases the total number of all Nodes created so far by one. + */ + final static synchronized void decreaseNodeCount() { + --NODE_COUNT; + } + + /** + * Sets the total number of all Nodes created so far to i. + */ + synchronized final static void setNodeCount( final long i ) { + PhylogenyNode.NODE_COUNT = i; + } + + /** + * Increases the total number of all Nodes created so far by one. + */ + synchronized final private static void increaseNodeCount() { + ++NODE_COUNT; + } + + public enum NH_CONVERSION_SUPPORT_VALUE_STYLE { + AS_INTERNAL_NODE_NAMES, IN_SQUARE_BRACKETS, NONE; } } diff --git a/forester/java/src/org/forester/phylogeny/data/NodeData.java b/forester/java/src/org/forester/phylogeny/data/NodeData.java index da6d557..c5c5d40 100644 --- a/forester/java/src/org/forester/phylogeny/data/NodeData.java +++ b/forester/java/src/org/forester/phylogeny/data/NodeData.java @@ -55,7 +55,6 @@ public class NodeData implements PhylogenyData { private String _node_name; private Event _event; private List _sequences; - private Identifier _node_identifier; private List _taxonomies; private List _distributions; private Date _date; @@ -126,9 +125,6 @@ public class NodeData implements PhylogenyData { if ( isHasEvent() ) { new_data.setEvent( ( Event ) getEvent().copy() ); } - if ( isHasNodeIdentifier() ) { - new_data.setNodeIdentifier( ( Identifier ) getNodeIdentifier().copy() ); - } if ( ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) ) { new_data.setTaxonomies( new ArrayList() ); for( final Taxonomy t : getTaxonomies() ) { @@ -202,10 +198,6 @@ public class NodeData implements PhylogenyData { return _event; } - public Identifier getNodeIdentifier() { - return _node_identifier; - } - public PropertiesMap getProperties() { return _properties; } @@ -265,13 +257,19 @@ public class NodeData implements PhylogenyData { @Override public boolean isEqual( final PhylogenyData data ) { - throw new UnsupportedOperationException(); + throw new NoSuchMethodError(); } public boolean isHasBinaryCharacters() { return getBinaryCharacters() != null; } + public boolean isEmpty() { + return ( ForesterUtil.isEmpty( _node_name ) && !isHasSequence() && !isHasTaxonomy() && !isHasBinaryCharacters() + && !isHasDate() && !isHasDistribution() && !isHasEvent() && !isHasProperties() && !isHasReference() && ( ( _vector == null ) || _vector + .isEmpty() ) ); + } + public boolean isHasDate() { return ( getDate() != null ) && ( !ForesterUtil.isEmpty( getDate().getDesc() ) || !ForesterUtil.isNull( getDate().getMax() ) @@ -290,10 +288,6 @@ public class NodeData implements PhylogenyData { return getEvent() != null; } - public boolean isHasNodeIdentifier() { - return getNodeIdentifier() != null; - } - public boolean isHasProperties() { return ( getProperties() != null ) && ( getProperties().size() > 0 ); } @@ -351,10 +345,6 @@ public class NodeData implements PhylogenyData { _event = event; } - public void setNodeIdentifier( final Identifier node_identifier ) { - _node_identifier = node_identifier; - } - public void setProperties( final PropertiesMap custom_data ) { _properties = custom_data; } @@ -457,18 +447,6 @@ public class NodeData implements PhylogenyData { @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { - if ( isHasNodeIdentifier() ) { - writer.write( ForesterUtil.LINE_SEPARATOR ); - writer.write( indentation ); - // if ( !org.forester.util.ForesterUtil.isEmpty( getNodeIdentifier().getProvider() ) ) { - // PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier() - // .getValue(), PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR, getNodeIdentifier().getProvider() ); - // } - // else { - // PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier() - // .getValue() ); - // } - } if ( isHasTaxonomy() ) { for( final Taxonomy t : getTaxonomies() ) { if ( !t.isEmpty() ) { diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index 17c56d1..05efea9 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -53,22 +53,13 @@ import org.forester.util.SequenceAccessionTools; public final class SequenceDbWsTools { - public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/"; + public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/"; public final static String EMBL_DBS_EMBL = "embl"; - public final static String EMBL_DBS_REFSEQ_P = "refseqp"; public final static String EMBL_DBS_REFSEQ_N = "refseqn"; - private final static String URL_ENC = "UTF-8"; + public final static String EMBL_DBS_REFSEQ_P = "refseqp"; private final static boolean DEBUG = true; - - private static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) - throws IOException { - final List result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return ); - if ( result.size() > 0 ) { - return parseUniProtTaxonomy( result ); - } - return null; - } + private final static String URL_ENC = "UTF-8"; public static List getTaxonomiesFromCommonNameStrict( final String cn, final int max_taxonomies_return ) @@ -95,16 +86,6 @@ public final class SequenceDbWsTools { return null; } - private static List getTaxonomiesFromScientificName( final String sn, - final int max_taxonomies_return ) - throws IOException { - final List result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return ); - if ( result.size() > 0 ) { - return parseUniProtTaxonomy( result ); - } - return null; - } - /** * Does not return "sub-types". * For example, for "Mus musculus" only returns "Mus musculus" @@ -144,125 +125,44 @@ public final class SequenceDbWsTools { return EbiDbEntry.createInstanceFromPlainText( lines ); } + public final static Accession obtainFromSeqAccession( final PhylogenyNode node ) { + Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); + if ( !isAccessionAcceptable( acc ) ) { + acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); + } + return acc; + } + public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession id, final int max_lines_to_return ) throws IOException { final List lines = queryEmblDb( id, max_lines_to_return ); return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines ); } - public static SortedSet obtainSeqInformation( final Phylogeny phy, - final boolean ext_nodes_only, - final boolean allow_to_set_taxonomic_data, - final int lines_to_return ) throws IOException { + public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data, + final int lines_to_return, + final SortedSet not_found, + final PhylogenyNode node ) throws IOException { + final Accession acc = obtainFromSeqAccession( node ); + if ( !isAccessionAcceptable( acc ) ) { + if ( node.isExternal() || !node.isEmpty() ) { + not_found.add( node.toString() ); + } + } + else { + addDataFromDbToNode( allow_to_set_taxonomic_data, lines_to_return, not_found, node, acc ); + } + } + + public final static SortedSet obtainSeqInformation( final Phylogeny phy, + final boolean ext_nodes_only, + final boolean allow_to_set_taxonomic_data, + final int lines_to_return ) throws IOException { final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); - if ( ext_nodes_only && node.isInternal() ) { - continue; - } - Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node ); - if ( ( acc == null ) - || ForesterUtil.isEmpty( acc.getSource() ) - || ForesterUtil.isEmpty( acc.getValue() ) - || ( ( acc.getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc - .getSource() != Accession.REFSEQ ) ) ) { - acc = SequenceAccessionTools.obtainAccessorFromDataFields( node ); - } - if ( ( acc == null ) - || ForesterUtil.isEmpty( acc.getSource() ) - || ForesterUtil.isEmpty( acc.getValue() ) - || ( ( acc.getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc - .getSource() != Accession.REFSEQ ) ) ) { - not_found.add( node.toString() ); - } - else { - SequenceDatabaseEntry db_entry = null; - final String query = acc.getValue(); - if ( acc.getSource() == Accession.UNIPROT ) { - if ( DEBUG ) { - System.out.println( "uniprot: " + query ); - } - try { - db_entry = obtainUniProtEntry( query, lines_to_return ); - } - catch ( FileNotFoundException e ) { - // Eat this, and move to next. - } - } - else if ( acc.getSource() == Accession.EMBL ) { - if ( DEBUG ) { - System.out.println( "embl: " + query ); - } - try { - db_entry = obtainEmblEntry( new Accession( query ), lines_to_return ); - } - catch ( FileNotFoundException e ) { - // Eat this, and move to next. - } - } - else if ( acc.getSource() == Accession.REFSEQ ) { - if ( DEBUG ) { - System.out.println( "refseq: " + query ); - } - try { - db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return ); - } - catch ( FileNotFoundException e ) { - // Eat this, and move to next. - } - } - if ( ( db_entry != null ) && !db_entry.isEmpty() ) { - final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() - : new Sequence(); - if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) { - seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) ); - } - if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { - seq.setName( db_entry.getSequenceName() ); - } - if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { - seq.setGeneName( db_entry.getGeneName() ); - } - if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) { - try { - seq.setSymbol( db_entry.getSequenceSymbol() ); - } - catch ( final PhyloXmlDataFormatException e ) { - // Eat this exception. - } - } - if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) { - for( final GoTerm go : db_entry.getGoTerms() ) { - final Annotation ann = new Annotation( go.getGoId().getId() ); - ann.setDesc( go.getName() ); - seq.addAnnotation( ann ); - } - } - if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) { - for( final Accession x : db_entry.getCrossReferences() ) { - seq.addCrossReference( x ); - } - } - final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() - : new Taxonomy(); - if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { - tax.setScientificName( db_entry.getTaxonomyScientificName() ); - } - if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) { - tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) ); - } - node.getNodeData().setTaxonomy( tax ); - node.getNodeData().setSequence( seq ); - } - else { - node.i - not_found.add( node.getName() ); - } - try { - Thread.sleep( 10 );// Sleep for 10 ms - } - catch ( final InterruptedException ie ) { - } + if ( node.isExternal() || !ext_nodes_only ) { + obtainSeqInformation( allow_to_set_taxonomic_data, lines_to_return, not_found, node ); } } return not_found; @@ -334,10 +234,122 @@ public final class SequenceDbWsTools { return queryDb( query, max_lines_to_return, BASE_UNIPROT_URL ); } + private static void addDataFromDbToNode( final boolean allow_to_set_taxonomic_data, + final int lines_to_return, + final SortedSet not_found, + final PhylogenyNode node, + final Accession acc ) throws IOException { + SequenceDatabaseEntry db_entry = null; + final String query = acc.getValue(); + if ( acc.getSource() == Accession.UNIPROT ) { + if ( DEBUG ) { + System.out.println( "uniprot: " + query ); + } + try { + db_entry = obtainUniProtEntry( query, lines_to_return ); + } + catch ( final FileNotFoundException e ) { + // Eat this, and move to next. + } + } + else if ( acc.getSource() == Accession.EMBL ) { + if ( DEBUG ) { + System.out.println( "embl: " + query ); + } + try { + db_entry = obtainEmblEntry( new Accession( query ), lines_to_return ); + } + catch ( final FileNotFoundException e ) { + // Eat this, and move to next. + } + } + else if ( acc.getSource() == Accession.REFSEQ ) { + if ( DEBUG ) { + System.out.println( "refseq: " + query ); + } + try { + db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return ); + } + catch ( final FileNotFoundException e ) { + // Eat this, and move to next. + } + } + if ( ( db_entry != null ) && !db_entry.isEmpty() ) { + final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence(); + if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) { + seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) ); + } + if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) { + seq.setName( db_entry.getSequenceName() ); + } + if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { + seq.setGeneName( db_entry.getGeneName() ); + } + if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) { + try { + seq.setSymbol( db_entry.getSequenceSymbol() ); + } + catch ( final PhyloXmlDataFormatException e ) { + // Eat this exception. + } + } + if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) { + for( final GoTerm go : db_entry.getGoTerms() ) { + final Annotation ann = new Annotation( go.getGoId().getId() ); + ann.setDesc( go.getName() ); + seq.addAnnotation( ann ); + } + } + if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) { + for( final Accession x : db_entry.getCrossReferences() ) { + seq.addCrossReference( x ); + } + } + final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); + if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { + tax.setScientificName( db_entry.getTaxonomyScientificName() ); + } + if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) { + tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) ); + } + node.getNodeData().setTaxonomy( tax ); + node.getNodeData().setSequence( seq ); + } + else { + if ( node.isExternal() || !node.isEmpty() ) { + not_found.add( node.toString() ); + } + } + try { + Thread.sleep( 10 );// Sleep for 10 ms + } + catch ( final InterruptedException ie ) { + } + } + private static String encode( final String str ) throws UnsupportedEncodingException { return URLEncoder.encode( str.trim(), URL_ENC ); } + private static List getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return ) + throws IOException { + final List result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return ); + if ( result.size() > 0 ) { + return parseUniProtTaxonomy( result ); + } + return null; + } + + private static List getTaxonomiesFromScientificName( final String sn, + final int max_taxonomies_return ) + throws IOException { + final List result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return ); + if ( result.size() > 0 ) { + return parseUniProtTaxonomy( result ); + } + return null; + } + private static List getTaxonomyStringFromCommonName( final String cn, final int max_lines_to_return ) throws IOException { return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return ); @@ -358,6 +370,11 @@ public final class SequenceDbWsTools { return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return ); } + private final static boolean isAccessionAcceptable( final Accession acc ) { + return ( !( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) || ( ( acc + .getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc.getSource() != Accession.REFSEQ ) ) ) ); + } + private static List parseUniProtTaxonomy( final List result ) throws IOException { final List taxonomies = new ArrayList(); for( final String line : result ) { -- 1.7.10.2