}
if ( ForesterUtil.isEmpty( query ) && ( node.getNodeData().getSequence().getAccession() != null )
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getAccession().getValue() ) ) {
- final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getAccession()
- .getValue() );
+ final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence()
+ .getAccession().getValue() );
if ( id != null ) {
query = id.getValue();
}
}
if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
- final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getName() );
+ final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence()
+ .getName() );
if ( id != null ) {
query = id.getValue();
}
}
if ( ForesterUtil.isEmpty( query ) && !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) {
- final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getSymbol() );
+ final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence()
+ .getSymbol() );
if ( id != null ) {
query = id.getValue();
}
}
if ( ForesterUtil.isEmpty( query )
&& !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) {
- final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence().getGeneName() );
+ final Accession id = SequenceAccessionTools.parseAccessorFromString( node.getNodeData().getSequence()
+ .getGeneName() );
if ( id != null ) {
query = id.getValue();
}
}
final StringBuffer sb = new StringBuffer();
if ( not_found.size() == 1 ) {
- sb.append( "Data for the following sequence identifier was not found:\n" );
+ sb.append( "For the following node no data was found:\n" );
}
else {
- sb.append( "Data for the following sequence identifiers was not found (total: " + not_found.size()
- + "):\n" );
+ // Only one colon: the one that ends the header line, after the total.
+ sb.append( "For the following nodes no data was found (total: " + not_found.size() + "):\n" );
}
int i = 0;
for( final String string : not_found ) {
package org.forester.io.parsers.phyloxml;
-/*
- * @author Christian Zmasek TODO To change the template for this generated type
- * comment go to Window - Preferences - Java - Code Style - Code Templates
- */
public final class PhyloXmlMapping {
public static final String ACCESSION = "accession";
public final static String EVENT_TYPE = "type";
public final static String EVENTS = "events";
public static final String ID_REF = "id_ref";
- // public final static String NODE_IDENTIFIER = "node_id";
public final static String IDENTIFIER = "id";
public final static String IDENTIFIER_PROVIDER_ATTR = "provider";
public static final String NODE_COLLAPSE = "collapse";
*/
public final class PhylogenyNode implements Comparable<PhylogenyNode> {
- public enum NH_CONVERSION_SUPPORT_VALUE_STYLE {
- NONE, IN_SQUARE_BRACKETS, AS_INTERNAL_NODE_NAMES;
- }
private static long NODE_COUNT = 0;
- private byte _indicator;
- private long _id;
- private int _sum_ext_nodes;
- private float _x;
- private float _y;
- private double _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ private BranchData _branch_data;
private boolean _collapse;
- private PhylogenyNode _parent;
- private PhylogenyNode _link;
private ArrayList<PhylogenyNode> _descendants;
+ private double _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
+ private long _id;
+ private byte _indicator;
+ private PhylogenyNode _link;
private NodeData _node_data;
- private BranchData _branch_data;
+ private PhylogenyNode _parent;
+ private int _sum_ext_nodes;
+ private float _x;
private float _x_secondary;
+ private float _y;
private float _y_secondary;
/**
setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!)
}
- public void removeConnections() {
- _parent = null;
- _link = null;
- _descendants = null;
- }
-
- public boolean isEmpty() {
+ private PhylogenyNode( final String nhx,
+ final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
+ final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException {
+ NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores );
+ setId( PhylogenyNode.getNodeCount() );
+ PhylogenyNode.increaseNodeCount();
+ setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!).
}
/**
n.setParent( this );
}
- /**
- * Adds PhylogenyNode n to the list of child nodes. But does NOT set the
- * _parent of n to this.
- *
- * @see addAsChild( PhylogenyNode n )
- * @param n
- * the PhylogenyNode to add
- */
- final private void addChildNode( final PhylogenyNode child ) {
- getDescendants().add( child );
+ /**
+  * Counts the parent links between this node and the first ancestor
+  * that has no parent.
+  *
+  * @return the number of steps walked; 0 if this node has no parent
+  */
+ public final int calculateDepth() {
+     int depth = 0;
+     for( PhylogenyNode current = this; current._parent != null; current = current._parent ) {
+         depth++;
+     }
+     return depth;
+ }
+
+ /**
+  * Adds up the branch lengths (_distance_parent) found while walking from
+  * this node up to the first ancestor that has no parent. Only values
+  * greater than 0.0 contribute; all others are skipped.
+  *
+  * @return the sum of the positive branch lengths on that path
+  */
+ public final double calculateDistanceToRoot() {
+     double total = 0.0;
+     for( PhylogenyNode current = this; current._parent != null; current = current._parent ) {
+         final double branch = current._distance_parent;
+         if ( branch > 0.0 ) {
+             total += branch;
+         }
+     }
+     return total;
}
@Override
return getName().compareTo( n.getName() );
}
- // ---------------------------------------------------------
- // Copy and delete Nodes, copy subtress
- // ---------------------------------------------------------
/**
* Returns a new PhylogenyNode which has its data copied from this
* PhylogenyNode. Links to the other Nodes in the same Phylogeny are NOT
}
}
- // ---------------------------------------------------------
- // Obtaining of Nodes
- // ---------------------------------------------------------
+ /**
+ * Returns the list stored in the _descendants field. The list itself is
+ * handed out, not a copy, so modifications made by the caller change this
+ * node. The result is null after removeConnections() has been called.
+ * NOTE(review): despite the name this looks like the list of direct
+ * children only -- confirm.
+ */
+ final public List<PhylogenyNode> getAllDescendants() {
+ return _descendants;
+ }
+
/**
* Returns a List containing references to all external children of this
* PhylogenyNode.
return _branch_data;
}
- final BranchData getBranchDataDirectly() {
- return _branch_data;
- }
-
/**
* This return child node n of this node.
*
}
/**
+ * Returns the ID (long) of this PhylogenyNode.
+ */
+ final public long getId() {
+ return _id;
+ }
+
+ /**
* Returns the _indicator value of this PhylogenyNode.
*/
public final byte getIndicator() {
return _link;
}
+ /**
+ * Returns the name of this node, as reported by getNodeName() of the
+ * object returned from getNodeData().
+ */
+ final public String getName() {
+ return getNodeData().getNodeName();
+ }
+
/**
* Returns a refernce to the next external PhylogenyNode of this
* PhylogenyNode. TODO should be in Phylogeny. Returns null if no next
return _node_data;
}
- final NodeData getNodeDataDirectly() {
- return _node_data;
- }
-
- // ---------------------------------------------------------
- // Set and get methods for Nodes
- // ---------------------------------------------------------
- /**
- * Returns the ID (int) of this PhylogenyNode.
- */
- final public long getId() {
- return _id;
- }
-
- final public String getName() {
- return getNodeData().getNodeName();
- }
-
- final public List<PhylogenyNode> getAllDescendants() {
- return _descendants;
- }
-
final public int getNumberOfDescendants() {
if ( _descendants == null ) {
return 0;
return result;
}
- // final private void init() {
- //_descendants = new ArrayList<PhylogenyNode>();
- // _parent = null; //TODO not needed?
- // _id = 0; //TODO not needed?
- //initializeData(); //TODO not needed?
- //}
- /**
- * Deletes data of this PhylogenyNode. Links to the other Nodes in the
- * Phylogeny, the ID and the sum of external nodes are NOT deleted. Field
- * "_link" (_link to Nodes in other Phylogeny) IS deleted.
- *
- * @see #getLink() (Last modified: 12/20/03)
- */
- // final private void initializeData() {
- // _indicator = 0;
- // _x = 0;
- // _y = 0;
- // //_node_name = "";
- // _distance_parent = PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT;
- // _collapse = false;
- // _link = null;
- // _branch_data = null;
- // _node_data = null;
- // }
/**
* Returns whether this PhylogenyNode should be drawn as collapsed.
*/
return getNodeData().isHasEvent() && getNodeData().getEvent().isDuplication();
}
+ /**
+ * Tells whether this node carries no data: true when the _node_data field
+ * is null or when that object reports itself as empty.
+ */
+ public boolean isEmpty() {
+ return ( ( _node_data == null ) || _node_data.isEmpty() );
+ }
+
/**
* Checks whether this PhylogenyNode is external (tip).
*
return ( getNumberOfDescendants() < 1 );
}
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
final public boolean isFirstChildNode() {
if ( isRoot() /* and tree is rooted TODO */) {
throw new UnsupportedOperationException( "Cannot determine whether the root is the first child node of its _parent." );
return ( getChildNodeIndex() == 0 );
}
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
final public boolean isFirstExternalNode() {
if ( isInternal() ) {
return false;
return ( getChildNodeIndex() == ( getParent().getNumberOfDescendants() - 1 ) );
}
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
final public boolean isLastExternalNode() {
if ( isInternal() ) {
return false;
return true;
}
- public final int calculateDepth() {
- PhylogenyNode n = this;
- int steps = 0;
- while ( n._parent != null ) {
- steps++;
- n = n._parent;
- }
- return steps;
- }
-
- public final double calculateDistanceToRoot() {
- PhylogenyNode n = this;
- double d = 0.0;
- while ( n._parent != null ) {
- if ( n._distance_parent > 0.0 ) {
- d += n._distance_parent;
- }
- n = n._parent;
- }
- return d;
- }
-
/**
* Checks whether this PhylogenyNode is a root.
*
removeChildNode( remove_me.getChildNodeIndex() );
}
+ /**
+ * Drops this node's references to other nodes by setting the _parent,
+ * _link and _descendants fields to null. Only this node's own fields are
+ * written; nodes that point at this one are not updated here.
+ */
+ public void removeConnections() {
+ _parent = null;
+ _link = null;
+ _descendants = null;
+ }
+
final public void setBranchData( final BranchData branch_data ) {
_branch_data = branch_data;
}
}
}
- final void setChildNodeOnly( final int i, final PhylogenyNode node ) {
- if ( getNumberOfDescendants() <= i ) {
- addChildNode( node );
- }
- else {
- getDescendants().set( i, node );
- }
- }
-
/**
* Sets whether this PhylogenyNode should be drawn as collapsed.
*/
_indicator = i;
}
- // --------------------------------------------------------------------
- // Adjust methods (related to Phylogeny construction and
- // Phylogeny modification)
- // --------------------------------------------------------------------
- /**
- * Sets the indicators of all the children of this PhylogenyNode to zero.
- */
- final void setIndicatorsToZero() {
- for( final PreorderTreeIterator it = new PreorderTreeIterator( this ); it.hasNext(); ) {
- it.next().setIndicator( ( byte ) 0 );
- }
- }
-
/**
* Sets the linked PhylogenyNode of this PhylogenyNode to n. Currently, this
* method is only used for the speciation-_duplication assignment
}
/**
- * Sets the Id of this PhylogenyNode to i. In most cases, this number
- * should not be set to values lower than getNodeCount() -- which this method
- * does not allow.
- */
- synchronized final protected void setId( final long i ) {
- if ( i < getNodeCount() ) {
- throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" );
- }
- _id = i;
- }
-
- /**
* Sets the _parent PhylogenyNode of this PhylogenyNode to n.
*/
final public void setParent( final PhylogenyNode n ) {
_y_secondary = y_secondary;
}
+ /**
+  * Swaps the two children of this PhylogenyNode.
+  *
+  * @throws RuntimeException
+  *             if this node is external or does not have exactly two
+  *             descendants
+  */
+ public final void swapChildren() throws RuntimeException {
+     if ( isExternal() ) {
+         throw new RuntimeException( "attempt to swap descendants of external node" );
+     }
+     final int count = getNumberOfDescendants();
+     if ( count != 2 ) {
+         throw new RuntimeException( "attempt to swap descendants of node with " + count + " descendants" );
+     }
+     final PhylogenyNode first = getChildNode( 0 );
+     final PhylogenyNode second = getChildNode( 1 );
+     setChildNode( 0, second );
+     setChildNode( 1, first );
+ }
+
// ---------------------------------------------------------
// Writing of Nodes to Strings
// ---------------------------------------------------------
}
/**
- * Swaps the the two childern of a PhylogenyNode node of this Phylogeny.
- */
- public final void swapChildren() throws RuntimeException {
- if ( isExternal() ) {
- throw new RuntimeException( "attempt to swap descendants of external node" );
- }
- if ( getNumberOfDescendants() != 2 ) {
- throw new RuntimeException( "attempt to swap descendants of node with " + getNumberOfDescendants()
- + " descendants" );
- }
- final PhylogenyNode a = getChildNode( 0 );
- final PhylogenyNode b = getChildNode( 1 );
- setChildNode( 0, b );
- setChildNode( 1, a );
- }
-
- /**
* Converts this PhylogenyNode to a New Hampshire X (NHX) String
* representation.
*/
}
/**
- * Decreases the total number of all Nodes created so far by one.
+ * Sets the Id of this PhylogenyNode to i. In most cases, this number
+ * should not be set to values lower than getNodeCount() -- which this method
+ * does not allow.
*/
- final static synchronized void decreaseNodeCount() {
- --NODE_COUNT;
+ synchronized final protected void setId( final long i ) {
+ if ( i < getNodeCount() ) {
+ throw new IllegalArgumentException( "attempt to set node id to a value less than total node count (thus violating the uniqueness of node ids)" );
+ }
+ _id = i;
}
- /**
- * Returns the total number of all Nodes created so far.
- *
- * @return total number of Nodes (long)
- */
- synchronized final public static long getNodeCount() {
- return NODE_COUNT;
+ /**
+ * Returns the _branch_data field exactly as stored, without any further
+ * processing. Package-private.
+ */
+ final BranchData getBranchDataDirectly() {
+ return _branch_data;
+ }
+
+ /**
+ * Returns the _node_data field exactly as stored, without any further
+ * processing. Package-private.
+ */
+ final NodeData getNodeDataDirectly() {
+ return _node_data;
+ }
+
+ /**
+  * Puts node into child slot i of this node. An existing slot is
+  * overwritten; if i is not smaller than the current number of
+  * descendants, node is appended via addChildNode() instead.
+  */
+ final void setChildNodeOnly( final int i, final PhylogenyNode node ) {
+     if ( i < getNumberOfDescendants() ) {
+         getDescendants().set( i, node );
+         return;
+     }
+     addChildNode( node );
}
/**
- * Increases the total number of all Nodes created so far by one.
+ * Sets the indicators of all the children of this PhylogenyNode to zero.
*/
- synchronized final private static void increaseNodeCount() {
- ++NODE_COUNT;
+ final void setIndicatorsToZero() {
+ for( final PreorderTreeIterator it = new PreorderTreeIterator( this ); it.hasNext(); ) {
+ it.next().setIndicator( ( byte ) 0 );
+ }
}
/**
- * Sets the total number of all Nodes created so far to i.
+ * Adds PhylogenyNode n to the list of child nodes. But does NOT set the
+ * _parent of n to this.
+ *
+ * @see addAsChild( PhylogenyNode n )
+ * @param n
+ * the PhylogenyNode to add
*/
- synchronized final static void setNodeCount( final long i ) {
- PhylogenyNode.NODE_COUNT = i;
+ final private void addChildNode( final PhylogenyNode child ) {
+ getDescendants().add( child );
}
public static PhylogenyNode createInstanceFromNhxString( final String nhx ) throws NHXFormatException,
return new PhylogenyNode( nhx, taxonomy_extraction, replace_underscores );
}
- private PhylogenyNode( final String nhx,
- final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
- final boolean replace_underscores ) throws NHXFormatException, PhyloXmlDataFormatException {
- NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores );
- setId( PhylogenyNode.getNodeCount() );
- PhylogenyNode.increaseNodeCount();
- setSumExtNodes( 1 ); // For ext node, this number is 1 (not 0!!).
+ /**
+ * Returns the current value of the static NODE_COUNT counter (the total
+ * number of all Nodes created so far). Declared static synchronized, like
+ * the other methods that read or write NODE_COUNT.
+ *
+ * @return total number of Nodes (long)
+ */
+ synchronized final public static long getNodeCount() {
+ return NODE_COUNT;
+ }
+
+ /**
+ * Decreases the static NODE_COUNT counter (the total number of all Nodes
+ * created so far) by one. No lower bound is enforced here.
+ */
+ final static synchronized void decreaseNodeCount() {
+ --NODE_COUNT;
+ }
+
+ /**
+ * Overwrites the static NODE_COUNT counter (the total number of all Nodes
+ * created so far) with i. The value is stored without validation.
+ *
+ * @param i
+ *            the new counter value
+ */
+ synchronized final static void setNodeCount( final long i ) {
+ PhylogenyNode.NODE_COUNT = i;
+ }
+
+ /**
+ * Increases the static NODE_COUNT counter (the total number of all Nodes
+ * created so far) by one.
+ */
+ synchronized final private static void increaseNodeCount() {
+ ++NODE_COUNT;
+ }
+
+ /**
+ * NOTE(review): presumably selects how support values are written when a
+ * node is converted to New Hampshire format -- confirm against the callers.
+ */
+ public enum NH_CONVERSION_SUPPORT_VALUE_STYLE {
+ AS_INTERNAL_NODE_NAMES, IN_SQUARE_BRACKETS, NONE;
}
}
private String _node_name;
private Event _event;
private List<Sequence> _sequences;
- private Identifier _node_identifier;
private List<Taxonomy> _taxonomies;
private List<Distribution> _distributions;
private Date _date;
if ( isHasEvent() ) {
new_data.setEvent( ( Event ) getEvent().copy() );
}
- if ( isHasNodeIdentifier() ) {
- new_data.setNodeIdentifier( ( Identifier ) getNodeIdentifier().copy() );
- }
if ( ( getTaxonomies() != null ) && ( getTaxonomies().size() > 0 ) ) {
new_data.setTaxonomies( new ArrayList<Taxonomy>() );
for( final Taxonomy t : getTaxonomies() ) {
return _event;
}
- public Identifier getNodeIdentifier() {
- return _node_identifier;
- }
-
public PropertiesMap getProperties() {
return _properties;
}
+ /**
+ * Equality comparison is not implemented for this class; every call fails.
+ * An UnsupportedOperationException is used (not a java.lang.Error such as
+ * NoSuchMethodError, which is reserved for linkage failures) so that
+ * callers handling ordinary exceptions still see the failure.
+ *
+ * @throws UnsupportedOperationException
+ *             always
+ */
@Override
public boolean isEqual( final PhylogenyData data ) {
throw new UnsupportedOperationException();
}
public boolean isHasBinaryCharacters() {
return getBinaryCharacters() != null;
}
+ /**
+  * Tells whether this NodeData holds nothing: no node name, none of the
+  * data reported by the isHas...() checks below, and a _vector that is
+  * null or empty.
+  */
+ public boolean isEmpty() {
+     if ( !ForesterUtil.isEmpty( _node_name ) ) {
+         return false;
+     }
+     if ( isHasSequence() || isHasTaxonomy() || isHasBinaryCharacters() || isHasDate() || isHasDistribution()
+             || isHasEvent() || isHasProperties() || isHasReference() ) {
+         return false;
+     }
+     return ( _vector == null ) || _vector.isEmpty();
+ }
+
public boolean isHasDate() {
return ( getDate() != null )
&& ( !ForesterUtil.isEmpty( getDate().getDesc() ) || !ForesterUtil.isNull( getDate().getMax() )
return getEvent() != null;
}
- public boolean isHasNodeIdentifier() {
- return getNodeIdentifier() != null;
- }
-
public boolean isHasProperties() {
return ( getProperties() != null ) && ( getProperties().size() > 0 );
}
_event = event;
}
- public void setNodeIdentifier( final Identifier node_identifier ) {
- _node_identifier = node_identifier;
- }
-
public void setProperties( final PropertiesMap custom_data ) {
_properties = custom_data;
}
@Override
public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
- if ( isHasNodeIdentifier() ) {
- writer.write( ForesterUtil.LINE_SEPARATOR );
- writer.write( indentation );
- // if ( !org.forester.util.ForesterUtil.isEmpty( getNodeIdentifier().getProvider() ) ) {
- // PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier()
- // .getValue(), PhyloXmlMapping.IDENTIFIER_PROVIDER_ATTR, getNodeIdentifier().getProvider() );
- // }
- // else {
- // PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.NODE_IDENTIFIER, getNodeIdentifier()
- // .getValue() );
- // }
- }
if ( isHasTaxonomy() ) {
for( final Taxonomy t : getTaxonomies() ) {
if ( !t.isEmpty() ) {
public final class SequenceDbWsTools {
- public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
public final static String BASE_EMBL_DB_URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/";
+ public final static String BASE_UNIPROT_URL = "http://www.uniprot.org/";
public final static String EMBL_DBS_EMBL = "embl";
- public final static String EMBL_DBS_REFSEQ_P = "refseqp";
public final static String EMBL_DBS_REFSEQ_N = "refseqn";
- private final static String URL_ENC = "UTF-8";
+ public final static String EMBL_DBS_REFSEQ_P = "refseqp";
private final static boolean DEBUG = true;
-
- private static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return )
- throws IOException {
- final List<String> result = getTaxonomyStringFromCommonName( cn, max_taxonomies_return );
- if ( result.size() > 0 ) {
- return parseUniProtTaxonomy( result );
- }
- return null;
- }
+ private final static String URL_ENC = "UTF-8";
public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
final int max_taxonomies_return )
return null;
}
- private static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
- final int max_taxonomies_return )
- throws IOException {
- final List<String> result = getTaxonomyStringFromScientificName( sn, max_taxonomies_return );
- if ( result.size() > 0 ) {
- return parseUniProtTaxonomy( result );
- }
- return null;
- }
-
/**
* Does not return "sub-types".
* For example, for "Mus musculus" only returns "Mus musculus"
return EbiDbEntry.createInstanceFromPlainText( lines );
}
+ /**
+  * Determines the accession to use for node. The result of
+  * SequenceAccessionTools.obtainFromSeqAccession() is returned if
+  * isAccessionAcceptable() approves it; otherwise the result of
+  * SequenceAccessionTools.obtainAccessorFromDataFields() is returned as is
+  * (it is not checked here, so callers must validate it themselves).
+  */
+ public final static Accession obtainFromSeqAccession( final PhylogenyNode node ) {
+     final Accession primary = SequenceAccessionTools.obtainFromSeqAccession( node );
+     if ( isAccessionAcceptable( primary ) ) {
+         return primary;
+     }
+     return SequenceAccessionTools.obtainAccessorFromDataFields( node );
+ }
+
public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl( final Accession id, final int max_lines_to_return )
throws IOException {
final List<String> lines = queryEmblDb( id, max_lines_to_return );
return EbiDbEntry.createInstanceFromPlainTextForRefSeq( lines );
}
- public static SortedSet<String> obtainSeqInformation( final Phylogeny phy,
- final boolean ext_nodes_only,
- final boolean allow_to_set_taxonomic_data,
- final int lines_to_return ) throws IOException {
+ /**
+  * Tries to fill a single node with data from a sequence database. With an
+  * acceptable accession the work is handed to addDataFromDbToNode();
+  * without one, node.toString() is added to not_found, unless the node is
+  * an internal node that is empty.
+  *
+  * @param allow_to_set_taxonomic_data
+  *            passed through to addDataFromDbToNode()
+  * @param lines_to_return
+  *            passed through to addDataFromDbToNode()
+  * @param not_found
+  *            collects the nodes for which nothing could be obtained
+  * @param node
+  *            the node to process
+  * @throws IOException
+  *             propagated from the database lookup
+  */
+ public final static void obtainSeqInformation( final boolean allow_to_set_taxonomic_data,
+                                                final int lines_to_return,
+                                                final SortedSet<String> not_found,
+                                                final PhylogenyNode node ) throws IOException {
+     final Accession acc = obtainFromSeqAccession( node );
+     if ( isAccessionAcceptable( acc ) ) {
+         addDataFromDbToNode( allow_to_set_taxonomic_data, lines_to_return, not_found, node, acc );
+         return;
+     }
+     if ( node.isExternal() || !node.isEmpty() ) {
+         not_found.add( node.toString() );
+     }
+ }
+
+ public final static SortedSet<String> obtainSeqInformation( final Phylogeny phy,
+ final boolean ext_nodes_only,
+ final boolean allow_to_set_taxonomic_data,
+ final int lines_to_return ) throws IOException {
final SortedSet<String> not_found = new TreeSet<String>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
- if ( ext_nodes_only && node.isInternal() ) {
- continue;
- }
- Accession acc = SequenceAccessionTools.obtainFromSeqAccession( node );
- if ( ( acc == null )
- || ForesterUtil.isEmpty( acc.getSource() )
- || ForesterUtil.isEmpty( acc.getValue() )
- || ( ( acc.getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc
- .getSource() != Accession.REFSEQ ) ) ) {
- acc = SequenceAccessionTools.obtainAccessorFromDataFields( node );
- }
- if ( ( acc == null )
- || ForesterUtil.isEmpty( acc.getSource() )
- || ForesterUtil.isEmpty( acc.getValue() )
- || ( ( acc.getSource() != Accession.UNIPROT ) && ( acc.getSource() != Accession.EMBL ) && ( acc
- .getSource() != Accession.REFSEQ ) ) ) {
- not_found.add( node.toString() );
- }
- else {
- SequenceDatabaseEntry db_entry = null;
- final String query = acc.getValue();
- if ( acc.getSource() == Accession.UNIPROT ) {
- if ( DEBUG ) {
- System.out.println( "uniprot: " + query );
- }
- try {
- db_entry = obtainUniProtEntry( query, lines_to_return );
- }
- catch ( FileNotFoundException e ) {
- // Eat this, and move to next.
- }
- }
- else if ( acc.getSource() == Accession.EMBL ) {
- if ( DEBUG ) {
- System.out.println( "embl: " + query );
- }
- try {
- db_entry = obtainEmblEntry( new Accession( query ), lines_to_return );
- }
- catch ( FileNotFoundException e ) {
- // Eat this, and move to next.
- }
- }
- else if ( acc.getSource() == Accession.REFSEQ ) {
- if ( DEBUG ) {
- System.out.println( "refseq: " + query );
- }
- try {
- db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return );
- }
- catch ( FileNotFoundException e ) {
- // Eat this, and move to next.
- }
- }
- if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
- final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence()
- : new Sequence();
- if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
- seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) );
- }
- if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
- seq.setName( db_entry.getSequenceName() );
- }
- if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
- seq.setGeneName( db_entry.getGeneName() );
- }
- if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
- try {
- seq.setSymbol( db_entry.getSequenceSymbol() );
- }
- catch ( final PhyloXmlDataFormatException e ) {
- // Eat this exception.
- }
- }
- if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
- for( final GoTerm go : db_entry.getGoTerms() ) {
- final Annotation ann = new Annotation( go.getGoId().getId() );
- ann.setDesc( go.getName() );
- seq.addAnnotation( ann );
- }
- }
- if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) {
- for( final Accession x : db_entry.getCrossReferences() ) {
- seq.addCrossReference( x );
- }
- }
- final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
- : new Taxonomy();
- if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
- tax.setScientificName( db_entry.getTaxonomyScientificName() );
- }
- if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
- tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
- }
- node.getNodeData().setTaxonomy( tax );
- node.getNodeData().setSequence( seq );
- }
- else {
- node.i
- not_found.add( node.getName() );
- }
- try {
- Thread.sleep( 10 );// Sleep for 10 ms
- }
- catch ( final InterruptedException ie ) {
- }
+ if ( node.isExternal() || !ext_nodes_only ) {
+ obtainSeqInformation( allow_to_set_taxonomic_data, lines_to_return, not_found, node );
}
}
return not_found;
return queryDb( query, max_lines_to_return, BASE_UNIPROT_URL );
}
+ /**
+  * Queries the database selected by the source of acc (UniProt, EMBL or
+  * RefSeq) for acc's value and copies what comes back onto node: accession,
+  * sequence name, gene name, symbol, GO terms (as annotations), cross
+  * references and taxonomy. If nothing usable comes back, node.toString()
+  * is added to not_found, unless the node is an internal node that is
+  * empty. Sleeps 10 ms after every call.
+  *
+  * @param allow_to_set_taxonomic_data
+  *            whether the taxonomy identifier may be set from the entry
+  * @param lines_to_return
+  *            passed on to the database query methods
+  * @param not_found
+  *            collects the nodes for which nothing could be obtained
+  * @param node
+  *            the node to update
+  * @param acc
+  *            the accession to look up; must not be null
+  * @throws IOException
+  *             on query failures other than FileNotFoundException
+  */
+ private static void addDataFromDbToNode( final boolean allow_to_set_taxonomic_data,
+                                          final int lines_to_return,
+                                          final SortedSet<String> not_found,
+                                          final PhylogenyNode node,
+                                          final Accession acc ) throws IOException {
+     SequenceDatabaseEntry db_entry = null;
+     final String query = acc.getValue();
+     // Sources are compared by value: a source that was read from a file is
+     // not necessarily the same object as the Accession constant.
+     if ( Accession.UNIPROT.equals( acc.getSource() ) ) {
+         if ( DEBUG ) {
+             System.out.println( "uniprot: " + query );
+         }
+         try {
+             db_entry = obtainUniProtEntry( query, lines_to_return );
+         }
+         catch ( final FileNotFoundException e ) {
+             // Deliberately ignored: db_entry stays null and the node is
+             // handled as "not found" below.
+         }
+     }
+     else if ( Accession.EMBL.equals( acc.getSource() ) ) {
+         if ( DEBUG ) {
+             System.out.println( "embl: " + query );
+         }
+         try {
+             db_entry = obtainEmblEntry( new Accession( query ), lines_to_return );
+         }
+         catch ( final FileNotFoundException e ) {
+             // Deliberately ignored, see above.
+         }
+     }
+     else if ( Accession.REFSEQ.equals( acc.getSource() ) ) {
+         if ( DEBUG ) {
+             System.out.println( "refseq: " + query );
+         }
+         try {
+             db_entry = obtainRefSeqEntryFromEmbl( new Accession( query ), lines_to_return );
+         }
+         catch ( final FileNotFoundException e ) {
+             // Deliberately ignored, see above.
+         }
+     }
+     if ( ( db_entry != null ) && !db_entry.isEmpty() ) {
+         // Extend the existing sequence/taxonomy objects if the node has them.
+         final Sequence seq = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence() : new Sequence();
+         if ( !ForesterUtil.isEmpty( db_entry.getAccession() ) ) {
+             seq.setAccession( new Accession( db_entry.getAccession(), acc.getSource() ) );
+         }
+         if ( !ForesterUtil.isEmpty( db_entry.getSequenceName() ) ) {
+             seq.setName( db_entry.getSequenceName() );
+         }
+         if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
+             seq.setGeneName( db_entry.getGeneName() );
+         }
+         if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
+             try {
+                 seq.setSymbol( db_entry.getSequenceSymbol() );
+             }
+             catch ( final PhyloXmlDataFormatException e ) {
+                 // Best effort: a symbol the setter rejects is simply not set.
+             }
+         }
+         if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
+             for( final GoTerm go : db_entry.getGoTerms() ) {
+                 final Annotation ann = new Annotation( go.getGoId().getId() );
+                 ann.setDesc( go.getName() );
+                 seq.addAnnotation( ann );
+             }
+         }
+         if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) {
+             for( final Accession x : db_entry.getCrossReferences() ) {
+                 seq.addCrossReference( x );
+             }
+         }
+         final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy();
+         if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
+             tax.setScientificName( db_entry.getTaxonomyScientificName() );
+         }
+         if ( allow_to_set_taxonomic_data && !ForesterUtil.isEmpty( db_entry.getTaxonomyIdentifier() ) ) {
+             tax.setIdentifier( new Identifier( db_entry.getTaxonomyIdentifier(), "uniprot" ) );
+         }
+         node.getNodeData().setTaxonomy( tax );
+         node.getNodeData().setSequence( seq );
+     }
+     else {
+         if ( node.isExternal() || !node.isEmpty() ) {
+             not_found.add( node.toString() );
+         }
+     }
+     try {
+         Thread.sleep( 10 );// Sleep for 10 ms
+     }
+     catch ( final InterruptedException ie ) {
+         // Keep the interrupt visible to the caller instead of losing it.
+         Thread.currentThread().interrupt();
+     }
+ }
+
private static String encode( final String str ) throws UnsupportedEncodingException {
return URLEncoder.encode( str.trim(), URL_ENC );
}
+ /**
+  * Fetches the taxonomy lines for common name cn via
+  * getTaxonomyStringFromCommonName() and parses them.
+  *
+  * @return the parsed taxonomies, or null if the query returned no lines
+  */
+ private static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String cn, final int max_taxonomies_return )
+         throws IOException {
+     final List<String> lines = getTaxonomyStringFromCommonName( cn, max_taxonomies_return );
+     return lines.isEmpty() ? null : parseUniProtTaxonomy( lines );
+ }
+
+ /**
+  * Fetches the taxonomy lines for scientific name sn via
+  * getTaxonomyStringFromScientificName() and parses them.
+  *
+  * @return the parsed taxonomies, or null if the query returned no lines
+  */
+ private static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String sn,
+                                                                       final int max_taxonomies_return )
+         throws IOException {
+     final List<String> lines = getTaxonomyStringFromScientificName( sn, max_taxonomies_return );
+     return lines.isEmpty() ? null : parseUniProtTaxonomy( lines );
+ }
+
private static List<String> getTaxonomyStringFromCommonName( final String cn, final int max_lines_to_return )
throws IOException {
return queryUniprot( "taxonomy/?query=common%3a%22" + encode( cn ) + "%22&format=tab", max_lines_to_return );
return queryUniprot( "taxonomy/?query=mnemonic%3a%22" + encode( code ) + "%22&format=tab", max_lines_to_return );
}
+ /**
+  * Tells whether acc can be used for a database lookup: it must be
+  * non-null, have a non-empty source and value, and its source must be
+  * UniProt, EMBL or RefSeq. Sources are compared by value, so a source
+  * that is equal to, but not the same object as, the Accession constant is
+  * accepted as well.
+  */
+ private final static boolean isAccessionAcceptable( final Accession acc ) {
+     if ( ( acc == null ) || ForesterUtil.isEmpty( acc.getSource() ) || ForesterUtil.isEmpty( acc.getValue() ) ) {
+         return false;
+     }
+     return Accession.UNIPROT.equals( acc.getSource() ) || Accession.EMBL.equals( acc.getSource() )
+             || Accession.REFSEQ.equals( acc.getSource() );
+ }
+
private static List<UniProtTaxonomy> parseUniProtTaxonomy( final List<String> result ) throws IOException {
final List<UniProtTaxonomy> taxonomies = new ArrayList<UniProtTaxonomy>();
for( final String line : result ) {