// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
-//
+//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
-//
+//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
public final class AncestralTaxonomyInference {
- private static final int MAX_CACHE_SIZE = 100000;
- private static final int MAX_TAXONOMIES_TO_RETURN = 100;
- private static final HashMap<String, UniProtTaxonomy> _sn_up_cache_map = new HashMap<String, UniProtTaxonomy>();
- private static final HashMap<String, UniProtTaxonomy> _code_up_cache_map = new HashMap<String, UniProtTaxonomy>();
- private static final HashMap<String, UniProtTaxonomy> _cn_up_cache_map = new HashMap<String, UniProtTaxonomy>();
- private static final HashMap<String, UniProtTaxonomy> _id_up_cache_map = new HashMap<String, UniProtTaxonomy>();
-
- synchronized private static void clearCachesIfTooLarge() {
- if (getSnTaxCacheMap().size() > MAX_CACHE_SIZE) {
- getSnTaxCacheMap().clear();
- }
- if (getCnTaxCacheMap().size() > MAX_CACHE_SIZE) {
- getCnTaxCacheMap().clear();
- }
- if (getCodeTaxCacheMap().size() > MAX_CACHE_SIZE) {
- getCodeTaxCacheMap().clear();
- }
- if (getIdTaxCacheMap().size() > MAX_CACHE_SIZE) {
- getIdTaxCacheMap().clear();
- }
- }
+ private static final int MAX_CACHE_SIZE = 100000; // Entry count above which clearCachesIfTooLarge() empties a cache map.
+ private static final int MAX_TAXONOMIES_TO_RETURN = 100; // Second argument of every UniProtWsTools lookup in this class; presumably the maximum number of hits to fetch -- confirm against UniProtWsTools.
+ private static final HashMap<String, UniProtTaxonomy> _sn_up_cache_map = new HashMap<String, UniProtTaxonomy>(); // Cache of looked-up taxonomies keyed by scientific name.
+ private static final HashMap<String, UniProtTaxonomy> _code_up_cache_map = new HashMap<String, UniProtTaxonomy>(); // Cache of looked-up taxonomies keyed by taxonomy code.
+ private static final HashMap<String, UniProtTaxonomy> _cn_up_cache_map = new HashMap<String, UniProtTaxonomy>(); // Cache of looked-up taxonomies keyed by common name.
+ private static final HashMap<String, UniProtTaxonomy> _id_up_cache_map = new HashMap<String, UniProtTaxonomy>(); // Cache of looked-up taxonomies keyed by taxonomy id.
- synchronized private static HashMap<String, UniProtTaxonomy> getCnTaxCacheMap() {
- return _cn_up_cache_map;
- }
+ /**
+  * Empties each taxonomy cache whose entry count exceeds MAX_CACHE_SIZE.
+  * The four caches are checked and cleared independently of each other,
+  * in the order: scientific name, common name, code, id.
+  */
+ synchronized private static void clearCachesIfTooLarge() {
+     final HashMap<String, UniProtTaxonomy> by_sn = getSnTaxCacheMap();
+     if ( by_sn.size() > MAX_CACHE_SIZE ) {
+         by_sn.clear();
+     }
+     final HashMap<String, UniProtTaxonomy> by_cn = getCnTaxCacheMap();
+     if ( by_cn.size() > MAX_CACHE_SIZE ) {
+         by_cn.clear();
+     }
+     final HashMap<String, UniProtTaxonomy> by_code = getCodeTaxCacheMap();
+     if ( by_code.size() > MAX_CACHE_SIZE ) {
+         by_code.clear();
+     }
+     final HashMap<String, UniProtTaxonomy> by_id = getIdTaxCacheMap();
+     if ( by_id.size() > MAX_CACHE_SIZE ) {
+         by_id.clear();
+     }
+ }
- synchronized private static HashMap<String, UniProtTaxonomy> getCodeTaxCacheMap() {
- return _code_up_cache_map;
- }
+ synchronized private static HashMap<String, UniProtTaxonomy> getCnTaxCacheMap() { // Accessor for the common-name cache.
+ return _cn_up_cache_map; // The shared map itself is returned, not a copy, so callers mutate the cache directly.
+ }
- synchronized private static HashMap<String, UniProtTaxonomy> getIdTaxCacheMap() {
- return _id_up_cache_map;
- }
+ synchronized private static HashMap<String, UniProtTaxonomy> getCodeTaxCacheMap() { // Accessor for the taxonomy-code cache.
+ return _code_up_cache_map; // The shared map itself is returned, not a copy, so callers mutate the cache directly.
+ }
- synchronized private static HashMap<String, UniProtTaxonomy> getSnTaxCacheMap() {
- return _sn_up_cache_map;
- }
+ synchronized private static HashMap<String, UniProtTaxonomy> getIdTaxCacheMap() { // Accessor for the taxonomy-id cache.
+ return _id_up_cache_map; // The shared map itself is returned, not a copy, so callers mutate the cache directly.
+ }
- synchronized private static UniProtTaxonomy getTaxonomies(
- final HashMap<String, UniProtTaxonomy> cache, final String query,
- final QUERY_TYPE qt) throws IOException {
- if (cache.containsKey(query)) {
- return cache.get(query).copy();
- } else {
- List<UniProtTaxonomy> up_taxonomies = null;
- switch (qt) {
- case ID:
- up_taxonomies = getTaxonomiesFromId(query);
- break;
- case CODE:
- up_taxonomies = getTaxonomiesFromTaxonomyCode(query);
- break;
- case SN:
- up_taxonomies = getTaxonomiesFromScientificName(query);
- break;
- case CN:
- up_taxonomies = getTaxonomiesFromCommonName(query);
- break;
- default:
- throw new RuntimeException();
- }
- if ((up_taxonomies != null) && (up_taxonomies.size() == 1)) {
- final UniProtTaxonomy up_tax = up_taxonomies.get(0);
- if (!ForesterUtil.isEmpty(up_tax.getScientificName())) {
- getSnTaxCacheMap().put(up_tax.getScientificName(), up_tax);
- }
- if (!ForesterUtil.isEmpty(up_tax.getCode())) {
- getCodeTaxCacheMap().put(up_tax.getCode(), up_tax);
- }
- if (!ForesterUtil.isEmpty(up_tax.getCommonName())) {
- getCnTaxCacheMap().put(up_tax.getCommonName(), up_tax);
- }
- if (!ForesterUtil.isEmpty(up_tax.getId())) {
- getIdTaxCacheMap().put(up_tax.getId(), up_tax);
- }
- return up_tax;
- } else {
- return null;
- }
- }
- }
+ synchronized private static HashMap<String, UniProtTaxonomy> getSnTaxCacheMap() { // Accessor for the scientific-name cache.
+ return _sn_up_cache_map; // The shared map itself is returned, not a copy, so callers mutate the cache directly.
+ }
- synchronized private static List<UniProtTaxonomy> getTaxonomiesFromCommonName(
- final String query) throws IOException {
- return UniProtWsTools.getTaxonomiesFromCommonNameStrict(query,
- MAX_TAXONOMIES_TO_RETURN);
- }
+ /**
+  * Resolves {@code query} to a single taxonomy, consulting {@code cache}
+  * before performing a lookup.
+  * <p>
+  * On a cache hit a copy of the cached entry is returned. On a miss the
+  * lookup selected by {@code qt} is run; if it yields exactly one taxonomy,
+  * that object is stored in the scientific-name, code, common-name and id
+  * caches (once per non-empty value) and the object itself (not a copy)
+  * is returned.
+  *
+  * @param cache the map checked for {@code query} before any lookup
+  * @param query the search term
+  * @param qt    selects which lookup is performed on a cache miss
+  * @return the resolved taxonomy, or {@code null} if the lookup returned
+  *         {@code null} or anything other than exactly one result
+  * @throws IOException      propagated from the underlying lookup
+  * @throws RuntimeException if {@code qt} matches none of the handled cases
+  */
+ synchronized private static UniProtTaxonomy getTaxonomies( final HashMap<String, UniProtTaxonomy> cache,
+                                                            final String query,
+                                                            final QUERY_TYPE qt ) throws IOException {
+     if ( cache.containsKey( query ) ) {
+         return cache.get( query ).copy();
+     }
+     final List<UniProtTaxonomy> hits;
+     switch ( qt ) {
+         case ID:
+             hits = getTaxonomiesFromId( query );
+             break;
+         case CODE:
+             hits = getTaxonomiesFromTaxonomyCode( query );
+             break;
+         case SN:
+             hits = getTaxonomiesFromScientificName( query );
+             break;
+         case CN:
+             hits = getTaxonomiesFromCommonName( query );
+             break;
+         default:
+             throw new RuntimeException();
+     }
+     // Only an unambiguous (single) result is accepted and cached.
+     if ( ( hits == null ) || ( hits.size() != 1 ) ) {
+         return null;
+     }
+     final UniProtTaxonomy hit = hits.get( 0 );
+     final String sn = hit.getScientificName();
+     if ( !ForesterUtil.isEmpty( sn ) ) {
+         getSnTaxCacheMap().put( sn, hit );
+     }
+     final String code = hit.getCode();
+     if ( !ForesterUtil.isEmpty( code ) ) {
+         getCodeTaxCacheMap().put( code, hit );
+     }
+     final String cn = hit.getCommonName();
+     if ( !ForesterUtil.isEmpty( cn ) ) {
+         getCnTaxCacheMap().put( cn, hit );
+     }
+     final String id = hit.getId();
+     if ( !ForesterUtil.isEmpty( id ) ) {
+         getIdTaxCacheMap().put( id, hit );
+     }
+     return hit;
+ }
- synchronized private static List<UniProtTaxonomy> getTaxonomiesFromId(
- final String query) throws IOException {
- return UniProtWsTools.getTaxonomiesFromId(query,
- MAX_TAXONOMIES_TO_RETURN);
- }
+ synchronized private static List<UniProtTaxonomy> getTaxonomiesFromCommonName( final String query ) // Looks up taxonomies for a common name.
+ throws IOException {
+ return UniProtWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); // Delegates to the "Strict" UniProtWsTools variant; its result is passed through unchanged.
+ }
- synchronized private static List<UniProtTaxonomy> getTaxonomiesFromScientificName(
- final String query) throws IOException {
- return UniProtWsTools.getTaxonomiesFromScientificNameStrict(query,
- MAX_TAXONOMIES_TO_RETURN);
- }
+ synchronized private static List<UniProtTaxonomy> getTaxonomiesFromId( final String query ) throws IOException { // Looks up taxonomies for a taxonomy id.
+ return UniProtWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN ); // Pure delegation; the UniProtWsTools result is passed through unchanged.
+ }
- synchronized private static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode(
- final String query) throws IOException {
- return UniProtWsTools.getTaxonomiesFromTaxonomyCode(query,
- MAX_TAXONOMIES_TO_RETURN);
- }
+ synchronized private static List<UniProtTaxonomy> getTaxonomiesFromScientificName( final String query ) // Looks up taxonomies for a scientific name.
+ throws IOException {
+ return UniProtWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); // Delegates to the "Strict" UniProtWsTools variant; its result is passed through unchanged.
+ }
- synchronized public static SortedSet<String> inferTaxonomyFromDescendents(
- final Phylogeny phy) throws IOException {
- clearCachesIfTooLarge();
- final SortedSet<String> not_found = new TreeSet<String>();
- for (final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter
- .hasNext();) {
- final PhylogenyNode node = iter.next();
- // final QUERY_TYPE qt = null;
- // Taxonomy tax = null;
- // if ( node.getNodeData().isHasTaxonomy() ) {
- // tax = node.getNodeData().getTaxonomy();
- // }
- // UniProtTaxonomy up_tax = null;
- // if ( ( tax != null )
- // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty(
- // tax.getScientificName() )
- // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ||
- // !ForesterUtil.isEmpty( tax
- // .getCommonName() ) ) ) {
- // final String query = null;
- // up_tax = obtainUniProtTaxonomy( tax, query, qt );
- // if ( up_tax == null ) {
- // not_found.add( query );
- // }
- // else {
- // updateTaxonomy( qt, node, tax, up_tax );
- // }
- // }
- if (!node.isExternal()) {
- inferTaxonomyFromDescendents(node, not_found);
- }
- }
- return not_found;
- }
+ synchronized private static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String query ) // Looks up taxonomies for a taxonomy code.
+ throws IOException {
+ return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); // Pure delegation; the UniProtWsTools result is passed through unchanged.
+ }
- synchronized private static void inferTaxonomyFromDescendents(
- final PhylogenyNode n, final SortedSet<String> not_found)
- throws IOException {
- if (n.isExternal()) {
- throw new IllegalArgumentException(
- "attempt to infer taxonomy from descendants of external node");
- }
- n.getNodeData().setTaxonomy(null);
- final List<PhylogenyNode> descs = n.getDescendants();
- final List<String[]> lineages = new ArrayList<String[]>();
- int shortest_lin_length = Integer.MAX_VALUE;
- for (final PhylogenyNode desc : descs) {
- if (desc.getNodeData().isHasTaxonomy()
- && (isHasAppropriateId(desc.getNodeData().getTaxonomy())
- || !ForesterUtil.isEmpty(desc.getNodeData()
- .getTaxonomy().getScientificName())
- || !ForesterUtil.isEmpty(desc.getNodeData()
- .getTaxonomy().getTaxonomyCode()) || !ForesterUtil
- .isEmpty(desc.getNodeData().getTaxonomy()
- .getCommonName()))) {
- final QUERY_TYPE qt = null;
- final String query = null;
- final UniProtTaxonomy up_tax = obtainUniProtTaxonomy(desc
- .getNodeData().getTaxonomy(), query, qt);
- String[] lineage = null;
- if (up_tax != null) {
- lineage = obtainLineagePlusOwnScientificName(up_tax);
- }
- if ((lineage == null) || (lineage.length < 1)) {
- not_found.add(desc.getNodeData().getTaxonomy().asText()
- .toString());
- return;
- }
- if (lineage.length < shortest_lin_length) {
- shortest_lin_length = lineage.length;
- }
- lineages.add(lineage);
- } else {
- String msg = "Node(s) with no or inappropriate taxonomic information found";
- if (!ForesterUtil.isEmpty(desc.getName())) {
- msg = "Node " + desc.getName()
- + " has no or inappropriate taxonomic information";
- }
- throw new IllegalArgumentException(msg);
- }
- }
- String last_common_lineage = null;
- if (shortest_lin_length > 0) {
- I: for (int i = 0; i < shortest_lin_length; ++i) {
- final String lineage_0 = lineages.get(0)[i];
- for (int j = 1; j < lineages.size(); ++j) {
- if (!lineage_0.equals(lineages.get(j)[i])) {
- break I;
- }
- }
- last_common_lineage = lineage_0;
- }
- }
- if (last_common_lineage == null) {
- return;
- }
- // if ( !n.getNodeData().isHasTaxonomy() ) {
- // n.getNodeData().setTaxonomy( new Taxonomy() );
- // }
- final Taxonomy tax = new Taxonomy();
- n.getNodeData().setTaxonomy(tax);
- tax.setScientificName(last_common_lineage);
- final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn(last_common_lineage);
- if (up_tax != null) {
- if (!ForesterUtil.isEmpty(up_tax.getRank())) {
- try {
- tax.setRank(up_tax.getRank().toLowerCase());
- } catch (final PhyloXmlDataFormatException ex) {
- tax.setRank("");
- }
- }
- if (!ForesterUtil.isEmpty(up_tax.getId())) {
- tax.setIdentifier(new Identifier(up_tax.getId(), "uniprot"));
- }
- if (!ForesterUtil.isEmpty(up_tax.getCommonName())) {
- tax.setCommonName(up_tax.getCommonName());
- }
- if (!ForesterUtil.isEmpty(up_tax.getSynonym())
- && !tax.getSynonyms().contains(up_tax.getSynonym())) {
- tax.getSynonyms().add(up_tax.getSynonym());
- }
- }
- for (final PhylogenyNode desc : descs) {
- if (!desc.isExternal() && desc.getNodeData().isHasTaxonomy()
- && desc.getNodeData().getTaxonomy().isEqual(tax)) {
- desc.getNodeData().setTaxonomy(null);
- }
- }
- }
+ /**
+  * Infers a taxonomy for every non-external node of {@code phy} from the
+  * taxonomies of that node's descendants. Nodes are taken from
+  * {@code phy.iteratorPostorder()}, and oversized caches are cleared
+  * before the traversal starts.
+  *
+  * @param phy the phylogeny whose internal nodes are to be annotated
+  * @return sorted set of textual descriptions of descendant taxonomies
+  *         for which no lineage could be obtained
+  * @throws IOException              propagated from the taxonomy lookups
+  * @throws IllegalArgumentException if a descendant carries no usable
+  *                                  taxonomic information
+  */
+ synchronized public static SortedSet<String> inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException {
+     clearCachesIfTooLarge();
+     final SortedSet<String> unresolved = new TreeSet<String>();
+     final PhylogenyNodeIterator it = phy.iteratorPostorder();
+     while ( it.hasNext() ) {
+         final PhylogenyNode current = it.next();
+         if ( current.isExternal() ) {
+             continue;
+         }
+         inferTaxonomyFromDescendents( current, unresolved );
+     }
+     return unresolved;
+ }
- synchronized private static boolean isHasAppropriateId(final Taxonomy tax) {
- return ((tax.getIdentifier() != null) && (!ForesterUtil.isEmpty(tax
- .getIdentifier().getValue()) && (tax.getIdentifier()
- .getProvider().equalsIgnoreCase("ncbi")
- || tax.getIdentifier().getProvider()
- .equalsIgnoreCase("uniprot") || tax.getIdentifier()
- .getProvider().equalsIgnoreCase("uniprotkb"))));
- }
+ /**
+  * Infers the taxonomy of node {@code n} as the deepest lineage entry
+  * shared by all nodes returned by {@code n.getDescendants()}.
+  * <p>
+  * Steps:
+  * <ol>
+  * <li>The current taxonomy of {@code n} is cleared.</li>
+  * <li>Each descendant's taxonomy is resolved and its lineage array is
+  * collected. If a descendant yields no lineage, its taxonomy text is
+  * added to {@code not_found} and the method returns, leaving {@code n}
+  * without a taxonomy.</li>
+  * <li>The longest common prefix of all collected lineages is computed;
+  * its last entry becomes the scientific name of the new taxonomy. If
+  * there is no common entry, the lineages are printed and the method
+  * returns.</li>
+  * <li>That scientific name is looked up to fill in rank, identifier,
+  * common name, synonym and lineage where available.</li>
+  * <li>Non-external descendants whose taxonomy is equal to the inferred
+  * one have their taxonomy cleared.</li>
+  * </ol>
+  *
+  * @param n         the node to annotate; must not be external
+  * @param not_found receives the textual form of a descendant taxonomy
+  *                  for which no lineage could be obtained
+  * @throws IOException              propagated from the taxonomy lookups
+  * @throws IllegalArgumentException if {@code n} is external, or if a
+  *                                  descendant has no taxonomy or one
+  *                                  without id, scientific name, code
+  *                                  and common name
+  */
+ synchronized private static void inferTaxonomyFromDescendents( final PhylogenyNode n,
+                                                                final SortedSet<String> not_found )
+         throws IOException {
+     if ( n.isExternal() ) {
+         throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" );
+     }
+     n.getNodeData().setTaxonomy( null );
+     final List<PhylogenyNode> descs = n.getDescendants();
+     final List<String[]> lineages = new ArrayList<String[]>();
+     int shortest_lin_length = Integer.MAX_VALUE;
+     for( final PhylogenyNode desc : descs ) {
+         if ( desc.getNodeData().isHasTaxonomy()
+                 && ( isHasAppropriateId( desc.getNodeData().getTaxonomy() )
+                         || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() )
+                         || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() )
+                         || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) {
+             final UniProtTaxonomy desc_up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(),
+                                                                        null,
+                                                                        null );
+             String[] lineage = null;
+             if ( desc_up_tax != null ) {
+                 lineage = desc_up_tax.getLineageAsArray();
+             }
+             if ( ( lineage == null ) || ( lineage.length < 1 ) ) {
+                 //TODO remove me
+                 System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" );
+                 not_found.add( desc.getNodeData().getTaxonomy().asText().toString() );
+                 return;
+             }
+             if ( lineage.length < shortest_lin_length ) {
+                 shortest_lin_length = lineage.length;
+             }
+             lineages.add( lineage );
+         }
+         else {
+             // Identify the offending node by name when it has one, by id otherwise.
+             final String node;
+             if ( !ForesterUtil.isEmpty( desc.getName() ) ) {
+                 node = "\"" + desc.getName() + "\"";
+             }
+             else {
+                 node = "[" + desc.getId() + "]";
+             }
+             throw new IllegalArgumentException( "Node " + node
+                     + " has no or inappropriate taxonomic information" );
+         }
+     }
+     // Walk all lineages in lock-step and keep the prefix on which they agree.
+     // The prefix itself is collected so it can be handed to the name lookup
+     // below: the per-descendant "lineage" array is local to the loop above
+     // and is not in scope here.
+     String last_common_lineage = null;
+     final List<String> common_lineage = new ArrayList<String>();
+     if ( shortest_lin_length > 0 ) {
+         I: for( int i = 0; i < shortest_lin_length; ++i ) {
+             final String lineage_0 = lineages.get( 0 )[ i ];
+             for( int j = 1; j < lineages.size(); ++j ) {
+                 if ( !lineage_0.equals( lineages.get( j )[ i ] ) ) {
+                     break I;
+                 }
+             }
+             last_common_lineage = lineage_0;
+             common_lineage.add( lineage_0 );
+         }
+     }
+     if ( last_common_lineage == null ) {
+         System.out.println( "No common lineage for:" );
+         int counter = 0;
+         for( final String[] strings : lineages ) {
+             System.out.print( counter + ": " );
+             ++counter;
+             for( final String string : strings ) {
+                 System.out.print( string + " " );
+             }
+             System.out.println();
+         }
+         return;
+     }
+     final Taxonomy tax = new Taxonomy();
+     n.getNodeData().setTaxonomy( tax );
+     tax.setScientificName( last_common_lineage );
+     final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage, common_lineage );
+     if ( up_tax != null ) {
+         if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) {
+             try {
+                 tax.setRank( up_tax.getRank().toLowerCase() );
+             }
+             catch ( final PhyloXmlDataFormatException ex ) {
+                 // Rank value rejected by Taxonomy: fall back to an empty rank.
+                 tax.setRank( "" );
+             }
+         }
+         if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
+             tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
+         }
+         if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
+             tax.setCommonName( up_tax.getCommonName() );
+         }
+         if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
+             tax.getSynonyms().add( up_tax.getSynonym() );
+         }
+         if ( up_tax.getLineage() != null ) {
+             // Copy the lineage, skipping empty entries.
+             tax.setLineage( new ArrayList<String>() );
+             for( final String lin : up_tax.getLineage() ) {
+                 if ( !ForesterUtil.isEmpty( lin ) ) {
+                     tax.getLineage().add( lin );
+                 }
+             }
+         }
+     }
+     // An internal descendant carrying the same taxonomy as n is redundant.
+     for( final PhylogenyNode desc : descs ) {
+         if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy()
+                 && desc.getNodeData().getTaxonomy().isEqual( tax ) ) {
+             desc.getNodeData().setTaxonomy( null );
+         }
+     }
+ }
- synchronized public static SortedSet<String> obtainDetailedTaxonomicInformation(
- final Phylogeny phy) throws IOException {
- clearCachesIfTooLarge();
- final SortedSet<String> not_found = new TreeSet<String>();
- for (final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter
- .hasNext();) {
- final PhylogenyNode node = iter.next();
- final QUERY_TYPE qt = null;
- Taxonomy tax = null;
- if (node.getNodeData().isHasTaxonomy()) {
- tax = node.getNodeData().getTaxonomy();
- } else if (node.isExternal()) {
- if (!ForesterUtil.isEmpty(node.getName())) {
- not_found.add(node.getName());
- } else {
- not_found.add(node.toString());
- }
- }
- UniProtTaxonomy up_tax = null;
- if ((tax != null)
- && (isHasAppropriateId(tax)
- || !ForesterUtil.isEmpty(tax.getScientificName())
- || !ForesterUtil.isEmpty(tax.getTaxonomyCode()) || !ForesterUtil
- .isEmpty(tax.getCommonName()))) {
- up_tax = obtainUniProtTaxonomy(tax, null, qt);
- if (up_tax != null) {
- updateTaxonomy(qt, node, tax, up_tax);
- } else {
- not_found.add(tax.toString());
- }
- }
- }
- return not_found;
- }
+ /**
+  * Tells whether {@code tax} carries an identifier usable for an id lookup:
+  * the identifier must exist, have a non-empty value, and name one of the
+  * providers "ncbi", "uniprot" or "uniprotkb" (compared case-insensitively).
+  *
+  * @param tax the taxonomy to inspect
+  * @return {@code true} if such an identifier is present
+  */
+ synchronized private static boolean isHasAppropriateId( final Taxonomy tax ) {
+     final Identifier id = tax.getIdentifier();
+     if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) ) {
+         return false;
+     }
+     final String provider = id.getProvider();
+     return provider.equalsIgnoreCase( "ncbi" )
+             || provider.equalsIgnoreCase( "uniprot" )
+             || provider.equalsIgnoreCase( "uniprotkb" );
+ }
- synchronized private static String[] obtainLineagePlusOwnScientificName(
- final UniProtTaxonomy up_tax) {
- final String[] lineage = up_tax.getLineage();
- final String[] lin_plus_self = new String[lineage.length + 1];
- for (int i = 0; i < lineage.length; ++i) {
- lin_plus_self[i] = lineage[i];
- }
- lin_plus_self[lineage.length] = up_tax.getScientificName();
- return lin_plus_self;
- }
+ /**
+  * Completes the taxonomy data of the nodes of {@code phy} from taxonomy
+  * lookups, visiting the nodes supplied by {@code phy.iteratorPostorder()}.
+  * <p>
+  * A node's taxonomy is looked up when it has an appropriate identifier,
+  * a scientific name, a taxonomy code or a common name. External nodes
+  * without any taxonomy, and taxonomies whose lookup fails, are reported
+  * in the returned set.
+  *
+  * @param phy    the phylogeny to update
+  * @param delete if {@code true}, external nodes that have no taxonomy or
+  *               whose taxonomy could not be resolved are removed from
+  *               {@code phy} after the traversal
+  * @return sorted set of node names/descriptions and taxonomy descriptions
+  *         that could not be resolved
+  * @throws IOException propagated from the taxonomy lookups
+  */
+ synchronized public static SortedSet<String> obtainDetailedTaxonomicInformation( final Phylogeny phy,
+                                                                                  final boolean delete )
+         throws IOException {
+     clearCachesIfTooLarge();
+     final SortedSet<String> not_found = new TreeSet<String>();
+     // Only allocated when deletion was requested.
+     final List<PhylogenyNode> doomed = delete ? new ArrayList<PhylogenyNode>() : null;
+     final PhylogenyNodeIterator it = phy.iteratorPostorder();
+     while ( it.hasNext() ) {
+         final PhylogenyNode node = it.next();
+         // Stays null: obtainUniProtTaxonomy() cannot report the query type
+         // it chose back to this method.
+         final QUERY_TYPE qt = null;
+         if ( !node.getNodeData().isHasTaxonomy() ) {
+             if ( node.isExternal() ) {
+                 not_found.add( ForesterUtil.isEmpty( node.getName() ) ? node.toString() : node.getName() );
+                 if ( delete ) {
+                     doomed.add( node );
+                 }
+             }
+             continue;
+         }
+         final Taxonomy tax = node.getNodeData().getTaxonomy();
+         if ( tax == null ) {
+             continue;
+         }
+         final boolean queryable = isHasAppropriateId( tax )
+                 || !ForesterUtil.isEmpty( tax.getScientificName() )
+                 || !ForesterUtil.isEmpty( tax.getTaxonomyCode() )
+                 || !ForesterUtil.isEmpty( tax.getCommonName() );
+         if ( !queryable ) {
+             continue;
+         }
+         final UniProtTaxonomy uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
+         if ( uniprot_tax == null ) {
+             not_found.add( tax.toString() );
+             if ( delete && node.isExternal() ) {
+                 doomed.add( node );
+             }
+             continue;
+         }
+         updateTaxonomy( qt, node, tax, uniprot_tax );
+     }
+     if ( delete ) {
+         for( final PhylogenyNode victim : doomed ) {
+             phy.deleteSubtree( victim, true );
+         }
+         // Refresh the phylogeny's derived state after the removals.
+         phy.externalNodesHaveChanged();
+         phy.hashIDs();
+         phy.recalculateNumberOfExternalDescendants( true );
+     }
+     return not_found;
+ }
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomy(
- final Taxonomy tax, String query, QUERY_TYPE qt) throws IOException {
- if (isHasAppropriateId(tax)) {
- query = tax.getIdentifier().getValue();
- qt = QUERY_TYPE.ID;
- return getTaxonomies(getIdTaxCacheMap(), query, qt);
- } else if (!ForesterUtil.isEmpty(tax.getScientificName())) {
- query = tax.getScientificName();
- qt = QUERY_TYPE.SN;
- return getTaxonomies(getSnTaxCacheMap(), query, qt);
- } else if (!ForesterUtil.isEmpty(tax.getTaxonomyCode())) {
- query = tax.getTaxonomyCode();
- qt = QUERY_TYPE.CODE;
- return getTaxonomies(getCodeTaxCacheMap(), query, qt);
- } else {
- query = tax.getCommonName();
- qt = QUERY_TYPE.CN;
- return getTaxonomies(getCnTaxCacheMap(), query, qt);
- }
- }
+ // TODO this might not be needed anymore
+ // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
+ // final String[] lineage = up_tax.getLineageAsArray();
+ // final String[] lin_plus_self = new String[ lineage.length + 1 ];
+ // for( int i = 0; i < lineage.length; ++i ) {
+ // lin_plus_self[ i ] = lineage[ i ];
+ // }
+ // lin_plus_self[ lineage.length ] = up_tax.getScientificName();
+ // return lin_plus_self;
+ // }
+ /**
+  * Resolves {@code tax} using the first key it provides, tried in this
+  * order: appropriate identifier, scientific name, taxonomy code, common
+  * name. The cache belonging to the chosen key is consulted first.
+  * <p>
+  * The incoming values of {@code query} and {@code qt} are ignored, and
+  * since Java passes arguments by value the key and query type chosen
+  * here are not visible to the caller.
+  * <p>
+  * NOTE(review): the id and scientific-name branches print the query to
+  * standard output; the other two branches do not.
+  *
+  * @param tax   the taxonomy to resolve
+  * @param query ignored
+  * @param qt    ignored
+  * @return the resolved taxonomy, or {@code null} if getTaxonomies() finds
+  *         no unambiguous match
+  * @throws IOException propagated from the lookup
+  */
+ synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt )
+         throws IOException {
+     if ( isHasAppropriateId( tax ) ) {
+         final String id = tax.getIdentifier().getValue();
+         System.out.println( "query by id: " + id );
+         return getTaxonomies( getIdTaxCacheMap(), id, QUERY_TYPE.ID );
+     }
+     if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+         final String sn = tax.getScientificName();
+         System.out.println( "query by sn: " + sn );
+         return getTaxonomies( getSnTaxCacheMap(), sn, QUERY_TYPE.SN );
+     }
+     if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+         return getTaxonomies( getCodeTaxCacheMap(), tax.getTaxonomyCode(), QUERY_TYPE.CODE );
+     }
+     // Last resort: the common name, whether or not it is empty.
+     return getTaxonomies( getCnTaxCacheMap(), tax.getCommonName(), QUERY_TYPE.CN );
+ }
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn(
- final String sn) throws IOException {
- UniProtTaxonomy up_tax = null;
- if (getSnTaxCacheMap().containsKey(sn)) {
- up_tax = getSnTaxCacheMap().get(sn).copy();
- } else {
- final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName(sn);
- if ((up_taxonomies != null) && (up_taxonomies.size() == 1)) {
- up_tax = up_taxonomies.get(0);
- getSnTaxCacheMap().put(sn, up_tax);
- if (!ForesterUtil.isEmpty(up_tax.getCode())) {
- getCodeTaxCacheMap().put(up_tax.getCode(), up_tax);
- }
- if (!ForesterUtil.isEmpty(up_tax.getCommonName())) {
- getCnTaxCacheMap().put(up_tax.getCommonName(), up_tax);
- }
- if (!ForesterUtil.isEmpty(up_tax.getId())) {
- getIdTaxCacheMap().put(up_tax.getId(), up_tax);
- }
- }
- }
- return up_tax;
- }
+ /**
+  * Resolves a scientific name to a taxonomy, using the scientific-name
+  * cache when possible.
+  * <p>
+  * On a cache hit a copy of the cached entry is returned. Otherwise a
+  * scientific-name lookup is made; a single result is cached under
+  * {@code sn} (and under its code, common name and id where non-empty)
+  * and returned as is.
+  *
+  * @param sn      the scientific name to resolve
+  * @param lineage not used by the current implementation
+  * @return the resolved taxonomy, or {@code null} if the lookup returned
+  *         {@code null} or anything other than exactly one result
+  * @throws IOException propagated from the lookup
+  */
+ synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List<String> lineage ) throws IOException {
+     if ( getSnTaxCacheMap().containsKey( sn ) ) {
+         return getSnTaxCacheMap().get( sn ).copy();
+     }
+     final List<UniProtTaxonomy> hits = getTaxonomiesFromScientificName( sn );
+     if ( ( hits == null ) || ( hits.size() != 1 ) ) {
+         return null;
+     }
+     final UniProtTaxonomy hit = hits.get( 0 );
+     getSnTaxCacheMap().put( sn, hit );
+     final String code = hit.getCode();
+     if ( !ForesterUtil.isEmpty( code ) ) {
+         getCodeTaxCacheMap().put( code, hit );
+     }
+     final String cn = hit.getCommonName();
+     if ( !ForesterUtil.isEmpty( cn ) ) {
+         getCnTaxCacheMap().put( cn, hit );
+     }
+     final String id = hit.getId();
+     if ( !ForesterUtil.isEmpty( id ) ) {
+         getIdTaxCacheMap().put( id, hit );
+     }
+     return hit;
+ }
- synchronized private static void updateTaxonomy(final QUERY_TYPE qt,
- final PhylogenyNode node, final Taxonomy tax,
- final UniProtTaxonomy up_tax) {
- if ((qt != QUERY_TYPE.SN)
- && !ForesterUtil.isEmpty(up_tax.getScientificName())
- && ForesterUtil.isEmpty(tax.getScientificName())) {
- tax.setScientificName(up_tax.getScientificName());
- }
- if (node.isExternal()
- && ((qt != QUERY_TYPE.CODE)
- && !ForesterUtil.isEmpty(up_tax.getCode()) && ForesterUtil
- .isEmpty(tax.getTaxonomyCode()))) {
- tax.setTaxonomyCode(up_tax.getCode());
- }
- if ((qt != QUERY_TYPE.CN)
- && !ForesterUtil.isEmpty(up_tax.getCommonName())
- && ForesterUtil.isEmpty(tax.getCommonName())) {
- tax.setCommonName(up_tax.getCommonName());
- }
- if (!ForesterUtil.isEmpty(up_tax.getSynonym())
- && !tax.getSynonyms().contains(up_tax.getSynonym())) {
- tax.getSynonyms().add(up_tax.getSynonym());
- }
- if (!ForesterUtil.isEmpty(up_tax.getRank())
- && ForesterUtil.isEmpty(tax.getRank())) {
- try {
- tax.setRank(up_tax.getRank().toLowerCase());
- } catch (final PhyloXmlDataFormatException ex) {
- tax.setRank("");
- }
- }
- if ((qt != QUERY_TYPE.ID) && !ForesterUtil.isEmpty(up_tax.getId())
- && (tax.getIdentifier() == null)) {
- tax.setIdentifier(new Identifier(up_tax.getId(), "uniprot"));
- }
- }
+ /**
+  * Copies data from a looked-up taxonomy into {@code tax}.
+  * <p>
+  * Scientific name, taxonomy code, common name, rank and identifier are
+  * filled in only when {@code tax} does not have them yet; the first
+  * three and the identifier are additionally skipped when {@code qt}
+  * names that very field. The synonym is appended if not already listed.
+  * The lineage of {@code tax} is replaced by the non-empty entries of the
+  * looked-up lineage whenever the latter is not {@code null}.
+  *
+  * @param qt     the kind of query that produced {@code up_tax}; may be
+  *               {@code null}, in which case no field is skipped on its
+  *               account
+  * @param node   not used by the current implementation
+  * @param tax    the taxonomy to update in place
+  * @param up_tax the looked-up taxonomy supplying the data
+  */
+ synchronized private static void updateTaxonomy( final QUERY_TYPE qt,
+                                                  final PhylogenyNode node,
+                                                  final Taxonomy tax,
+                                                  final UniProtTaxonomy up_tax ) {
+     if ( qt != QUERY_TYPE.SN ) {
+         final String sn = up_tax.getScientificName();
+         if ( !ForesterUtil.isEmpty( sn ) && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
+             tax.setScientificName( sn );
+         }
+     }
+     // The code is set for internal and external nodes alike.
+     if ( qt != QUERY_TYPE.CODE ) {
+         final String code = up_tax.getCode();
+         if ( !ForesterUtil.isEmpty( code ) && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
+             tax.setTaxonomyCode( code );
+         }
+     }
+     if ( qt != QUERY_TYPE.CN ) {
+         final String cn = up_tax.getCommonName();
+         if ( !ForesterUtil.isEmpty( cn ) && ForesterUtil.isEmpty( tax.getCommonName() ) ) {
+             tax.setCommonName( cn );
+         }
+     }
+     final String synonym = up_tax.getSynonym();
+     if ( !ForesterUtil.isEmpty( synonym ) && !tax.getSynonyms().contains( synonym ) ) {
+         tax.getSynonyms().add( synonym );
+     }
+     final String rank = up_tax.getRank();
+     if ( !ForesterUtil.isEmpty( rank ) && ForesterUtil.isEmpty( tax.getRank() ) ) {
+         try {
+             tax.setRank( rank.toLowerCase() );
+         }
+         catch ( final PhyloXmlDataFormatException ex ) {
+             // Rank value rejected by Taxonomy: fall back to an empty rank.
+             tax.setRank( "" );
+         }
+     }
+     if ( qt != QUERY_TYPE.ID ) {
+         final String id = up_tax.getId();
+         if ( !ForesterUtil.isEmpty( id ) && ( tax.getIdentifier() == null ) ) {
+             tax.setIdentifier( new Identifier( id, "uniprot" ) );
+         }
+     }
+     if ( up_tax.getLineage() != null ) {
+         // Start from a fresh list and copy over the non-empty entries.
+         tax.setLineage( new ArrayList<String>() );
+         for( final String entry : up_tax.getLineage() ) {
+             if ( !ForesterUtil.isEmpty( entry ) ) {
+                 tax.getLineage().add( entry );
+             }
+         }
+     }
+ }
- private enum QUERY_TYPE {
- CODE, SN, CN, ID;
- }
+ private enum QUERY_TYPE { // Kind of key used for a taxonomy lookup; getTaxonomies() switches on it to pick the lookup method.
+ CODE, SN, CN, ID; // Taxonomy code, scientific name, common name and taxonomy id, matching the lookups chosen in getTaxonomies().
+ }
}