X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fanalysis%2FAncestralTaxonomyInference.java;h=dec0a635a22def5b0097f958aaca0c0fc2fe536e;hb=c365c2e336ee79677d9e0f5d5c8d280afb56a3ab;hp=b9e6b464d3c18a01838c5f0ee7c021461dfddc0c;hpb=2343f209dd616cf44434e3750dfcd4e334446d46;p=jalview.git diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index b9e6b46..dec0a63 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -4,7 +4,7 @@ // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -14,7 +14,7 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -43,380 +43,410 @@ import org.forester.ws.uniprot.UniProtWsTools; public final class AncestralTaxonomyInference { - private static final int MAX_CACHE_SIZE = 100000; - private static final int MAX_TAXONOMIES_TO_RETURN = 100; - private static final HashMap _sn_up_cache_map = new HashMap(); - private static final HashMap _code_up_cache_map = new HashMap(); - private static final HashMap _cn_up_cache_map = new HashMap(); - private static final HashMap _id_up_cache_map = new HashMap(); - - synchronized private static void clearCachesIfTooLarge() { - if (getSnTaxCacheMap().size() > MAX_CACHE_SIZE) { - getSnTaxCacheMap().clear(); - } - if (getCnTaxCacheMap().size() > MAX_CACHE_SIZE) { - getCnTaxCacheMap().clear(); - } - if (getCodeTaxCacheMap().size() > MAX_CACHE_SIZE) { - getCodeTaxCacheMap().clear(); - } - if (getIdTaxCacheMap().size() > MAX_CACHE_SIZE) { - getIdTaxCacheMap().clear(); - } - } + private static final int MAX_CACHE_SIZE = 100000; + private static final int MAX_TAXONOMIES_TO_RETURN = 100; + private static final HashMap _sn_up_cache_map = new HashMap(); + private static final HashMap _code_up_cache_map = new HashMap(); + private static final HashMap _cn_up_cache_map = new HashMap(); + private static final HashMap _id_up_cache_map = new HashMap(); - synchronized private static HashMap getCnTaxCacheMap() { - return _cn_up_cache_map; - } + synchronized private static void clearCachesIfTooLarge() { + if ( getSnTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getSnTaxCacheMap().clear(); + } + if ( getCnTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getCnTaxCacheMap().clear(); + } + if ( getCodeTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getCodeTaxCacheMap().clear(); + } + if ( getIdTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getIdTaxCacheMap().clear(); + } + } - synchronized private static HashMap getCodeTaxCacheMap() { - return _code_up_cache_map; - } + synchronized private static HashMap getCnTaxCacheMap() { + return _cn_up_cache_map; + } - synchronized private static HashMap getIdTaxCacheMap() { - return _id_up_cache_map; - } + synchronized private static HashMap getCodeTaxCacheMap() { + return _code_up_cache_map; + } - synchronized private static HashMap getSnTaxCacheMap() { - return _sn_up_cache_map; - } + synchronized private static HashMap getIdTaxCacheMap() { + return _id_up_cache_map; + } - synchronized private static UniProtTaxonomy getTaxonomies( - final HashMap cache, final String query, - final QUERY_TYPE qt) throws IOException { - if (cache.containsKey(query)) { - return cache.get(query).copy(); - } else { - List up_taxonomies = null; - switch (qt) { - case ID: - up_taxonomies = getTaxonomiesFromId(query); - break; - case CODE: - up_taxonomies = getTaxonomiesFromTaxonomyCode(query); - break; - case SN: - up_taxonomies = getTaxonomiesFromScientificName(query); - break; - case CN: - up_taxonomies = getTaxonomiesFromCommonName(query); - break; - default: - throw new RuntimeException(); - } - if ((up_taxonomies != null) && (up_taxonomies.size() == 1)) { - final UniProtTaxonomy up_tax = up_taxonomies.get(0); - if (!ForesterUtil.isEmpty(up_tax.getScientificName())) { - getSnTaxCacheMap().put(up_tax.getScientificName(), up_tax); - } - if (!ForesterUtil.isEmpty(up_tax.getCode())) { - getCodeTaxCacheMap().put(up_tax.getCode(), up_tax); - } - if (!ForesterUtil.isEmpty(up_tax.getCommonName())) { - getCnTaxCacheMap().put(up_tax.getCommonName(), up_tax); - } - if (!ForesterUtil.isEmpty(up_tax.getId())) { - getIdTaxCacheMap().put(up_tax.getId(), up_tax); - } - return up_tax; - } else { - return null; - } - } - } + synchronized private static HashMap getSnTaxCacheMap() { + return _sn_up_cache_map; + } - synchronized private static List getTaxonomiesFromCommonName( - final String query) throws IOException { - return UniProtWsTools.getTaxonomiesFromCommonNameStrict(query, - MAX_TAXONOMIES_TO_RETURN); - } + synchronized private static UniProtTaxonomy getTaxonomies( final HashMap cache, + final String query, + final QUERY_TYPE qt ) throws IOException { + if ( cache.containsKey( query ) ) { + return cache.get( query ).copy(); + } + else { + List up_taxonomies = null; + switch ( qt ) { + case ID: + up_taxonomies = getTaxonomiesFromId( query ); + break; + case CODE: + up_taxonomies = getTaxonomiesFromTaxonomyCode( query ); + break; + case SN: + up_taxonomies = getTaxonomiesFromScientificName( query ); + break; + case CN: + up_taxonomies = getTaxonomiesFromCommonName( query ); + break; + default: + throw new RuntimeException(); + } + if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) { + final UniProtTaxonomy up_tax = up_taxonomies.get( 0 ); + if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) { + getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { + getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { + getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { + getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + } + return up_tax; + } + else { + return null; + } + } + } - synchronized private static List getTaxonomiesFromId( - final String query) throws IOException { - return UniProtWsTools.getTaxonomiesFromId(query, - MAX_TAXONOMIES_TO_RETURN); - } + synchronized private static List getTaxonomiesFromCommonName( final String query ) + throws IOException { + return UniProtWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); + } - synchronized private static List getTaxonomiesFromScientificName( - final String query) throws IOException { - return UniProtWsTools.getTaxonomiesFromScientificNameStrict(query, - MAX_TAXONOMIES_TO_RETURN); - } + synchronized private static List getTaxonomiesFromId( final String query ) throws IOException { + return UniProtWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN ); + } - synchronized private static List getTaxonomiesFromTaxonomyCode( - final String query) throws IOException { - return UniProtWsTools.getTaxonomiesFromTaxonomyCode(query, - MAX_TAXONOMIES_TO_RETURN); - } + synchronized private static List getTaxonomiesFromScientificName( final String query ) + throws IOException { + return UniProtWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); + } - synchronized public static SortedSet inferTaxonomyFromDescendents( - final Phylogeny phy) throws IOException { - clearCachesIfTooLarge(); - final SortedSet not_found = new TreeSet(); - for (final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter - .hasNext();) { - final PhylogenyNode node = iter.next(); - // final QUERY_TYPE qt = null; - // Taxonomy tax = null; - // if ( node.getNodeData().isHasTaxonomy() ) { - // tax = node.getNodeData().getTaxonomy(); - // } - // UniProtTaxonomy up_tax = null; - // if ( ( tax != null ) - // && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( - // tax.getScientificName() ) - // || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || - // !ForesterUtil.isEmpty( tax - // .getCommonName() ) ) ) { - // final String query = null; - // up_tax = obtainUniProtTaxonomy( tax, query, qt ); - // if ( up_tax == null ) { - // not_found.add( query ); - // } - // else { - // updateTaxonomy( qt, node, tax, up_tax ); - // } - // } - if (!node.isExternal()) { - inferTaxonomyFromDescendents(node, not_found); - } - } - return not_found; - } + synchronized private static List getTaxonomiesFromTaxonomyCode( final String query ) + throws IOException { + return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); + } - synchronized private static void inferTaxonomyFromDescendents( - final PhylogenyNode n, final SortedSet not_found) - throws IOException { - if (n.isExternal()) { - throw new IllegalArgumentException( - "attempt to infer taxonomy from descendants of external node"); - } - n.getNodeData().setTaxonomy(null); - final List descs = n.getDescendants(); - final List lineages = new ArrayList(); - int shortest_lin_length = Integer.MAX_VALUE; - for (final PhylogenyNode desc : descs) { - if (desc.getNodeData().isHasTaxonomy() - && (isHasAppropriateId(desc.getNodeData().getTaxonomy()) - || !ForesterUtil.isEmpty(desc.getNodeData() - .getTaxonomy().getScientificName()) - || !ForesterUtil.isEmpty(desc.getNodeData() - .getTaxonomy().getTaxonomyCode()) || !ForesterUtil - .isEmpty(desc.getNodeData().getTaxonomy() - .getCommonName()))) { - final QUERY_TYPE qt = null; - final String query = null; - final UniProtTaxonomy up_tax = obtainUniProtTaxonomy(desc - .getNodeData().getTaxonomy(), query, qt); - String[] lineage = null; - if (up_tax != null) { - lineage = obtainLineagePlusOwnScientificName(up_tax); - } - if ((lineage == null) || (lineage.length < 1)) { - not_found.add(desc.getNodeData().getTaxonomy().asText() - .toString()); - return; - } - if (lineage.length < shortest_lin_length) { - shortest_lin_length = lineage.length; - } - lineages.add(lineage); - } else { - String msg = "Node(s) with no or inappropriate taxonomic information found"; - if (!ForesterUtil.isEmpty(desc.getName())) { - msg = "Node " + desc.getName() - + " has no or inappropriate taxonomic information"; - } - throw new IllegalArgumentException(msg); - } - } - String last_common_lineage = null; - if (shortest_lin_length > 0) { - I: for (int i = 0; i < shortest_lin_length; ++i) { - final String lineage_0 = lineages.get(0)[i]; - for (int j = 1; j < lineages.size(); ++j) { - if (!lineage_0.equals(lineages.get(j)[i])) { - break I; - } - } - last_common_lineage = lineage_0; - } - } - if (last_common_lineage == null) { - return; - } - // if ( !n.getNodeData().isHasTaxonomy() ) { - // n.getNodeData().setTaxonomy( new Taxonomy() ); - // } - final Taxonomy tax = new Taxonomy(); - n.getNodeData().setTaxonomy(tax); - tax.setScientificName(last_common_lineage); - final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn(last_common_lineage); - if (up_tax != null) { - if (!ForesterUtil.isEmpty(up_tax.getRank())) { - try { - tax.setRank(up_tax.getRank().toLowerCase()); - } catch (final PhyloXmlDataFormatException ex) { - tax.setRank(""); - } - } - if (!ForesterUtil.isEmpty(up_tax.getId())) { - tax.setIdentifier(new Identifier(up_tax.getId(), "uniprot")); - } - if (!ForesterUtil.isEmpty(up_tax.getCommonName())) { - tax.setCommonName(up_tax.getCommonName()); - } - if (!ForesterUtil.isEmpty(up_tax.getSynonym()) - && !tax.getSynonyms().contains(up_tax.getSynonym())) { - tax.getSynonyms().add(up_tax.getSynonym()); - } - } - for (final PhylogenyNode desc : descs) { - if (!desc.isExternal() && desc.getNodeData().isHasTaxonomy() - && desc.getNodeData().getTaxonomy().isEqual(tax)) { - desc.getNodeData().setTaxonomy(null); - } - } - } + synchronized public static SortedSet inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException { + clearCachesIfTooLarge(); + final SortedSet not_found = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( !node.isExternal() ) { + inferTaxonomyFromDescendents( node, not_found ); + } + } + return not_found; + } - synchronized private static boolean isHasAppropriateId(final Taxonomy tax) { - return ((tax.getIdentifier() != null) && (!ForesterUtil.isEmpty(tax - .getIdentifier().getValue()) && (tax.getIdentifier() - .getProvider().equalsIgnoreCase("ncbi") - || tax.getIdentifier().getProvider() - .equalsIgnoreCase("uniprot") || tax.getIdentifier() - .getProvider().equalsIgnoreCase("uniprotkb")))); - } + synchronized private static void inferTaxonomyFromDescendents( final PhylogenyNode n, + final SortedSet not_found ) + throws IOException { + if ( n.isExternal() ) { + throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" ); + } + n.getNodeData().setTaxonomy( null ); + final List descs = n.getDescendants(); + final List lineages = new ArrayList(); + int shortest_lin_length = Integer.MAX_VALUE; + for( final PhylogenyNode desc : descs ) { + if ( desc.getNodeData().isHasTaxonomy() + && ( isHasAppropriateId( desc.getNodeData().getTaxonomy() ) + || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() ) + || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil + .isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) { + + final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), null, null ); + String[] lineage = null; + if ( up_tax != null ) { + //lineage = obtainLineagePlusOwnScientificName( up_tax ); + lineage = up_tax.getLineageAsArray(); + } + if ( ( lineage == null ) || ( lineage.length < 1 ) ) { + //TODO remove me + System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" ); + not_found.add( desc.getNodeData().getTaxonomy().asText().toString() ); + return; + } + if ( lineage.length < shortest_lin_length ) { + shortest_lin_length = lineage.length; + } + lineages.add( lineage ); + } + else { + String msg = "Node(s) with no or inappropriate taxonomic information found"; + String node = ""; + if ( !ForesterUtil.isEmpty( desc.getName() ) ) { + node = "\"" + desc.getName() + "\""; + } + else { + node = "[" + desc.getId() + "]"; + } + msg = "Node " + node + " has no or inappropriate taxonomic information"; + // final List e = desc.getAllExternalDescendants(); + //TODO remove me! +// System.out.println(); +// int x = 0; +// for( final PhylogenyNode object : e ) { +// System.out.println( x + ":" ); +// System.out.println( object.getName() + " " ); +// x++; +// } +// System.out.println(); + // + throw new IllegalArgumentException( msg ); + } + } + String last_common_lineage = null; + if ( shortest_lin_length > 0 ) { + I: for( int i = 0; i < shortest_lin_length; ++i ) { + final String lineage_0 = lineages.get( 0 )[ i ]; + for( int j = 1; j < lineages.size(); ++j ) { + if ( !lineage_0.equals( lineages.get( j )[ i ] ) ) { + break I; + } + } + last_common_lineage = lineage_0; + } + } + if ( last_common_lineage == null ) { + System.out.println( "No common lineage for:" ); + int counter = 0; + for( final String[] strings : lineages ) { + System.out.print( counter + ": " ); + ++counter; + for( final String string : strings ) { + System.out.print( string + " " ); + } + System.out.println(); + } + return; + } + final Taxonomy tax = new Taxonomy(); + n.getNodeData().setTaxonomy( tax ); + tax.setScientificName( last_common_lineage ); + final UniProtTaxonomy up_tax = obtainUniProtTaxonomyFromSn( last_common_lineage, lineage ); + if ( up_tax != null ) { + if ( !ForesterUtil.isEmpty( up_tax.getRank() ) ) { + try { + tax.setRank( up_tax.getRank().toLowerCase() ); + } + catch ( final PhyloXmlDataFormatException ex ) { + tax.setRank( "" ); + } + } + if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { + tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) ); + } + if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { + tax.setCommonName( up_tax.getCommonName() ); + } + if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) { + tax.getSynonyms().add( up_tax.getSynonym() ); + } + if ( up_tax.getLineage() != null ) { + tax.setLineage( new ArrayList() ); + for( final String lin : up_tax.getLineage() ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + tax.getLineage().add( lin ); + } + } + } + + } + for( final PhylogenyNode desc : descs ) { + if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy() + && desc.getNodeData().getTaxonomy().isEqual( tax ) ) { + desc.getNodeData().setTaxonomy( null ); + } + } + } - synchronized public static SortedSet obtainDetailedTaxonomicInformation( - final Phylogeny phy) throws IOException { - clearCachesIfTooLarge(); - final SortedSet not_found = new TreeSet(); - for (final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter - .hasNext();) { - final PhylogenyNode node = iter.next(); - final QUERY_TYPE qt = null; - Taxonomy tax = null; - if (node.getNodeData().isHasTaxonomy()) { - tax = node.getNodeData().getTaxonomy(); - } else if (node.isExternal()) { - if (!ForesterUtil.isEmpty(node.getName())) { - not_found.add(node.getName()); - } else { - not_found.add(node.toString()); - } - } - UniProtTaxonomy up_tax = null; - if ((tax != null) - && (isHasAppropriateId(tax) - || !ForesterUtil.isEmpty(tax.getScientificName()) - || !ForesterUtil.isEmpty(tax.getTaxonomyCode()) || !ForesterUtil - .isEmpty(tax.getCommonName()))) { - up_tax = obtainUniProtTaxonomy(tax, null, qt); - if (up_tax != null) { - updateTaxonomy(qt, node, tax, up_tax); - } else { - not_found.add(tax.toString()); - } - } - } - return not_found; - } + synchronized private static boolean isHasAppropriateId( final Taxonomy tax ) { + return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax + .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" ) + || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider() + .equalsIgnoreCase( "uniprotkb" ) ) ) ); + } - synchronized private static String[] obtainLineagePlusOwnScientificName( - final UniProtTaxonomy up_tax) { - final String[] lineage = up_tax.getLineage(); - final String[] lin_plus_self = new String[lineage.length + 1]; - for (int i = 0; i < lineage.length; ++i) { - lin_plus_self[i] = lineage[i]; - } - lin_plus_self[lineage.length] = up_tax.getScientificName(); - return lin_plus_self; - } + synchronized public static SortedSet obtainDetailedTaxonomicInformation( final Phylogeny phy, + final boolean delete ) + throws IOException { + clearCachesIfTooLarge(); + final SortedSet not_found = new TreeSet(); + List not_found_external_nodes = null; + if ( delete ) { + not_found_external_nodes = new ArrayList(); + } + for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final QUERY_TYPE qt = null; + Taxonomy tax = null; + if ( node.getNodeData().isHasTaxonomy() ) { + tax = node.getNodeData().getTaxonomy(); + } + else if ( node.isExternal() ) { + if ( !ForesterUtil.isEmpty( node.getName() ) ) { + not_found.add( node.getName() ); + } + else { + not_found.add( node.toString() ); + } + if ( delete ) { + not_found_external_nodes.add( node ); + } + } + UniProtTaxonomy uniprot_tax = null; + if ( ( tax != null ) + && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() ) + || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax + .getCommonName() ) ) ) { + uniprot_tax = obtainUniProtTaxonomy( tax, null, qt ); + if ( uniprot_tax != null ) { + updateTaxonomy( qt, node, tax, uniprot_tax ); + } + else { + not_found.add( tax.toString() ); + if ( delete && node.isExternal() ) { + not_found_external_nodes.add( node ); + } + } + } + } + if ( delete ) { + for( final PhylogenyNode node : not_found_external_nodes ) { + phy.deleteSubtree( node, true ); + } + phy.externalNodesHaveChanged(); + phy.hashIDs(); + phy.recalculateNumberOfExternalDescendants( true ); + } + return not_found; + } - synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( - final Taxonomy tax, String query, QUERY_TYPE qt) throws IOException { - if (isHasAppropriateId(tax)) { - query = tax.getIdentifier().getValue(); - qt = QUERY_TYPE.ID; - return getTaxonomies(getIdTaxCacheMap(), query, qt); - } else if (!ForesterUtil.isEmpty(tax.getScientificName())) { - query = tax.getScientificName(); - qt = QUERY_TYPE.SN; - return getTaxonomies(getSnTaxCacheMap(), query, qt); - } else if (!ForesterUtil.isEmpty(tax.getTaxonomyCode())) { - query = tax.getTaxonomyCode(); - qt = QUERY_TYPE.CODE; - return getTaxonomies(getCodeTaxCacheMap(), query, qt); - } else { - query = tax.getCommonName(); - qt = QUERY_TYPE.CN; - return getTaxonomies(getCnTaxCacheMap(), query, qt); - } - } + // TODO this might not be needed anymore + // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) { + // final String[] lineage = up_tax.getLineageAsArray(); + // final String[] lin_plus_self = new String[ lineage.length + 1 ]; + // for( int i = 0; i < lineage.length; ++i ) { + // lin_plus_self[ i ] = lineage[ i ]; + // } + // lin_plus_self[ lineage.length ] = up_tax.getScientificName(); + // return lin_plus_self; + // } + synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt ) + throws IOException { + if ( isHasAppropriateId( tax ) ) { + query = tax.getIdentifier().getValue(); + qt = QUERY_TYPE.ID; + System.out.println( "query by id: " + query); + return getTaxonomies( getIdTaxCacheMap(), query, qt ); + } + else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { + query = tax.getScientificName(); + qt = QUERY_TYPE.SN; + System.out.println( "query by sn: " + query); + return getTaxonomies( getSnTaxCacheMap(), query, qt ); + } + else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + query = tax.getTaxonomyCode(); + qt = QUERY_TYPE.CODE; + return getTaxonomies( getCodeTaxCacheMap(), query, qt ); + } + else { + query = tax.getCommonName(); + qt = QUERY_TYPE.CN; + return getTaxonomies( getCnTaxCacheMap(), query, qt ); + } + } - synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( - final String sn) throws IOException { - UniProtTaxonomy up_tax = null; - if (getSnTaxCacheMap().containsKey(sn)) { - up_tax = getSnTaxCacheMap().get(sn).copy(); - } else { - final List up_taxonomies = getTaxonomiesFromScientificName(sn); - if ((up_taxonomies != null) && (up_taxonomies.size() == 1)) { - up_tax = up_taxonomies.get(0); - getSnTaxCacheMap().put(sn, up_tax); - if (!ForesterUtil.isEmpty(up_tax.getCode())) { - getCodeTaxCacheMap().put(up_tax.getCode(), up_tax); - } - if (!ForesterUtil.isEmpty(up_tax.getCommonName())) { - getCnTaxCacheMap().put(up_tax.getCommonName(), up_tax); - } - if (!ForesterUtil.isEmpty(up_tax.getId())) { - getIdTaxCacheMap().put(up_tax.getId(), up_tax); - } - } - } - return up_tax; - } + synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List lineage ) throws IOException { + UniProtTaxonomy up_tax = null; + if ( getSnTaxCacheMap().containsKey( sn ) ) { + up_tax = getSnTaxCacheMap().get( sn ).copy(); + } + else { + final List up_taxonomies = getTaxonomiesFromScientificName( sn ); + if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) { + up_tax = up_taxonomies.get( 0 ); + getSnTaxCacheMap().put( sn, up_tax ); + if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { + getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { + getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { + getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + } + + } + } + return up_tax; + } - synchronized private static void updateTaxonomy(final QUERY_TYPE qt, - final PhylogenyNode node, final Taxonomy tax, - final UniProtTaxonomy up_tax) { - if ((qt != QUERY_TYPE.SN) - && !ForesterUtil.isEmpty(up_tax.getScientificName()) - && ForesterUtil.isEmpty(tax.getScientificName())) { - tax.setScientificName(up_tax.getScientificName()); - } - if (node.isExternal() - && ((qt != QUERY_TYPE.CODE) - && !ForesterUtil.isEmpty(up_tax.getCode()) && ForesterUtil - .isEmpty(tax.getTaxonomyCode()))) { - tax.setTaxonomyCode(up_tax.getCode()); - } - if ((qt != QUERY_TYPE.CN) - && !ForesterUtil.isEmpty(up_tax.getCommonName()) - && ForesterUtil.isEmpty(tax.getCommonName())) { - tax.setCommonName(up_tax.getCommonName()); - } - if (!ForesterUtil.isEmpty(up_tax.getSynonym()) - && !tax.getSynonyms().contains(up_tax.getSynonym())) { - tax.getSynonyms().add(up_tax.getSynonym()); - } - if (!ForesterUtil.isEmpty(up_tax.getRank()) - && ForesterUtil.isEmpty(tax.getRank())) { - try { - tax.setRank(up_tax.getRank().toLowerCase()); - } catch (final PhyloXmlDataFormatException ex) { - tax.setRank(""); - } - } - if ((qt != QUERY_TYPE.ID) && !ForesterUtil.isEmpty(up_tax.getId()) - && (tax.getIdentifier() == null)) { - tax.setIdentifier(new Identifier(up_tax.getId(), "uniprot")); - } - } + synchronized private static void updateTaxonomy( final QUERY_TYPE qt, + final PhylogenyNode node, + final Taxonomy tax, + final UniProtTaxonomy up_tax ) { + if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() ) + && ForesterUtil.isEmpty( tax.getScientificName() ) ) { + tax.setScientificName( up_tax.getScientificName() ); + } + // if ( node.isExternal() + if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) + && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + tax.setTaxonomyCode( up_tax.getCode() ); + } + if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() ) + && ForesterUtil.isEmpty( tax.getCommonName() ) ) { + tax.setCommonName( up_tax.getCommonName() ); + } + if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) { + tax.getSynonyms().add( up_tax.getSynonym() ); + } + if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) { + try { + tax.setRank( up_tax.getRank().toLowerCase() ); + } + catch ( final PhyloXmlDataFormatException ex ) { + tax.setRank( "" ); + } + } + if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) && ( tax.getIdentifier() == null ) ) { + tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) ); + } + if ( up_tax.getLineage() != null ) { + tax.setLineage( new ArrayList() ); + for( final String lin : up_tax.getLineage() ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + tax.getLineage().add( lin ); + } + } + } + + } - private enum QUERY_TYPE { - CODE, SN, CN, ID; - } + private enum QUERY_TYPE { + CODE, SN, CN, ID; + } }