-
- synchronized private static boolean isHasAppropriateId( final Taxonomy tax ) {
- return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax
- .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" )
- || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider()
- .equalsIgnoreCase( "uniprotkb" ) ) ) );
- }
-
- synchronized public static SortedSet<String> obtainDetailedTaxonomicInformation( final Phylogeny phy,
- final boolean delete )
- throws IOException, AncestralTaxonomyInferenceException {
- clearCachesIfTooLarge();
- final SortedSet<String> not_found = new TreeSet<String>();
- List<PhylogenyNode> not_found_external_nodes = null;
- if ( delete ) {
- not_found_external_nodes = new ArrayList<PhylogenyNode>();
- }
- for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- final QUERY_TYPE qt = null;
- Taxonomy tax = null;
- if ( node.getNodeData().isHasTaxonomy() ) {
- tax = node.getNodeData().getTaxonomy();
- }
- else if ( node.isExternal() ) {
- if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- not_found.add( node.getName() );
- }
- else {
- not_found.add( node.toString() );
- }
- if ( delete ) {
- not_found_external_nodes.add( node );
- }
- }
- UniProtTaxonomy uniprot_tax = null;
- if ( ( tax != null )
- && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() )
- || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax
- .getCommonName() ) ) ) {
- uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
- if ( uniprot_tax != null ) {
- updateTaxonomy( qt, node, tax, uniprot_tax );
- }
- else {
- not_found.add( tax.toString() );
- if ( delete && node.isExternal() ) {
- not_found_external_nodes.add( node );
- }
- }
- }
- }
- if ( delete ) {
- for( final PhylogenyNode node : not_found_external_nodes ) {
- phy.deleteSubtree( node, true );
- }
- phy.externalNodesHaveChanged();
- phy.hashIDs();
- phy.recalculateNumberOfExternalDescendants( true );
- }
- return not_found;
- }
-
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, Object query, QUERY_TYPE qt )
- throws IOException, AncestralTaxonomyInferenceException {
- if ( isHasAppropriateId( tax ) ) {
- query = tax.getIdentifier().getValue();
- qt = QUERY_TYPE.ID;
- return getTaxonomies( getIdTaxCacheMap(), query, qt );
- }
- else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- if ( !ForesterUtil.isEmpty( tax.getLineage() ) ) {
- query = tax.getLineage();
- qt = QUERY_TYPE.LIN;
- return getTaxonomies( getLineageTaxCacheMap(), query, qt );
- }
- else {
- query = tax.getScientificName();
- qt = QUERY_TYPE.SN;
- return getTaxonomies( getSnTaxCacheMap(), query, qt );
- }
- }
- else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- query = tax.getTaxonomyCode();
- qt = QUERY_TYPE.CODE;
- return getTaxonomies( getCodeTaxCacheMap(), query, qt );
- }
- else {
- query = tax.getCommonName();
- qt = QUERY_TYPE.CN;
- return getTaxonomies( getCnTaxCacheMap(), query, qt );
- }
- }
-
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List<String> lineage )
- throws AncestralTaxonomyInferenceException, IOException {
- final String lineage_str = ForesterUtil.stringListToString( lineage, ">" );
- UniProtTaxonomy up_tax = null;
- if ( getLineageTaxCacheMap().containsKey( lineage_str ) ) {
- up_tax = getLineageTaxCacheMap().get( lineage_str ).copy();
- }
- else {
- final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName( lineage
- .get( lineage.size() - 1 ) );
- if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) {
- for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) {
- boolean match = true;
- I: for( int i = 0; i < lineage.size(); ++i ) {
- if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) {
- match = false;
- break I;
- }
- }
- if ( match ) {
- if ( up_tax != null ) {
- throw new AncestralTaxonomyInferenceException( "lineage \""
- + ForesterUtil.stringListToString( lineage, " > " ) + "\" is not unique" );
- }
- up_tax = up_taxonomy;
- }
- }
- if ( up_tax == null ) {
- throw new AncestralTaxonomyInferenceException( "lineage \""
- + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" );
- }
- getLineageTaxCacheMap().put( lineage_str, up_tax );
- if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) {
- getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) {
- getCodeTaxCacheMap().put( up_tax.getCode(), up_tax );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
- getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
- getIdTaxCacheMap().put( up_tax.getId(), up_tax );
- }
- }
- }
- return up_tax;
- }
-
- synchronized private static void updateTaxonomy( final QUERY_TYPE qt,
- final PhylogenyNode node,
- final Taxonomy tax,
- final UniProtTaxonomy up_tax ) {
- if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() )
- && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- tax.setScientificName( up_tax.getScientificName() );
- }
- if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
- && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- tax.setTaxonomyCode( up_tax.getCode() );
- }
- if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() )
- && ForesterUtil.isEmpty( tax.getCommonName() ) ) {
- tax.setCommonName( up_tax.getCommonName() );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
- tax.getSynonyms().add( up_tax.getSynonym() );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) {
- try {
- tax.setRank( up_tax.getRank().toLowerCase() );
- }
- catch ( final PhyloXmlDataFormatException ex ) {
- tax.setRank( "" );
- }
- }
- if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) && ( tax.getIdentifier() == null ) ) {
- tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
- }
- if ( up_tax.getLineage() != null ) {
- tax.setLineage( new ArrayList<String>() );
- for( final String lin : up_tax.getLineage() ) {
- if ( !ForesterUtil.isEmpty( lin ) ) {
- tax.getLineage().add( lin );
- }
- }
- }
- }
-
- private enum QUERY_TYPE {
- CODE, SN, CN, ID, LIN;
- }