- }
-
- synchronized private static boolean isHasAppropriateId( final Taxonomy tax ) {
- return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax
- .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" )
- || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider()
- .equalsIgnoreCase( "uniprotkb" ) ) ) );
- }
-
- synchronized public static SortedSet<String> obtainDetailedTaxonomicInformation( final Phylogeny phy,
- final boolean delete )
- throws IOException {
- clearCachesIfTooLarge();
- final SortedSet<String> not_found = new TreeSet<String>();
- List<PhylogenyNode> not_found_external_nodes = null;
- if ( delete ) {
- not_found_external_nodes = new ArrayList<PhylogenyNode>();
- }
- for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
- final PhylogenyNode node = iter.next();
- final QUERY_TYPE qt = null;
- Taxonomy tax = null;
- if ( node.getNodeData().isHasTaxonomy() ) {
- tax = node.getNodeData().getTaxonomy();
- }
- else if ( node.isExternal() ) {
- if ( !ForesterUtil.isEmpty( node.getName() ) ) {
- not_found.add( node.getName() );
- }
- else {
- not_found.add( node.toString() );
- }
- if ( delete ) {
- not_found_external_nodes.add( node );
- }
- }
- UniProtTaxonomy uniprot_tax = null;
- if ( ( tax != null )
- && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() )
- || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax
- .getCommonName() ) ) ) {
- uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
- if ( uniprot_tax != null ) {
- updateTaxonomy( qt, node, tax, uniprot_tax );
- }
- else {
- not_found.add( tax.toString() );
- if ( delete && node.isExternal() ) {
- not_found_external_nodes.add( node );
- }
- }
- }
- }
- if ( delete ) {
- for( final PhylogenyNode node : not_found_external_nodes ) {
- phy.deleteSubtree( node, true );
- }
- phy.externalNodesHaveChanged();
- phy.hashIDs();
- phy.recalculateNumberOfExternalDescendants( true );
- }
- return not_found;
- }
-
- // TODO this might not be needed anymore
- // synchronized private static String[] obtainLineagePlusOwnScientificName( final UniProtTaxonomy up_tax ) {
- // final String[] lineage = up_tax.getLineageAsArray();
- // final String[] lin_plus_self = new String[ lineage.length + 1 ];
- // for( int i = 0; i < lineage.length; ++i ) {
- // lin_plus_self[ i ] = lineage[ i ];
- // }
- // lin_plus_self[ lineage.length ] = up_tax.getScientificName();
- // return lin_plus_self;
- // }
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, String query, QUERY_TYPE qt )
- throws IOException {
- if ( isHasAppropriateId( tax ) ) {
- query = tax.getIdentifier().getValue();
- qt = QUERY_TYPE.ID;
- System.out.println( "query by id: " + query);
- return getTaxonomies( getIdTaxCacheMap(), query, qt );
- }
- else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- query = tax.getScientificName();
- qt = QUERY_TYPE.SN;
- System.out.println( "query by sn: " + query);
- return getTaxonomies( getSnTaxCacheMap(), query, qt );
- }
- else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- query = tax.getTaxonomyCode();
- qt = QUERY_TYPE.CODE;
- return getTaxonomies( getCodeTaxCacheMap(), query, qt );
- }
- else {
- query = tax.getCommonName();
- qt = QUERY_TYPE.CN;
- return getTaxonomies( getCnTaxCacheMap(), query, qt );
- }
- }
-
- synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn, List<String> lineage ) throws IOException {
- UniProtTaxonomy up_tax = null;
- if ( getSnTaxCacheMap().containsKey( sn ) ) {
- up_tax = getSnTaxCacheMap().get( sn ).copy();
- }
- else {
- final List<UniProtTaxonomy> up_taxonomies = getTaxonomiesFromScientificName( sn );
- if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) {
- up_tax = up_taxonomies.get( 0 );
- getSnTaxCacheMap().put( sn, up_tax );
- if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) {
- getCodeTaxCacheMap().put( up_tax.getCode(), up_tax );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) {
- getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) {
- getIdTaxCacheMap().put( up_tax.getId(), up_tax );
- }
-
- }
- }
- return up_tax;
- }
-
- synchronized private static void updateTaxonomy( final QUERY_TYPE qt,
- final PhylogenyNode node,
- final Taxonomy tax,
- final UniProtTaxonomy up_tax ) {
- if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() )
- && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
- tax.setScientificName( up_tax.getScientificName() );
- }
- // if ( node.isExternal()
- if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
- && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
- tax.setTaxonomyCode( up_tax.getCode() );
- }
- if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() )
- && ForesterUtil.isEmpty( tax.getCommonName() ) ) {
- tax.setCommonName( up_tax.getCommonName() );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
- tax.getSynonyms().add( up_tax.getSynonym() );
- }
- if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) {
- try {
- tax.setRank( up_tax.getRank().toLowerCase() );
- }
- catch ( final PhyloXmlDataFormatException ex ) {
- tax.setRank( "" );
- }
- }
- if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) && ( tax.getIdentifier() == null ) ) {
- tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
- }
- if ( up_tax.getLineage() != null ) {