X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fanalysis%2FTaxonomyDataManager.java;h=1a1bef65828f29b38e50a528834a276c10e5b60e;hb=41ea5973f93687513d29e5b7cad1abff8f3adb4b;hp=15d58a286554db84c604575a2b7cc630ebc6aac7;hpb=1a94ef601e050dcda8d2b5492918f49ea7c430d9;p=jalview.git diff --git a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java index 15d58a2..1a1bef6 100644 --- a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java +++ b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.analysis; @@ -38,6 +38,7 @@ import javax.swing.JOptionPane; import org.forester.archaeopteryx.MainFrameApplication; import org.forester.archaeopteryx.TreePanel; +import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer; import org.forester.archaeopteryx.tools.RunnableProcess; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; @@ -46,8 +47,8 @@ import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; -import org.forester.ws.uniprot.UniProtTaxonomy; -import org.forester.ws.uniprot.UniProtWsTools; +import org.forester.ws.seqdb.SequenceDbWsTools; +import org.forester.ws.seqdb.UniProtTaxonomy; public final class TaxonomyDataManager extends RunnableProcess { @@ -175,19 +176,19 @@ public final class TaxonomyDataManager extends RunnableProcess { } private final static List getTaxonomiesFromCommonName( final String query ) throws IOException { - return UniProtWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); + return SequenceDbWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); } private final static List getTaxonomiesFromId( final String query ) throws IOException { - return UniProtWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN ); + return SequenceDbWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN ); } private final static List getTaxonomiesFromScientificName( final String query ) throws IOException { - return UniProtWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); + return SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); } private final static List getTaxonomiesFromTaxonomyCode( final String query ) throws IOException { - return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); + return SequenceDbWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); } static final boolean isHasAppropriateId( final Taxonomy tax ) { @@ -242,6 +243,7 @@ public final class TaxonomyDataManager extends RunnableProcess { if ( tax == null ) { tax = new Taxonomy(); node.getNodeData().addTaxonomy( tax ); + node.setName( "" ); } updateTaxonomy( qt, node, tax, uniprot_tax ); } @@ -250,7 +252,7 @@ public final class TaxonomyDataManager extends RunnableProcess { not_found.add( tax.toString() ); } else { - not_found.add(node.getName() ); + not_found.add( node.getName() ); } if ( delete && node.isExternal() ) { not_found_external_nodes.add( node ); @@ -263,7 +265,7 @@ public final class TaxonomyDataManager extends RunnableProcess { phy.deleteSubtree( node, true ); } phy.externalNodesHaveChanged(); - phy.hashIDs(); + phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); } return not_found; @@ -324,56 +326,71 @@ public final class TaxonomyDataManager extends RunnableProcess { static final UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List lineage ) throws AncestralTaxonomyInferenceException, IOException { final String lineage_str = ForesterUtil.stringListToString( lineage, ">" ); - UniProtTaxonomy up_tax = null; if ( TaxonomyDataManager.getLineageTaxCacheMap().containsKey( lineage_str ) ) { - up_tax = TaxonomyDataManager.getLineageTaxCacheMap().get( lineage_str ).copy(); + return TaxonomyDataManager.getLineageTaxCacheMap().get( lineage_str ).copy(); } else { + final List matching_taxonomies = new ArrayList(); final List up_taxonomies = getTaxonomiesFromScientificName( lineage .get( lineage.size() - 1 ) ); if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) { for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) { boolean match = true; I: for( int i = 0; i < lineage.size(); ++i ) { - if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { + if ( ( i == up_taxonomy.getLineage().size() ) + || !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { match = false; break I; } } if ( match ) { - if ( up_tax != null ) { - throw new AncestralTaxonomyInferenceException( "lineage \"" - + ForesterUtil.stringListToString( lineage, " > " ) + "\" is not unique" ); - } - up_tax = up_taxonomy; + matching_taxonomies.add( up_taxonomy ); } } - if ( up_tax == null ) { + if ( matching_taxonomies.isEmpty() ) { throw new AncestralTaxonomyInferenceException( "lineage \"" + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" ); } - TaxonomyDataManager.getLineageTaxCacheMap().put( lineage_str, up_tax ); - if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) { - TaxonomyDataManager.getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); + //in case of more than one (e.g. "Xenopus" Genus and Subgenus), keep shorter, less specific one: + int shortest = Integer.MAX_VALUE; + UniProtTaxonomy least_specific_up_tax = null; + for( final UniProtTaxonomy m : matching_taxonomies ) { + final int s = m.getLineage().size(); + if ( s < shortest ) { + shortest = s; + least_specific_up_tax = m; + } } - if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { - TaxonomyDataManager.getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + TaxonomyDataManager.getLineageTaxCacheMap().put( lineage_str, least_specific_up_tax ); + if ( !ForesterUtil.isEmpty( least_specific_up_tax.getScientificName() ) ) { + TaxonomyDataManager.getSnTaxCacheMap().put( least_specific_up_tax.getScientificName(), + least_specific_up_tax ); } - if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { - TaxonomyDataManager.getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + if ( !ForesterUtil.isEmpty( least_specific_up_tax.getCode() ) ) { + TaxonomyDataManager.getCodeTaxCacheMap().put( least_specific_up_tax.getCode(), + least_specific_up_tax ); } - if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { - TaxonomyDataManager.getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + if ( !ForesterUtil.isEmpty( least_specific_up_tax.getCommonName() ) ) { + TaxonomyDataManager.getCnTaxCacheMap().put( least_specific_up_tax.getCommonName(), + least_specific_up_tax ); } + if ( !ForesterUtil.isEmpty( least_specific_up_tax.getId() ) ) { + TaxonomyDataManager.getIdTaxCacheMap().put( least_specific_up_tax.getId(), least_specific_up_tax ); + } + return least_specific_up_tax; + } + else { + throw new AncestralTaxonomyInferenceException( "taxonomy \"" + ( lineage.get( lineage.size() - 1 ) ) + + "\" not found" ); } } - return up_tax; } synchronized final private static void updateTaxonomy( final QUERY_TYPE qt, final PhylogenyNode node, final Taxonomy tax, - final UniProtTaxonomy up_tax ) { + final UniProtTaxonomy up_tax ) + throws PhyloXmlDataFormatException { if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() ) && ForesterUtil.isEmpty( tax.getScientificName() ) ) { tax.setScientificName( up_tax.getScientificName() ); @@ -520,7 +537,7 @@ public final class TaxonomyDataManager extends RunnableProcess { } private final String getBaseUrl() { - return UniProtWsTools.BASE_URL; + return AncestralTaxonomyInferrer.getBaseUrl(); } @Override