From: cmzmasek Date: Mon, 4 Jun 2012 14:10:55 +0000 (+0000) Subject: in progress (broken) X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=9c419241040e70c793e6d3b2f18a0f93279e367d;p=jalview.git in progress (broken) --- diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index 73af2c1..2166110 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -28,9 +28,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import java.util.SortedSet; -import java.util.TreeSet; +import org.forester.analysis.TaxonomyDataObtainer.QUERY_TYPE; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -43,53 +42,9 @@ import org.forester.ws.uniprot.UniProtWsTools; public final class AncestralTaxonomyInference { - private static final int MAX_CACHE_SIZE = 100000; private static final int MAX_TAXONOMIES_TO_RETURN = 10; - private static final HashMap _sn_up_cache_map = new HashMap(); - private static final HashMap _lineage_up_cache_map = new HashMap(); - private static final HashMap _code_up_cache_map = new HashMap(); - private static final HashMap _cn_up_cache_map = new HashMap(); - private static final HashMap _id_up_cache_map = new HashMap(); - - synchronized private static void clearCachesIfTooLarge() { - if ( getSnTaxCacheMap().size() > MAX_CACHE_SIZE ) { - getSnTaxCacheMap().clear(); - } - if ( getLineageTaxCacheMap().size() > MAX_CACHE_SIZE ) { - getLineageTaxCacheMap().clear(); - } - if ( getCnTaxCacheMap().size() > MAX_CACHE_SIZE ) { - getCnTaxCacheMap().clear(); - } - if ( getCodeTaxCacheMap().size() > MAX_CACHE_SIZE ) { - getCodeTaxCacheMap().clear(); - } - if ( getIdTaxCacheMap().size() > MAX_CACHE_SIZE ) { - getIdTaxCacheMap().clear(); - } - } - - synchronized private static HashMap getCnTaxCacheMap() { - return _cn_up_cache_map; - } - - synchronized private static HashMap getCodeTaxCacheMap() { - return _code_up_cache_map; - } - - synchronized private static HashMap getIdTaxCacheMap() { - return _id_up_cache_map; - } - - synchronized private static HashMap getSnTaxCacheMap() { - return _sn_up_cache_map; - } - - synchronized private static HashMap getLineageTaxCacheMap() { - return _lineage_up_cache_map; - } - - synchronized private static UniProtTaxonomy getTaxonomies( final HashMap cache, + + private static UniProtTaxonomy getTaxonomies( final HashMap cache, final Object query, final QUERY_TYPE qt ) throws IOException, AncestralTaxonomyInferenceException { @@ -119,16 +74,16 @@ public final class AncestralTaxonomyInference { if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) { final UniProtTaxonomy up_tax = up_taxonomies.get( 0 ); if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) { - getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); + TaxonomyDataObtainer.getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { - getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + TaxonomyDataObtainer.getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { - getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + TaxonomyDataObtainer.getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { - getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + TaxonomyDataObtainer.getIdTaxCacheMap().put( up_tax.getId(), up_tax ); } return up_tax; } @@ -138,28 +93,28 @@ public final class AncestralTaxonomyInference { } } - synchronized private static List getTaxonomiesFromCommonName( final String query ) + private static List getTaxonomiesFromCommonName( final String query ) throws IOException { return UniProtWsTools.getTaxonomiesFromCommonNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); } - synchronized private static List getTaxonomiesFromId( final String query ) throws IOException { + private static List getTaxonomiesFromId( final String query ) throws IOException { return UniProtWsTools.getTaxonomiesFromId( query, MAX_TAXONOMIES_TO_RETURN ); } - synchronized private static List getTaxonomiesFromScientificName( final String query ) + private static List getTaxonomiesFromScientificName( final String query ) throws IOException { return UniProtWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN ); } - synchronized private static List getTaxonomiesFromTaxonomyCode( final String query ) + private static List getTaxonomiesFromTaxonomyCode( final String query ) throws IOException { return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); } - synchronized public static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException, + public static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException, AncestralTaxonomyInferenceException { - clearCachesIfTooLarge(); + TaxonomyDataObtainer.clearCachesIfTooLarge(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !node.isExternal() ) { @@ -168,7 +123,7 @@ public final class AncestralTaxonomyInference { } } - synchronized private static void inferTaxonomyFromDescendents( final PhylogenyNode n ) throws IOException, + private static void inferTaxonomyFromDescendents( final PhylogenyNode n ) throws IOException, AncestralTaxonomyInferenceException { if ( n.isExternal() ) { throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" ); @@ -179,7 +134,7 @@ public final class AncestralTaxonomyInference { int shortest_lin_length = Integer.MAX_VALUE; for( final PhylogenyNode desc : descs ) { if ( desc.getNodeData().isHasTaxonomy() - && ( isHasAppropriateId( desc.getNodeData().getTaxonomy() ) + && ( TaxonomyDataObtainer.isHasAppropriateId( desc.getNodeData().getTaxonomy() ) || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() ) || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getLineage() ) || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil @@ -330,105 +285,15 @@ public final class AncestralTaxonomyInference { } } - synchronized private static boolean isHasAppropriateId( final Taxonomy tax ) { - return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax - .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" ) - || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider() - .equalsIgnoreCase( "uniprotkb" ) ) ) ); - } + - synchronized public static SortedSet obtainDetailedTaxonomicInformation( final Phylogeny phy, - final boolean delete ) - throws IOException, AncestralTaxonomyInferenceException { - clearCachesIfTooLarge(); - final SortedSet not_found = new TreeSet(); - List not_found_external_nodes = null; - if ( delete ) { - not_found_external_nodes = new ArrayList(); - } - for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - final QUERY_TYPE qt = null; - Taxonomy tax = null; - if ( node.getNodeData().isHasTaxonomy() ) { - tax = node.getNodeData().getTaxonomy(); - } - else if ( node.isExternal() ) { - if ( !ForesterUtil.isEmpty( node.getName() ) ) { - not_found.add( node.getName() ); - } - else { - not_found.add( node.toString() ); - } - if ( delete ) { - not_found_external_nodes.add( node ); - } - } - UniProtTaxonomy uniprot_tax = null; - if ( ( tax != null ) - && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() ) - || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax - .getCommonName() ) ) ) { - uniprot_tax = obtainUniProtTaxonomy( tax, null, qt ); - if ( uniprot_tax != null ) { - updateTaxonomy( qt, node, tax, uniprot_tax ); - } - else { - not_found.add( tax.toString() ); - if ( delete && node.isExternal() ) { - not_found_external_nodes.add( node ); - } - } - } - } - if ( delete ) { - for( final PhylogenyNode node : not_found_external_nodes ) { - phy.deleteSubtree( node, true ); - } - phy.externalNodesHaveChanged(); - phy.hashIDs(); - phy.recalculateNumberOfExternalDescendants( true ); - } - return not_found; - } - - synchronized public static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, Object query, QUERY_TYPE qt ) - throws IOException, AncestralTaxonomyInferenceException { - if ( isHasAppropriateId( tax ) ) { - query = tax.getIdentifier().getValue(); - qt = QUERY_TYPE.ID; - return getTaxonomies( getIdTaxCacheMap(), query, qt ); - } - else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { - if ( !ForesterUtil.isEmpty( tax.getLineage() ) ) { - query = tax.getLineage(); - qt = QUERY_TYPE.LIN; - return getTaxonomies( getLineageTaxCacheMap(), query, qt ); - } - else { - query = tax.getScientificName(); - qt = QUERY_TYPE.SN; - return getTaxonomies( getSnTaxCacheMap(), query, qt ); - } - } - else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - query = tax.getTaxonomyCode(); - qt = QUERY_TYPE.CODE; - return getTaxonomies( getCodeTaxCacheMap(), query, qt ); - } - else { - query = tax.getCommonName(); - qt = QUERY_TYPE.CN; - return getTaxonomies( getCnTaxCacheMap(), query, qt ); - } - } - - synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List lineage ) + + private static UniProtTaxonomy obtainUniProtTaxonomyFromLineage( final List lineage ) throws AncestralTaxonomyInferenceException, IOException { final String lineage_str = ForesterUtil.stringListToString( lineage, ">" ); UniProtTaxonomy up_tax = null; - if ( getLineageTaxCacheMap().containsKey( lineage_str ) ) { - up_tax = getLineageTaxCacheMap().get( lineage_str ).copy(); + if ( TaxonomyDataObtainer.getLineageTaxCacheMap().containsKey( lineage_str ) ) { + up_tax = TaxonomyDataObtainer.getLineageTaxCacheMap().get( lineage_str ).copy(); } else { final List up_taxonomies = getTaxonomiesFromScientificName( lineage @@ -454,66 +319,24 @@ public final class AncestralTaxonomyInference { throw new AncestralTaxonomyInferenceException( "lineage \"" + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" ); } - getLineageTaxCacheMap().put( lineage_str, up_tax ); + TaxonomyDataObtainer.getLineageTaxCacheMap().put( lineage_str, up_tax ); if ( !ForesterUtil.isEmpty( up_tax.getScientificName() ) ) { - getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); + TaxonomyDataObtainer.getSnTaxCacheMap().put( up_tax.getScientificName(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { - getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + TaxonomyDataObtainer.getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { - getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + TaxonomyDataObtainer.getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); } if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { - getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + TaxonomyDataObtainer.getIdTaxCacheMap().put( up_tax.getId(), up_tax ); } } } return up_tax; } - synchronized private static void updateTaxonomy( final QUERY_TYPE qt, - final PhylogenyNode node, - final Taxonomy tax, - final UniProtTaxonomy up_tax ) { - if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() ) - && ForesterUtil.isEmpty( tax.getScientificName() ) ) { - tax.setScientificName( up_tax.getScientificName() ); - } - if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) - && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - tax.setTaxonomyCode( up_tax.getCode() ); - } - if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() ) - && ForesterUtil.isEmpty( tax.getCommonName() ) ) { - tax.setCommonName( up_tax.getCommonName() ); - } - if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) { - tax.getSynonyms().add( up_tax.getSynonym() ); - } - if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) { - try { - tax.setRank( up_tax.getRank().toLowerCase() ); - } - catch ( final PhyloXmlDataFormatException ex ) { - tax.setRank( "" ); - } - } - if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) - && ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) ) { - tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) ); - } - if ( up_tax.getLineage() != null ) { - tax.setLineage( new ArrayList() ); - for( final String lin : up_tax.getLineage() ) { - if ( !ForesterUtil.isEmpty( lin ) ) { - tax.getLineage().add( lin ); - } - } - } - } - - private enum QUERY_TYPE { - CODE, SN, CN, ID, LIN; - } + + } diff --git a/forester/java/src/org/forester/analysis/TaxonomyDataObtainer.java b/forester/java/src/org/forester/analysis/TaxonomyDataObtainer.java new file mode 100644 index 0000000..003cb9c --- /dev/null +++ b/forester/java/src/org/forester/analysis/TaxonomyDataObtainer.java @@ -0,0 +1,387 @@ +// $Id: +// +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.analysis; + +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import javax.swing.JOptionPane; + +import org.forester.archaeopteryx.MainFrameApplication; +import org.forester.archaeopteryx.TreePanel; +import org.forester.archaeopteryx.tools.RunnableProcess; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Identifier; +import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.ForesterUtil; +import org.forester.ws.uniprot.UniProtTaxonomy; +import org.forester.ws.uniprot.UniProtWsTools; + +public class TaxonomyDataObtainer extends RunnableProcess { + + private static final int MAX_CACHE_SIZE = 100000; + + private static final HashMap _sn_up_cache_map = new HashMap(); + private static final HashMap _lineage_up_cache_map = new HashMap(); + private static final HashMap _code_up_cache_map = new HashMap(); + private static final HashMap _cn_up_cache_map = new HashMap(); + private static final HashMap _id_up_cache_map = new HashMap(); + + + private final Phylogeny _phy; + private final MainFrameApplication _mf; + private final TreePanel _treepanel; + private final boolean _delete; + + public TaxonomyDataObtainer( final MainFrameApplication mf, + final TreePanel treepanel, + final Phylogeny phy, + final boolean delete ) { + _phy = phy; + _mf = mf; + _treepanel = treepanel; + _delete = delete; + } + + public TaxonomyDataObtainer( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { + _phy = phy; + _mf = mf; + _treepanel = treepanel; + _delete = false; + } + + private String getBaseUrl() { + return UniProtWsTools.BASE_URL; + } + + private void execute() { + start( _mf, "taxonomy data" ); + SortedSet not_found = null; + try { + not_found = obtainDetailedTaxonomicInformation( _phy, _delete ); + } + catch ( final UnknownHostException e ) { + JOptionPane.showMessageDialog( _mf, + "Could not connect to \"" + getBaseUrl() + "\"", + "Network error during taxonomic information gathering", + JOptionPane.ERROR_MESSAGE ); + return; + } + catch ( final IOException e ) { + e.printStackTrace(); + JOptionPane.showMessageDialog( _mf, + e.toString(), + "Failed to obtain taxonomic information", + JOptionPane.ERROR_MESSAGE ); + return; + } + catch ( final AncestralTaxonomyInferenceException e ) { + e.printStackTrace(); + JOptionPane.showMessageDialog( _mf, + e.toString(), + "Failed to obtain taxonomic information", + JOptionPane.ERROR_MESSAGE ); + return; + } + finally { + end( _mf ); + } + if ( ( _phy == null ) || _phy.isEmpty() ) { + try { + JOptionPane.showMessageDialog( _mf, + "None of the external node taxonomies could be resolved", + "Taxonomy Tool Failed", + JOptionPane.WARNING_MESSAGE ); + } + catch ( final Exception e ) { + // Not important if this fails, do nothing. + } + return; + } + _treepanel.setTree( _phy ); + _mf.showWhole(); + _treepanel.setEdited( true ); + if ( ( not_found != null ) && ( not_found.size() > 0 ) ) { + int max = not_found.size(); + boolean more = false; + if ( max > 20 ) { + more = true; + max = 20; + } + final StringBuffer sb = new StringBuffer(); + sb.append( "Not all taxonomies could be resolved.\n" ); + if ( not_found.size() == 1 ) { + if ( _delete ) { + sb.append( "The following taxonomy was not found and deleted (if external):\n" ); + } + else { + sb.append( "The following taxonomy was not found:\n" ); + } + } + else { + if ( _delete ) { + sb.append( "The following taxonomies were not found and deleted (if external) (total: " + + not_found.size() + "):\n" ); + } + else { + sb.append( "The following taxonomies were not found (total: " + not_found.size() + "):\n" ); + } + } + int i = 0; + for( final String string : not_found ) { + if ( i > 19 ) { + break; + } + sb.append( string ); + sb.append( "\n" ); + ++i; + } + if ( more ) { + sb.append( "..." ); + } + try { + JOptionPane.showMessageDialog( _mf, + sb.toString(), + "Taxonomy Tool Completed", + JOptionPane.WARNING_MESSAGE ); + } + catch ( final Exception e ) { + // Not important if this fails, do nothing. + } + } + else { + try { + JOptionPane.showMessageDialog( _mf, + "Taxonomy tool successfully completed", + "Taxonomy Tool Completed", + JOptionPane.INFORMATION_MESSAGE ); + } + catch ( final Exception e ) { + // Not important if this fails, do nothing. + } + } + } + + + synchronized static void clearCachesIfTooLarge() { + if ( getSnTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getSnTaxCacheMap().clear(); + } + if ( getLineageTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getLineageTaxCacheMap().clear(); + } + if ( getCnTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getCnTaxCacheMap().clear(); + } + if ( getCodeTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getCodeTaxCacheMap().clear(); + } + if ( getIdTaxCacheMap().size() > MAX_CACHE_SIZE ) { + getIdTaxCacheMap().clear(); + } + } + + synchronized private static SortedSet obtainDetailedTaxonomicInformation( final Phylogeny phy, + final boolean delete ) + throws IOException, AncestralTaxonomyInferenceException { + clearCachesIfTooLarge(); + final SortedSet not_found = new TreeSet(); + List not_found_external_nodes = null; + if ( delete ) { + not_found_external_nodes = new ArrayList(); + } + for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final QUERY_TYPE qt = null; + Taxonomy tax = null; + if ( node.getNodeData().isHasTaxonomy() ) { + tax = node.getNodeData().getTaxonomy(); + } + else if ( node.isExternal() ) { + if ( !ForesterUtil.isEmpty( node.getName() ) ) { + not_found.add( node.getName() ); + } + else { + not_found.add( node.toString() ); + } + if ( delete ) { + not_found_external_nodes.add( node ); + } + } + UniProtTaxonomy uniprot_tax = null; + if ( ( tax != null ) + && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() ) + || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax + .getCommonName() ) ) ) { + uniprot_tax = obtainUniProtTaxonomy( tax, null, qt ); + if ( uniprot_tax != null ) { + updateTaxonomy( qt, node, tax, uniprot_tax ); + } + else { + not_found.add( tax.toString() ); + if ( delete && node.isExternal() ) { + not_found_external_nodes.add( node ); + } + } + } + } + if ( delete ) { + for( final PhylogenyNode node : not_found_external_nodes ) { + phy.deleteSubtree( node, true ); + } + phy.externalNodesHaveChanged(); + phy.hashIDs(); + phy.recalculateNumberOfExternalDescendants( true ); + } + return not_found; + } + + + + + public static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, Object query, QUERY_TYPE qt ) + throws IOException, AncestralTaxonomyInferenceException { + if ( TaxonomyDataObtainer.isHasAppropriateId( tax ) ) { + query = tax.getIdentifier().getValue(); + qt = QUERY_TYPE.ID; + return getTaxonomies( TaxonomyDataObtainer.getIdTaxCacheMap(), query, qt ); + } + else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { + if ( !ForesterUtil.isEmpty( tax.getLineage() ) ) { + query = tax.getLineage(); + qt = QUERY_TYPE.LIN; + return getTaxonomies( TaxonomyDataObtainer.getLineageTaxCacheMap(), query, qt ); + } + else { + query = tax.getScientificName(); + qt = QUERY_TYPE.SN; + return getTaxonomies( TaxonomyDataObtainer.getSnTaxCacheMap(), query, qt ); + } + } + else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + query = tax.getTaxonomyCode(); + qt = QUERY_TYPE.CODE; + return getTaxonomies( TaxonomyDataObtainer.getCodeTaxCacheMap(), query, qt ); + } + else { + query = tax.getCommonName(); + qt = QUERY_TYPE.CN; + return getTaxonomies( TaxonomyDataObtainer.getCnTaxCacheMap(), query, qt ); + } + } + + + + + static boolean isHasAppropriateId( final Taxonomy tax ) { + return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax + .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" ) + || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider() + .equalsIgnoreCase( "uniprotkb" ) ) ) ); + } + + + synchronized private static void updateTaxonomy( final QUERY_TYPE qt, + final PhylogenyNode node, + final Taxonomy tax, + final UniProtTaxonomy up_tax ) { + if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() ) + && ForesterUtil.isEmpty( tax.getScientificName() ) ) { + tax.setScientificName( up_tax.getScientificName() ); + } + if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) + && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + tax.setTaxonomyCode( up_tax.getCode() ); + } + if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() ) + && ForesterUtil.isEmpty( tax.getCommonName() ) ) { + tax.setCommonName( up_tax.getCommonName() ); + } + if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) { + tax.getSynonyms().add( up_tax.getSynonym() ); + } + if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) { + try { + tax.setRank( up_tax.getRank().toLowerCase() ); + } + catch ( final PhyloXmlDataFormatException ex ) { + tax.setRank( "" ); + } + } + if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() ) + && ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) ) { + tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) ); + } + if ( up_tax.getLineage() != null ) { + tax.setLineage( new ArrayList() ); + for( final String lin : up_tax.getLineage() ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + tax.getLineage().add( lin ); + } + } + } + } + + + synchronized static HashMap getCnTaxCacheMap() { + return _cn_up_cache_map; + } + + synchronized static HashMap getCodeTaxCacheMap() { + return _code_up_cache_map; + } + + synchronized static HashMap getIdTaxCacheMap() { + return _id_up_cache_map; + } + + synchronized static HashMap getSnTaxCacheMap() { + return _sn_up_cache_map; + } + + synchronized static HashMap getLineageTaxCacheMap() { + return _lineage_up_cache_map; + } + + + enum QUERY_TYPE { + CODE, SN, CN, ID, LIN; + } + + @Override + public void run() { + execute(); + } +} \ No newline at end of file diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index d6fe0f6..51d1129 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -59,6 +59,7 @@ import javax.swing.event.ChangeListener; import javax.swing.filechooser.FileFilter; import javax.swing.plaf.synth.SynthLookAndFeel; +import org.forester.analysis.TaxonomyDataObtainer; import org.forester.archaeopteryx.AptxUtil.GraphicsExportType; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; @@ -70,7 +71,6 @@ import org.forester.archaeopteryx.tools.PhyloInferenceDialog; import org.forester.archaeopteryx.tools.PhylogeneticInferenceOptions; import org.forester.archaeopteryx.tools.PhylogeneticInferrer; import org.forester.archaeopteryx.tools.SequenceDataRetriver; -import org.forester.archaeopteryx.tools.TaxonomyDataObtainer; import org.forester.archaeopteryx.webservices.PhylogeniesWebserviceClient; import org.forester.archaeopteryx.webservices.WebservicesManager; import org.forester.io.parsers.FastaParser; diff --git a/forester/java/src/org/forester/archaeopteryx/tools/RunnableProcess.java b/forester/java/src/org/forester/archaeopteryx/tools/RunnableProcess.java index 0a5504c..0453946 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/RunnableProcess.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/RunnableProcess.java @@ -17,12 +17,12 @@ public abstract class RunnableProcess implements Runnable { _process_id = process_id; } - void start( final MainFrame mf, final String name ) { + public void start( final MainFrame mf, final String name ) { setProcessId( mf.getProcessPool().addProcess( name ) ); mf.updateProcessMenu(); } - void end( final MainFrame mf ) { + public void end( final MainFrame mf ) { final boolean removed = mf.getProcessPool().removeProcess( getProcessId() ); if ( !removed ) { ForesterUtil.printWarningMessage( Constants.PRG_NAME, "could not remove process " + getProcessId() diff --git a/forester/java/src/org/forester/archaeopteryx/tools/TaxonomyDataObtainer.java b/forester/java/src/org/forester/archaeopteryx/tools/TaxonomyDataObtainer.java deleted file mode 100644 index 766f212..0000000 --- a/forester/java/src/org/forester/archaeopteryx/tools/TaxonomyDataObtainer.java +++ /dev/null @@ -1,182 +0,0 @@ -// $Id: -// -// forester -- software libraries and applications -// for genomics and evolutionary biology research. -// -// Copyright (C) 2010 Christian M Zmasek -// Copyright (C) 2010 Sanford-Burnham Medical Research Institute -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester - -package org.forester.archaeopteryx.tools; - -import java.io.IOException; -import java.net.UnknownHostException; -import java.util.SortedSet; - -import javax.swing.JOptionPane; - -import org.forester.analysis.AncestralTaxonomyInference; -import org.forester.analysis.AncestralTaxonomyInferenceException; -import org.forester.archaeopteryx.MainFrameApplication; -import org.forester.archaeopteryx.TreePanel; -import org.forester.phylogeny.Phylogeny; -import org.forester.ws.uniprot.UniProtWsTools; - -public class TaxonomyDataObtainer extends RunnableProcess { - - private final Phylogeny _phy; - private final MainFrameApplication _mf; - private final TreePanel _treepanel; - private final boolean _delete; - - public TaxonomyDataObtainer( final MainFrameApplication mf, - final TreePanel treepanel, - final Phylogeny phy, - final boolean delete ) { - _phy = phy; - _mf = mf; - _treepanel = treepanel; - _delete = delete; - } - - public TaxonomyDataObtainer( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) { - _phy = phy; - _mf = mf; - _treepanel = treepanel; - _delete = false; - } - - private String getBaseUrl() { - return UniProtWsTools.BASE_URL; - } - - private void execute() { - start( _mf, "taxonomy data" ); - SortedSet not_found = null; - try { - not_found = AncestralTaxonomyInference.obtainDetailedTaxonomicInformation( _phy, _delete ); - } - catch ( final UnknownHostException e ) { - JOptionPane.showMessageDialog( _mf, - "Could not connect to \"" + getBaseUrl() + "\"", - "Network error during taxonomic information gathering", - JOptionPane.ERROR_MESSAGE ); - return; - } - catch ( final IOException e ) { - e.printStackTrace(); - JOptionPane.showMessageDialog( _mf, - e.toString(), - "Failed to obtain taxonomic information", - JOptionPane.ERROR_MESSAGE ); - return; - } - catch ( final AncestralTaxonomyInferenceException e ) { - e.printStackTrace(); - JOptionPane.showMessageDialog( _mf, - e.toString(), - "Failed to obtain taxonomic information", - JOptionPane.ERROR_MESSAGE ); - return; - } - finally { - end( _mf ); - } - if ( ( _phy == null ) || _phy.isEmpty() ) { - try { - JOptionPane.showMessageDialog( _mf, - "None of the external node taxonomies could be resolved", - "Taxonomy Tool Failed", - JOptionPane.WARNING_MESSAGE ); - } - catch ( final Exception e ) { - // Not important if this fails, do nothing. - } - return; - } - _treepanel.setTree( _phy ); - _mf.showWhole(); - _treepanel.setEdited( true ); - if ( ( not_found != null ) && ( not_found.size() > 0 ) ) { - int max = not_found.size(); - boolean more = false; - if ( max > 20 ) { - more = true; - max = 20; - } - final StringBuffer sb = new StringBuffer(); - sb.append( "Not all taxonomies could be resolved.\n" ); - if ( not_found.size() == 1 ) { - if ( _delete ) { - sb.append( "The following taxonomy was not found and deleted (if external):\n" ); - } - else { - sb.append( "The following taxonomy was not found:\n" ); - } - } - else { - if ( _delete ) { - sb.append( "The following taxonomies were not found and deleted (if external) (total: " - + not_found.size() + "):\n" ); - } - else { - sb.append( "The following taxonomies were not found (total: " + not_found.size() + "):\n" ); - } - } - int i = 0; - for( final String string : not_found ) { - if ( i > 19 ) { - break; - } - sb.append( string ); - sb.append( "\n" ); - ++i; - } - if ( more ) { - sb.append( "..." ); - } - try { - JOptionPane.showMessageDialog( _mf, - sb.toString(), - "Taxonomy Tool Completed", - JOptionPane.WARNING_MESSAGE ); - } - catch ( final Exception e ) { - // Not important if this fails, do nothing. - } - } - else { - try { - JOptionPane.showMessageDialog( _mf, - "Taxonomy tool successfully completed", - "Taxonomy Tool Completed", - JOptionPane.INFORMATION_MESSAGE ); - } - catch ( final Exception e ) { - // Not important if this fails, do nothing. - } - } - } - - @Override - public void run() { - execute(); - } -} \ No newline at end of file