3 // forester -- software libraries and applications
4 // for genomics and evolutionary biology research.
6 // Copyright (C) 2010 Christian M Zmasek
7 // Copyright (C) 2010 Sanford-Burnham Medical Research Institute
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.analysis;
29 import java.io.IOException;
30 import java.net.UnknownHostException;
31 import java.util.ArrayList;
32 import java.util.HashMap;
33 import java.util.List;
34 import java.util.SortedSet;
35 import java.util.TreeSet;
37 import javax.swing.JOptionPane;
39 import org.forester.archaeopteryx.MainFrameApplication;
40 import org.forester.archaeopteryx.TreePanel;
41 import org.forester.archaeopteryx.tools.RunnableProcess;
42 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
43 import org.forester.phylogeny.Phylogeny;
44 import org.forester.phylogeny.PhylogenyNode;
45 import org.forester.phylogeny.data.Identifier;
46 import org.forester.phylogeny.data.Taxonomy;
47 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
48 import org.forester.util.ForesterUtil;
49 import org.forester.ws.uniprot.UniProtTaxonomy;
50 import org.forester.ws.uniprot.UniProtWsTools;
/**
 * Process (extends RunnableProcess) that looks up taxonomy data for the nodes
 * of a phylogeny and afterwards hands that phylogeny to a tree panel.
 * It also owns five static maps, named as caches, one per kind of lookup.
 *
 * NOTE(review): this listing carries embedded original line numbers with gaps
 * (for example 53, 55 and 61-62 are absent), so declarations may be missing
 * from view -- confirm against the complete file.
 */
52 public class TaxonomyDataObtainer extends RunnableProcess {
// Size threshold: clearCachesIfTooLarge() empties any cache map holding more entries than this.
54 private static final int MAX_CACHE_SIZE = 100000;
// Map handed to getTaxonomies() by obtainUniProtTaxonomy() for scientific-name queries.
56 private static final HashMap<String, UniProtTaxonomy> _sn_up_cache_map = new HashMap<String, UniProtTaxonomy>();
// Map handed to getTaxonomies() for lineage queries.
57 private static final HashMap<String, UniProtTaxonomy> _lineage_up_cache_map = new HashMap<String, UniProtTaxonomy>();
// Map handed to getTaxonomies() for taxonomy-code queries.
58 private static final HashMap<String, UniProtTaxonomy> _code_up_cache_map = new HashMap<String, UniProtTaxonomy>();
// Map handed to getTaxonomies() for common-name queries.
59 private static final HashMap<String, UniProtTaxonomy> _cn_up_cache_map = new HashMap<String, UniProtTaxonomy>();
// Map handed to getTaxonomies() for identifier-value queries.
60 private static final HashMap<String, UniProtTaxonomy> _id_up_cache_map = new HashMap<String, UniProtTaxonomy>();
// Phylogeny to process; passed to obtainDetailedTaxonomicInformation() and later set on the tree panel.
63 private final Phylogeny _phy;
// Parent component of all dialogs shown by execute(); also passed to start().
64 private final MainFrameApplication _mf;
// Panel that receives the processed phylogeny (setTree) and is marked as edited.
65 private final TreePanel _treepanel;
// Forwarded as the "delete" argument of obtainDetailedTaxonomicInformation().
66 private final boolean _delete;
/**
 * Creates an obtainer with an explicit delete flag.
 *
 * NOTE(review): listing lines 70, 72-73 and 75-76 are not visible, so only two
 * of the parameters and the _treepanel assignment can be seen here. Presumably
 * the missing lines declare the phylogeny parameter and assign the remaining
 * final fields -- confirm against the complete file.
 *
 * @param mf        main frame; presumably stored in _mf (assignment not visible)
 * @param treepanel tree panel, stored in _treepanel
 * @param delete    delete flag; presumably stored in _delete (assignment not visible)
 */
68 public TaxonomyDataObtainer( final MainFrameApplication mf,
69 final TreePanel treepanel,
71 final boolean delete ) {
74 _treepanel = treepanel;
/**
 * Creates an obtainer without an explicit delete flag.
 *
 * NOTE(review): listing lines 79-80 and 82-83 are not visible; presumably they
 * assign _phy, _mf and a default for _delete -- confirm, since the value chosen
 * for _delete cannot be seen from here.
 *
 * @param mf        main frame; presumably stored in _mf (assignment not visible)
 * @param treepanel tree panel, stored in _treepanel
 * @param phy       phylogeny; presumably stored in _phy (assignment not visible)
 */
78 public TaxonomyDataObtainer( final MainFrameApplication mf, final TreePanel treepanel, final Phylogeny phy ) {
81 _treepanel = treepanel;
/**
 * Returns UniProtWsTools.BASE_URL; execute() quotes it in the
 * "Could not connect to ..." dialog shown for an UnknownHostException.
 *
 * @return the base URL constant of UniProtWsTools
 */
85 private String getBaseUrl() {
86 return UniProtWsTools.BASE_URL;
/**
 * Runs the taxonomy lookup for _phy and reports the outcome through dialogs:
 * calls start(), invokes obtainDetailedTaxonomicInformation(), shows an error
 * dialog per exception type, sets the phylogeny on the tree panel, and finally
 * shows either a warning about unresolved taxonomies or a success message.
 *
 * NOTE(review): many listing lines are absent inside this method (try/else
 * headers, closing braces, and whatever followed each dialog), so the exact
 * control flow -- in particular whether the handlers return early and where
 * the process is ended -- cannot be confirmed from here.
 */
89 private void execute() {
90 start( _mf, "taxonomy data" );
// Unresolved names reported by the lookup; still null if the call below throws before assigning.
91 SortedSet<String> not_found = null;
93 not_found = obtainDetailedTaxonomicInformation( _phy, _delete );
// Host could not be resolved: name the base URL in the dialog.
95 catch ( final UnknownHostException e ) {
96 JOptionPane.showMessageDialog( _mf,
97 "Could not connect to \"" + getBaseUrl() + "\"",
98 "Network error during taxonomic information gathering",
99 JOptionPane.ERROR_MESSAGE );
// Any other I/O failure (the message argument, listing line 105, is not visible).
102 catch ( final IOException e ) {
104 JOptionPane.showMessageDialog( _mf,
106 "Failed to obtain taxonomic information",
107 JOptionPane.ERROR_MESSAGE );
// Taxonomy inference failure; same dialog title as the I/O case (message argument, listing line 113, not visible).
110 catch ( final AncestralTaxonomyInferenceException e ) {
112 JOptionPane.showMessageDialog( _mf,
114 "Failed to obtain taxonomic information",
115 JOptionPane.ERROR_MESSAGE );
// Phylogeny is null or empty: warn that no external node taxonomy could be resolved.
121 if ( ( _phy == null ) || _phy.isEmpty() ) {
123 JOptionPane.showMessageDialog( _mf,
124 "None of the external node taxonomies could be resolved",
125 "Taxonomy Tool Failed",
126 JOptionPane.WARNING_MESSAGE );
// Failure to show the dialog is deliberately ignored.
128 catch ( final Exception e ) {
129 // Not important if this fails, do nothing.
// Display the processed phylogeny and flag the panel as edited.
133 _treepanel.setTree( _phy );
135 _treepanel.setEdited( true );
// Some taxonomies were not found: build a warning message that lists them.
136 if ( ( not_found != null ) && ( not_found.size() > 0 ) ) {
137 int max = not_found.size();
138 boolean more = false;
// NOTE(review): listing lines 139-142 are not visible; presumably they cap "max" and set "more" -- confirm.
143 final StringBuffer sb = new StringBuffer();
144 sb.append( "Not all taxonomies could be resolved.\n" );
// Singular wording. The condition choosing between the two variants is not visible (presumably _delete).
145 if ( not_found.size() == 1 ) {
147 sb.append( "The following taxonomy was not found and deleted (if external):\n" );
150 sb.append( "The following taxonomy was not found:\n" );
// Plural wording, including the total number of unresolved taxonomies.
155 sb.append( "The following taxonomies were not found and deleted (if external) (total: "
156 + not_found.size() + "):\n" );
159 sb.append( "The following taxonomies were not found (total: " + not_found.size() + "):\n" );
// Iterates over the unresolved names (loop body, listing lines 164-174, is not visible).
163 for( final String string : not_found ) {
175 JOptionPane.showMessageDialog( _mf,
177 "Taxonomy Tool Completed",
178 JOptionPane.WARNING_MESSAGE );
// Failure to show the dialog is deliberately ignored.
180 catch ( final Exception e ) {
181 // Not important if this fails, do nothing.
// Success message; presumably the branch taken when nothing was left unresolved (else header not visible).
186 JOptionPane.showMessageDialog( _mf,
187 "Taxonomy tool successfully completed",
188 "Taxonomy Tool Completed",
189 JOptionPane.INFORMATION_MESSAGE );
// Failure to show the dialog is deliberately ignored.
191 catch ( final Exception e ) {
192 // Not important if this fails, do nothing.
/**
 * Empties each of the five static cache maps whose size exceeds MAX_CACHE_SIZE.
 * Every map is checked on its own, so only the oversized ones are cleared.
 * Being static and synchronized, the method locks on the class object.
 */
198 synchronized static void clearCachesIfTooLarge() {
// Scientific-name cache.
199 if ( getSnTaxCacheMap().size() > MAX_CACHE_SIZE ) {
200 getSnTaxCacheMap().clear();
// Lineage cache.
202 if ( getLineageTaxCacheMap().size() > MAX_CACHE_SIZE ) {
203 getLineageTaxCacheMap().clear();
// Common-name cache.
205 if ( getCnTaxCacheMap().size() > MAX_CACHE_SIZE ) {
206 getCnTaxCacheMap().clear();
// Taxonomy-code cache.
208 if ( getCodeTaxCacheMap().size() > MAX_CACHE_SIZE ) {
209 getCodeTaxCacheMap().clear();
// Identifier cache.
211 if ( getIdTaxCacheMap().size() > MAX_CACHE_SIZE ) {
212 getIdTaxCacheMap().clear();
/**
 * Visits every node of phy in postorder, tries to resolve each node's taxonomy
 * through obtainUniProtTaxonomy(), and copies resolved data back with
 * updateTaxonomy(). Unresolved entries are recorded by name, and unresolved
 * external nodes are collected and removed with deleteSubtree().
 *
 * NOTE(review): several listing lines are absent (222, 224, 228, 231, 235-236,
 * 238-239, 241-242, 244, 251-252, 256-260, 263, 265, 267-273), including the
 * declaration of "tax", some conditions and the return statement.
 *
 * @param phy    phylogeny whose nodes are visited and possibly deleted
 * @param delete when true, external nodes whose lookup failed are collected for deletion
 *               (see the condition on listing line 254); other guards on this flag may be
 *               hidden in the missing lines
 * @return presumably the sorted set of unresolved names (return statement not visible)
 * @throws IOException declared; presumably propagated from obtainUniProtTaxonomy()
 * @throws AncestralTaxonomyInferenceException declared; presumably propagated from obtainUniProtTaxonomy()
 */
216 synchronized private static SortedSet<String> obtainDetailedTaxonomicInformation( final Phylogeny phy,
217 final boolean delete )
218 throws IOException, AncestralTaxonomyInferenceException {
// Keep the static caches bounded before starting a new run.
219 clearCachesIfTooLarge();
220 final SortedSet<String> not_found = new TreeSet<String>();
221 List<PhylogenyNode> not_found_external_nodes = null;
223 not_found_external_nodes = new ArrayList<PhylogenyNode>();
225 for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
226 final PhylogenyNode node = iter.next();
// NOTE(review): qt is final and null here. obtainUniProtTaxonomy() can only reassign its own
// parameter copy, so updateTaxonomy() below always receives null -- confirm this is intended.
227 final QUERY_TYPE qt = null;
229 if ( node.getNodeData().isHasTaxonomy() ) {
230 tax = node.getNodeData().getTaxonomy();
// External node without taxonomy data: record it by name, or by toString() when the name is empty.
232 else if ( node.isExternal() ) {
233 if ( !ForesterUtil.isEmpty( node.getName() ) ) {
234 not_found.add( node.getName() );
237 not_found.add( node.toString() );
// Collected for deletion (the guarding condition, listing line 239, is not visible).
240 not_found_external_nodes.add( node );
243 UniProtTaxonomy uniprot_tax = null;
// Query only when the taxonomy offers at least one usable key: an accepted identifier,
// a scientific name, a taxonomy code or a common name (first operand on listing line 244 not visible).
245 && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() )
246 || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax
247 .getCommonName() ) ) ) {
248 uniprot_tax = obtainUniProtTaxonomy( tax, null, qt );
249 if ( uniprot_tax != null ) {
250 updateTaxonomy( qt, node, tax, uniprot_tax );
// Lookup returned nothing: record the taxonomy and, if requested, mark an external node for deletion.
253 not_found.add( tax.toString() );
254 if ( delete && node.isExternal() ) {
255 not_found_external_nodes.add( node );
// Remove the collected external nodes, then let the phylogeny refresh its derived state.
261 for( final PhylogenyNode node : not_found_external_nodes ) {
262 phy.deleteSubtree( node, true );
264 phy.externalNodesHaveChanged();
266 phy.recalculateNumberOfExternalDescendants( true );
/**
 * Picks a query value from tax by priority and looks it up through
 * getTaxonomies() with the matching static cache map:
 * identifier value (when isHasAppropriateId() holds), otherwise scientific name
 * (using the lineage when it is non-empty, else the scientific name itself),
 * otherwise taxonomy code, otherwise common name.
 *
 * NOTE(review): listing lines 278, 284, 289 and 300 are absent; presumably they
 * assign qt for the ID, LIN, SN and CN cases the way line 295 does for CODE -- confirm.
 *
 * @param tax   taxonomy supplying the query value; dereferenced, so it must not be null
 * @param query overwritten before use in every visible branch; the incoming value is not read
 * @param qt    reassigned locally (visible for CODE); Java passes the reference by value,
 *              so the caller's variable is unaffected
 * @return whatever getTaxonomies() returns for the selected cache, query and query type
 * @throws IOException declared; presumably raised by getTaxonomies()
 * @throws AncestralTaxonomyInferenceException declared; presumably raised by getTaxonomies()
 */
274 public static UniProtTaxonomy obtainUniProtTaxonomy( final Taxonomy tax, Object query, QUERY_TYPE qt )
275 throws IOException, AncestralTaxonomyInferenceException {
// 1. Identifier from an accepted provider.
276 if ( TaxonomyDataObtainer.isHasAppropriateId( tax ) ) {
277 query = tax.getIdentifier().getValue();
279 return getTaxonomies( TaxonomyDataObtainer.getIdTaxCacheMap(), query, qt );
// 2. Scientific name present: prefer the lineage as query when one is available.
281 else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) {
282 if ( !ForesterUtil.isEmpty( tax.getLineage() ) ) {
283 query = tax.getLineage();
285 return getTaxonomies( TaxonomyDataObtainer.getLineageTaxCacheMap(), query, qt );
288 query = tax.getScientificName();
290 return getTaxonomies( TaxonomyDataObtainer.getSnTaxCacheMap(), query, qt );
// 3. Taxonomy code.
293 else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
294 query = tax.getTaxonomyCode();
295 qt = QUERY_TYPE.CODE;
296 return getTaxonomies( TaxonomyDataObtainer.getCodeTaxCacheMap(), query, qt );
// 4. Fallback: common name (branch header, listing line 298, not visible).
299 query = tax.getCommonName();
301 return getTaxonomies( TaxonomyDataObtainer.getCnTaxCacheMap(), query, qt );
/**
 * Tells whether tax carries an identifier this class can query by: the
 * identifier is non-null, its value is non-empty, and its provider equals
 * (ignoring case) "ncbi", "uniprot" or "uniprotkb".
 *
 * NOTE(review): getProvider() is dereferenced without a null check; if an
 * identifier can have a null provider this throws NullPointerException -- confirm.
 *
 * @param tax taxonomy to inspect; dereferenced, so it must not be null
 * @return true if the identifier is usable as described above
 */
308 static boolean isHasAppropriateId( final Taxonomy tax ) {
309 return ( ( tax.getIdentifier() != null ) && ( !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) && ( tax
310 .getIdentifier().getProvider().equalsIgnoreCase( "ncbi" )
311 || tax.getIdentifier().getProvider().equalsIgnoreCase( "uniprot" ) || tax.getIdentifier().getProvider()
312 .equalsIgnoreCase( "uniprotkb" ) ) ) );
/**
 * Copies data from a UniProt taxonomy into tax, filling only what tax lacks
 * (the lineage is the exception: it is replaced whenever up_tax has one).
 *
 * NOTE(review): listing line 318 (presumably the "tax" parameter) and lines
 * 336, 338, 340-342 (try header and catch body) are not visible.
 *
 * @param qt     query type used for the lookup; the field matching it is not overwritten
 * @param node   node owning the taxonomy; only its isExternal() state is read here
 * @param up_tax UniProt taxonomy supplying the values
 */
316 synchronized private static void updateTaxonomy( final QUERY_TYPE qt,
317 final PhylogenyNode node,
319 final UniProtTaxonomy up_tax ) {
// Scientific name: set only if tax has none and the query was not by scientific name.
320 if ( ( qt != QUERY_TYPE.SN ) && !ForesterUtil.isEmpty( up_tax.getScientificName() )
321 && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
322 tax.setScientificName( up_tax.getScientificName() );
// Taxonomy code: same rule, but applied to external nodes only.
324 if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
325 && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
326 tax.setTaxonomyCode( up_tax.getCode() );
// Common name: set only if tax has none and the query was not by common name.
328 if ( ( qt != QUERY_TYPE.CN ) && !ForesterUtil.isEmpty( up_tax.getCommonName() )
329 && ForesterUtil.isEmpty( tax.getCommonName() ) ) {
330 tax.setCommonName( up_tax.getCommonName() );
// Synonym: appended unless already present.
332 if ( !ForesterUtil.isEmpty( up_tax.getSynonym() ) && !tax.getSynonyms().contains( up_tax.getSynonym() ) ) {
333 tax.getSynonyms().add( up_tax.getSynonym() );
// Rank: stored in lower case, only if tax has none.
335 if ( !ForesterUtil.isEmpty( up_tax.getRank() ) && ForesterUtil.isEmpty( tax.getRank() ) ) {
337 tax.setRank( up_tax.getRank().toLowerCase() );
// setRank() may reject the value; how the exception is handled is not visible in this listing.
339 catch ( final PhyloXmlDataFormatException ex ) {
// Identifier: created with provider "uniprot" only if tax has no identifier value and the query was not by ID.
343 if ( ( qt != QUERY_TYPE.ID ) && !ForesterUtil.isEmpty( up_tax.getId() )
344 && ( ( tax.getIdentifier() == null ) || ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) ) {
345 tax.setIdentifier( new Identifier( up_tax.getId(), "uniprot" ) );
// Lineage: replaced by a fresh list holding the non-empty entries of up_tax's lineage.
347 if ( up_tax.getLineage() != null ) {
348 tax.setLineage( new ArrayList<String>() );
349 for( final String lin : up_tax.getLineage() ) {
350 if ( !ForesterUtil.isEmpty( lin ) ) {
351 tax.getLineage().add( lin );
/**
 * Returns the static common-name cache map itself, not a copy.
 * The class lock taken by this synchronized method is released on return,
 * so it does not by itself guard later operations on the returned HashMap.
 */
358 synchronized static HashMap<String, UniProtTaxonomy> getCnTaxCacheMap() {
359 return _cn_up_cache_map;
/**
 * Returns the static taxonomy-code cache map itself, not a copy.
 * The class lock taken by this synchronized method is released on return,
 * so it does not by itself guard later operations on the returned HashMap.
 */
362 synchronized static HashMap<String, UniProtTaxonomy> getCodeTaxCacheMap() {
363 return _code_up_cache_map;
/**
 * Returns the static identifier cache map itself, not a copy.
 * The class lock taken by this synchronized method is released on return,
 * so it does not by itself guard later operations on the returned HashMap.
 */
366 synchronized static HashMap<String, UniProtTaxonomy> getIdTaxCacheMap() {
367 return _id_up_cache_map;
/**
 * Returns the static scientific-name cache map itself, not a copy.
 * The class lock taken by this synchronized method is released on return,
 * so it does not by itself guard later operations on the returned HashMap.
 */
370 synchronized static HashMap<String, UniProtTaxonomy> getSnTaxCacheMap() {
371 return _sn_up_cache_map;
/**
 * Returns the static lineage cache map itself, not a copy.
 * The class lock taken by this synchronized method is released on return,
 * so it does not by itself guard later operations on the returned HashMap.
 */
374 synchronized static HashMap<String, UniProtTaxonomy> getLineageTaxCacheMap() {
375 return _lineage_up_cache_map;
// Query kinds; this file refers to them as QUERY_TYPE.CODE, QUERY_TYPE.SN, QUERY_TYPE.CN and QUERY_TYPE.ID.
// NOTE(review): the enclosing enum declaration line is not visible in this listing. Presumably
// CODE = taxonomy code, SN = scientific name, CN = common name, ID = identifier, LIN = lineage -- confirm.
380 CODE, SN, CN, ID, LIN;