From c4f9dc6343e1fee8846c893b968065d9d9178655 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 19 Sep 2011 18:28:22 +0000 Subject: [PATCH] needs: testing, proper error messages and dialogs, code cleanup, cache mechanism for lineages! --- .../analysis/AncestralTaxonomyInference.java | 152 +++++++++----------- .../src/org/forester/archaeopteryx/NodePanel.java | 15 +- .../tools/AncestralTaxonomyInferenceException.java | 40 ++++++ .../tools/AncestralTaxonomyInferrer.java | 60 ++------ .../src/org/forester/phylogeny/data/Taxonomy.java | 5 +- .../java/src/org/forester/util/ForesterUtil.java | 21 ++- 6 files changed, 147 insertions(+), 146 deletions(-) create mode 100644 forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferenceException.java diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index ba0a6df..0d1465a 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import org.forester.archaeopteryx.tools.AncestralTaxonomyInferenceException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -146,21 +147,19 @@ public final class AncestralTaxonomyInference { return UniProtWsTools.getTaxonomiesFromTaxonomyCode( query, MAX_TAXONOMIES_TO_RETURN ); } - synchronized public static SortedSet inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException { + synchronized public static void inferTaxonomyFromDescendents( final Phylogeny phy ) throws IOException, + AncestralTaxonomyInferenceException { clearCachesIfTooLarge(); - final SortedSet not_found = new TreeSet(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); if ( !node.isExternal() ) { - inferTaxonomyFromDescendents( node, not_found ); + inferTaxonomyFromDescendents( node ); } } - return not_found; } - synchronized private static void inferTaxonomyFromDescendents( final PhylogenyNode n, - final SortedSet not_found ) - throws IOException { + synchronized private static void inferTaxonomyFromDescendents( final PhylogenyNode n ) throws IOException, + AncestralTaxonomyInferenceException { if ( n.isExternal() ) { throw new IllegalArgumentException( "attempt to infer taxonomy from descendants of external node" ); } @@ -174,18 +173,14 @@ public final class AncestralTaxonomyInference { || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getScientificName() ) || !ForesterUtil.isEmpty( desc.getNodeData().getTaxonomy().getTaxonomyCode() ) || !ForesterUtil .isEmpty( desc.getNodeData().getTaxonomy().getCommonName() ) ) ) { - final UniProtTaxonomy up_tax = obtainUniProtTaxonomy( desc.getNodeData().getTaxonomy(), null, null ); String[] lineage = null; if ( up_tax != null ) { - //lineage = obtainLineagePlusOwnScientificName( up_tax ); lineage = up_tax.getLineageAsArray(); } if ( ( lineage == null ) || ( lineage.length < 1 ) ) { - //TODO remove me - System.out.println( "node " + desc.getNodeData().getTaxonomy().toString() + " has no lineage!" ); - not_found.add( desc.getNodeData().getTaxonomy().asText().toString() ); - return; + throw new AncestralTaxonomyInferenceException( "a taxonomic lineage for node \"" + + desc.getNodeData().getTaxonomy().toString() + "\" could not be found" ); } if ( lineage.length < shortest_lin_length ) { shortest_lin_length = lineage.length; @@ -193,7 +188,6 @@ public final class AncestralTaxonomyInference { lineages.add( lineage ); } else { - String msg = "Node(s) with no or inappropriate taxonomic information found"; String node = ""; if ( !ForesterUtil.isEmpty( desc.getName() ) ) { node = "\"" + desc.getName() + "\""; @@ -201,22 +195,22 @@ public final class AncestralTaxonomyInference { else { node = "[" + desc.getId() + "]"; } - msg = "Node " + node + " has no or inappropriate taxonomic information"; - // final List e = desc.getAllExternalDescendants(); + // final List e = desc.getAllExternalDescendants(); //TODO remove me! -// System.out.println(); -// int x = 0; -// for( final PhylogenyNode object : e ) { -// System.out.println( x + ":" ); -// System.out.println( object.getName() + " " ); -// x++; -// } -// System.out.println(); + // System.out.println(); + // int x = 0; + // for( final PhylogenyNode object : e ) { + // System.out.println( x + ":" ); + // System.out.println( object.getName() + " " ); + // x++; + // } + // System.out.println(); // - throw new IllegalArgumentException( msg ); + throw new AncestralTaxonomyInferenceException( "node " + node + + " has no or inappropriate taxonomic information" ); } } - List last_common_lineage = new ArrayList(); + final List last_common_lineage = new ArrayList(); String last_common = null; if ( shortest_lin_length > 0 ) { I: for( int i = 0; i < shortest_lin_length; ++i ) { @@ -226,24 +220,24 @@ public final class AncestralTaxonomyInference { break I; } } - // last_common_lineage = lineage_0; - last_common_lineage.add( lineage_0 ) ; - last_common =lineage_0; + // last_common_lineage = lineage_0; + last_common_lineage.add( lineage_0 ); + last_common = lineage_0; } } - // if ( last_common_lineage == null ) { + // if ( last_common_lineage == null ) { if ( last_common_lineage.isEmpty() ) { - System.out.println( "No common lineage for:" ); + String msg = "no common lineage for:\n"; int counter = 0; for( final String[] strings : lineages ) { - System.out.print( counter + ": " ); + msg += counter + ": "; ++counter; for( final String string : strings ) { - System.out.print( string + " " ); + msg += string + " "; } - System.out.println(); + msg += "\n"; } - return; + throw new AncestralTaxonomyInferenceException( msg ); } final Taxonomy tax = new Taxonomy(); n.getNodeData().setTaxonomy( tax ); @@ -275,7 +269,6 @@ public final class AncestralTaxonomyInference { } } } - } for( final PhylogenyNode desc : descs ) { if ( !desc.isExternal() && desc.getNodeData().isHasTaxonomy() @@ -362,13 +355,13 @@ public final class AncestralTaxonomyInference { if ( isHasAppropriateId( tax ) ) { query = tax.getIdentifier().getValue(); qt = QUERY_TYPE.ID; - System.out.println( "query by id: " + query); + System.out.println( "query by id: " + query ); return getTaxonomies( getIdTaxCacheMap(), query, qt ); } else if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { query = tax.getScientificName(); qt = QUERY_TYPE.SN; - System.out.println( "query by sn: " + query); + System.out.println( "query by sn: " + query ); return getTaxonomies( getSnTaxCacheMap(), query, qt ); } else if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { @@ -383,7 +376,7 @@ public final class AncestralTaxonomyInference { } } - synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn) throws IOException { + synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromSn( final String sn ) throws IOException { UniProtTaxonomy up_tax = null; if ( getSnTaxCacheMap().containsKey( sn ) ) { up_tax = getSnTaxCacheMap().get( sn ).copy(); @@ -402,56 +395,54 @@ public final class AncestralTaxonomyInference { if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { getIdTaxCacheMap().put( up_tax.getId(), up_tax ); } - } } return up_tax; } - - synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromCommonLineage( List lineage ) throws IOException { - UniProtTaxonomy up_tax = null; - // -- if ( getSnTaxCacheMap().containsKey( sn ) ) { - // -- up_tax = getSnTaxCacheMap().get( sn ).copy(); - // -- } - // else { - final List up_taxonomies = getTaxonomiesFromScientificName( lineage.get(lineage.size() -1 ) ); - //-- if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) { - - if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) { - for( UniProtTaxonomy up_taxonomy : up_taxonomies ) { - boolean match = true; - I: for( int i = 0; i < lineage.size(); ++i ) { - if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { - match = false; - break I; - } - } - if ( match ) { - if ( up_tax != null ) { - throw new IOException( "not unique!"); - } - up_tax = up_taxonomy; + synchronized private static UniProtTaxonomy obtainUniProtTaxonomyFromCommonLineage( final List lineage ) + throws AncestralTaxonomyInferenceException, IOException { + UniProtTaxonomy up_tax = null; + // -- if ( getSnTaxCacheMap().containsKey( sn ) ) { + // -- up_tax = getSnTaxCacheMap().get( sn ).copy(); + // -- } + // else { + final List up_taxonomies = getTaxonomiesFromScientificName( lineage.get( lineage.size() - 1 ) ); + //-- if ( ( up_taxonomies != null ) && ( up_taxonomies.size() == 1 ) ) { + if ( ( up_taxonomies != null ) && ( up_taxonomies.size() > 0 ) ) { + for( final UniProtTaxonomy up_taxonomy : up_taxonomies ) { + boolean match = true; + I: for( int i = 0; i < lineage.size(); ++i ) { + if ( !lineage.get( i ).equalsIgnoreCase( up_taxonomy.getLineage().get( i ) ) ) { + match = false; + break I; } } - - if ( up_tax == null ) { - throw new IOException( "not found!"); - } - //-- up_tax = up_taxonomies.get( 0 ); - //-- getSnTaxCacheMap().put( sn, up_tax ); - if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { - getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); - } - if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { - getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); - } - if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { - getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + if ( match ) { + if ( up_tax != null ) { + throw new AncestralTaxonomyInferenceException( "lineage \"" + + ForesterUtil.stringListToString( lineage, " > " ) + "\" is not unique" ); + } + up_tax = up_taxonomy; } - } - // } + if ( up_tax == null ) { + throw new AncestralTaxonomyInferenceException( "lineage \"" + + ForesterUtil.stringListToString( lineage, " > " ) + "\" not found" ); + } + //-- up_tax = up_taxonomies.get( 0 ); + //-- getSnTaxCacheMap().put( sn, up_tax ); + if ( !ForesterUtil.isEmpty( up_tax.getCode() ) ) { + getCodeTaxCacheMap().put( up_tax.getCode(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getCommonName() ) ) { + getCnTaxCacheMap().put( up_tax.getCommonName(), up_tax ); + } + if ( !ForesterUtil.isEmpty( up_tax.getId() ) ) { + getIdTaxCacheMap().put( up_tax.getId(), up_tax ); + } + } + // } return up_tax; } @@ -494,7 +485,6 @@ public final class AncestralTaxonomyInference { } } } - } private enum QUERY_TYPE { diff --git a/forester/java/src/org/forester/archaeopteryx/NodePanel.java b/forester/java/src/org/forester/archaeopteryx/NodePanel.java index fb4ac82..4cfb7b2 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodePanel.java @@ -209,13 +209,12 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } } - + private static void addLineage( final DefaultMutableTreeNode top, - final List lineage, - final DefaultMutableTreeNode category ) { - if ( ( lineage != null ) && ( lineage.size() > 0 ) ) { - - StringBuilder sb = new StringBuilder(); + final List lineage, + final DefaultMutableTreeNode category ) { + if ( ( lineage != null ) && ( lineage.size() > 0 ) ) { + final StringBuilder sb = new StringBuilder(); for( final String lin : lineage ) { if ( !ForesterUtil.isEmpty( lin ) ) { sb.append( lin ); @@ -223,7 +222,7 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } String str = null; - if (sb.length() > 1 ) { + if ( sb.length() > 1 ) { str = sb.substring( 0, sb.length() - 3 ); } if ( !ForesterUtil.isEmpty( str ) ) { @@ -231,8 +230,6 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } } - - private static void addBasics( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node, diff --git a/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferenceException.java b/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferenceException.java new file mode 100644 index 0000000..601a719 --- /dev/null +++ b/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferenceException.java @@ -0,0 +1,40 @@ +// $Id: +// $ +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.archaeopteryx.tools; + +public class AncestralTaxonomyInferenceException extends Exception { + + private static final long serialVersionUID = 1L; + + public AncestralTaxonomyInferenceException() { + super(); + } + + public AncestralTaxonomyInferenceException( final String message ) { + super( message ); + } +} diff --git a/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferrer.java b/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferrer.java index fbcc0bf..09e8804 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferrer.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/AncestralTaxonomyInferrer.java @@ -26,7 +26,6 @@ package org.forester.archaeopteryx.tools; import java.net.UnknownHostException; -import java.util.SortedSet; import javax.swing.JOptionPane; @@ -54,11 +53,10 @@ public class AncestralTaxonomyInferrer implements Runnable { private void inferTaxonomies() { _mf.getMainPanel().getCurrentTreePanel().setWaitCursor(); - SortedSet not_found = null; try { - not_found = AncestralTaxonomyInference.inferTaxonomyFromDescendents( _phy ); + AncestralTaxonomyInference.inferTaxonomyFromDescendents( _phy ); } - catch ( final IllegalArgumentException e ) { + catch ( final AncestralTaxonomyInferenceException e ) { _mf.getMainPanel().getCurrentTreePanel().setArrowCursor(); JOptionPane.showMessageDialog( _mf, e.getMessage(), @@ -88,54 +86,14 @@ public class AncestralTaxonomyInferrer implements Runnable { _treepanel.setTree( _phy ); _mf.showWhole(); _treepanel.setEdited( true ); - if ( ( not_found != null ) && ( not_found.size() > 0 ) ) { - int max = not_found.size(); - boolean more = false; - if ( max > 20 ) { - more = true; - max = 20; - } - final StringBuffer sb = new StringBuffer(); - sb.append( "Not all taxonomies could be resolved.\n" ); - sb.append( "The result is incomplete, and, possibly, misleading.\n" ); - if ( not_found.size() == 1 ) { - sb.append( "The following taxonomy was not found:\n" ); - } - else { - sb.append( "The following taxonomies were not found (total: " + not_found.size() + "):\n" ); - } - int i = 0; - for( final String string : not_found ) { - if ( i > 19 ) { - break; - } - sb.append( string ); - sb.append( "\n" ); - ++i; - } - if ( more ) { - sb.append( "..." ); - } - try { - JOptionPane.showMessageDialog( _mf, - sb.toString(), - "Ancestral Taxonomy Inference Completed", - JOptionPane.WARNING_MESSAGE ); - } - catch ( final Exception e ) { - // Not important if this fails, do nothing. - } + try { + JOptionPane.showMessageDialog( _mf, + "Ancestral taxonomy inference successfully completed", + "Ancestral Taxonomy Inference Completed", + JOptionPane.INFORMATION_MESSAGE ); } - else { - try { - JOptionPane.showMessageDialog( _mf, - "Ancestral taxonomy inference successfully completed", - "Ancestral Taxonomy Inference Completed", - JOptionPane.INFORMATION_MESSAGE ); - } - catch ( final Exception e ) { - // Not important if this fails, do nothing. - } + catch ( final Exception e ) { + // Not important if this fails, do nothing. } } diff --git a/forester/java/src/org/forester/phylogeny/data/Taxonomy.java b/forester/java/src/org/forester/phylogeny/data/Taxonomy.java index ee78912..40115c9 100644 --- a/forester/java/src/org/forester/phylogeny/data/Taxonomy.java +++ b/forester/java/src/org/forester/phylogeny/data/Taxonomy.java @@ -230,7 +230,8 @@ public class Taxonomy implements PhylogenyData, MultipleUris, Comparable lineage ) { + public void setLineage( final List lineage ) { _lineage = lineage; } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index ebbb8fe..b68d89b 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -1088,17 +1088,32 @@ public final class ForesterUtil { return str.split( regex ); } - final public static String stringArrayToString( final String[] a ) { - final StringBuffer sb = new StringBuffer(); + final public static String stringArrayToString( final String[] a, final String separator ) { + final StringBuilder sb = new StringBuilder(); if ( ( a != null ) && ( a.length > 0 ) ) { for( int i = 0; i < a.length - 1; ++i ) { - sb.append( a[ i ] + ", " ); + sb.append( a[ i ] + separator ); } sb.append( a[ a.length - 1 ] ); } return sb.toString(); } + final public static String stringListToString( final List l, final String separator ) { + final StringBuilder sb = new StringBuilder(); + if ( ( l != null ) && ( l.size() > 0 ) ) { + for( int i = 0; i < l.size() - 1; ++i ) { + sb.append( l.get( i ) + separator ); + } + sb.append( l.get( l.size() - 1 ) ); + } + return sb.toString(); + } + + final public static String stringArrayToString( final String[] a ) { + return stringArrayToString( a, ", " ); + } + final public static String[] stringSetToArray( final Set strings ) { final String[] str_array = new String[ strings.size() ]; int i = 0; -- 1.7.10.2