From cd262b9df153624d042b47de8c142f70e27474ec Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 21 Sep 2011 04:55:02 +0000 Subject: [PATCH] in progress --- .../analysis/AncestralTaxonomyInference.java | 43 +++++++++++++++----- .../java/src/org/forester/archaeopteryx/Util.java | 1 - .../org/forester/ws/uniprot/UniProtTaxonomy.java | 1 + 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java index 054ac7b..09291f6 100644 --- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java +++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java @@ -44,7 +44,7 @@ import org.forester.ws.uniprot.UniProtWsTools; public final class AncestralTaxonomyInference { private static final int MAX_CACHE_SIZE = 100000; - private static final int MAX_TAXONOMIES_TO_RETURN = 1000; + private static final int MAX_TAXONOMIES_TO_RETURN = 10; private static final HashMap _sn_up_cache_map = new HashMap(); private static final HashMap _lineage_up_cache_map = new HashMap(); private static final HashMap _code_up_cache_map = new HashMap(); @@ -250,17 +250,38 @@ public final class AncestralTaxonomyInference { } } if ( last_common_lineage.isEmpty() ) { - String msg = "no common lineage for:\n"; - int counter = 0; - for( final String[] strings : lineages ) { - msg += counter + ": "; - ++counter; - for( final String string : strings ) { - msg += string + " "; + boolean saw_viruses = false; + boolean saw_cellular_organism = false; + for( final String[] lineage : lineages ) { + if ( lineage.length > 0 ) { + if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.VIRUSES ) ) { + saw_viruses = true; + } + else if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.CELLULAR_ORGANISMS ) ) { + saw_cellular_organism = true; + } + if ( saw_cellular_organism && saw_viruses ) { + break; + } + } + } + if ( saw_cellular_organism && saw_viruses ) { + last_common_lineage.add( UniProtTaxonomy.CELLULAR_ORGANISMS ); + last_common = UniProtTaxonomy.CELLULAR_ORGANISMS; + } + else { + String msg = "no common lineage for:\n"; + int counter = 0; + for( final String[] strings : lineages ) { + msg += counter + ": "; + ++counter; + for( final String string : strings ) { + msg += string + " "; + } + msg += "\n"; } - msg += "\n"; + throw new AncestralTaxonomyInferenceException( msg ); } - throw new AncestralTaxonomyInferenceException( msg ); } final Taxonomy tax = new Taxonomy(); n.getNodeData().setTaxonomy( tax ); @@ -459,7 +480,7 @@ public final class AncestralTaxonomyInference { && ForesterUtil.isEmpty( tax.getScientificName() ) ) { tax.setScientificName( up_tax.getScientificName() ); } - if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) + if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() ) && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { tax.setTaxonomyCode( up_tax.getCode() ); } diff --git a/forester/java/src/org/forester/archaeopteryx/Util.java b/forester/java/src/org/forester/archaeopteryx/Util.java index fb2b49c..daceb3e 100644 --- a/forester/java/src/org/forester/archaeopteryx/Util.java +++ b/forester/java/src/org/forester/archaeopteryx/Util.java @@ -313,7 +313,6 @@ public final class Util { } if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { m.put( n.getNodeData().getTaxonomy().getScientificName(), c.getValue() ); - System.out.println( n.getNodeData().getTaxonomy().getScientificName() + "->" + c.getValue() ); } } } diff --git a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java index 5bac652..4f62f77 100644 --- a/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java +++ b/forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java @@ -43,6 +43,7 @@ public final class UniProtTaxonomy { private final String _rank; private final String _id; public final static String CELLULAR_ORGANISMS = "cellular organisms"; + public final static String VIRUSES = "Viruses"; public final static UniProtTaxonomy DROSOPHILA_GENUS = new UniProtTaxonomy( new String[] { CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", "Endopterygota", "Diptera", "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae", "Drosophila" }, -- 1.7.10.2