in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 21 Sep 2011 04:55:02 +0000 (04:55 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 21 Sep 2011 04:55:02 +0000 (04:55 +0000)
forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
forester/java/src/org/forester/archaeopteryx/Util.java
forester/java/src/org/forester/ws/uniprot/UniProtTaxonomy.java

index 054ac7b..09291f6 100644 (file)
@@ -44,7 +44,7 @@ import org.forester.ws.uniprot.UniProtWsTools;
 public final class AncestralTaxonomyInference {
 
     private static final int                              MAX_CACHE_SIZE           = 100000;
-    private static final int                              MAX_TAXONOMIES_TO_RETURN = 1000;
+    private static final int                              MAX_TAXONOMIES_TO_RETURN = 10;
     private static final HashMap<String, UniProtTaxonomy> _sn_up_cache_map         = new HashMap<String, UniProtTaxonomy>();
     private static final HashMap<String, UniProtTaxonomy> _lineage_up_cache_map    = new HashMap<String, UniProtTaxonomy>();
     private static final HashMap<String, UniProtTaxonomy> _code_up_cache_map       = new HashMap<String, UniProtTaxonomy>();
@@ -250,17 +250,38 @@ public final class AncestralTaxonomyInference {
             }
         }
         if ( last_common_lineage.isEmpty() ) {
-            String msg = "no common lineage for:\n";
-            int counter = 0;
-            for( final String[] strings : lineages ) {
-                msg += counter + ": ";
-                ++counter;
-                for( final String string : strings ) {
-                    msg += string + " ";
+            boolean saw_viruses = false;
+            boolean saw_cellular_organism = false;
+            for( final String[] lineage : lineages ) {
+                if ( lineage.length > 0 ) {
+                    if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.VIRUSES ) ) {
+                        saw_viruses = true;
+                    }
+                    else if ( lineage[ 0 ].equalsIgnoreCase( UniProtTaxonomy.CELLULAR_ORGANISMS ) ) {
+                        saw_cellular_organism = true;
+                    }
+                    if ( saw_cellular_organism && saw_viruses ) {
+                        break;
+                    }
+                }
+            }
+            if ( saw_cellular_organism && saw_viruses ) {
+                last_common_lineage.add( UniProtTaxonomy.CELLULAR_ORGANISMS );
+                last_common = UniProtTaxonomy.CELLULAR_ORGANISMS;
+            }
+            else {
+                String msg = "no common lineage for:\n";
+                int counter = 0;
+                for( final String[] strings : lineages ) {
+                    msg += counter + ": ";
+                    ++counter;
+                    for( final String string : strings ) {
+                        msg += string + " ";
+                    }
+                    msg += "\n";
                 }
-                msg += "\n";
+                throw new AncestralTaxonomyInferenceException( msg );
             }
-            throw new AncestralTaxonomyInferenceException( msg );
         }
         final Taxonomy tax = new Taxonomy();
         n.getNodeData().setTaxonomy( tax );
@@ -459,7 +480,7 @@ public final class AncestralTaxonomyInference {
                 && ForesterUtil.isEmpty( tax.getScientificName() ) ) {
             tax.setScientificName( up_tax.getScientificName() );
         }
-        if ( ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
+        if ( node.isExternal() && ( qt != QUERY_TYPE.CODE ) && !ForesterUtil.isEmpty( up_tax.getCode() )
                 && ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) {
             tax.setTaxonomyCode( up_tax.getCode() );
         }
index fb2b49c..daceb3e 100644 (file)
@@ -313,7 +313,6 @@ public final class Util {
                     }
                     if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                         m.put( n.getNodeData().getTaxonomy().getScientificName(), c.getValue() );
-                        System.out.println( n.getNodeData().getTaxonomy().getScientificName() + "->" + c.getValue() );
                     }
                 }
             }
index 5bac652..4f62f77 100644 (file)
@@ -43,6 +43,7 @@ public final class UniProtTaxonomy {
     private final String                _rank;
     private final String                _id;
     public final static String          CELLULAR_ORGANISMS       = "cellular organisms";
+    public final static String          VIRUSES                  = "Viruses";
     public final static UniProtTaxonomy DROSOPHILA_GENUS         = new UniProtTaxonomy( new String[] {
             CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera",
             "Endopterygota", "Diptera", "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae", "Drosophila" },