JAL-1191 check for obsolete term when descriptions are duplicated
author gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 5 Jan 2016 10:12:09 +0000 (10:12 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 5 Jan 2016 10:12:09 +0000 (10:12 +0000)
src/jalview/io/gff/SequenceOntology.java
test/jalview/io/gff/SequenceOntologyTest.java

index a999410..7f714ae 100644 (file)
@@ -19,6 +19,7 @@ import org.biojava.nbio.ontology.Term;
 import org.biojava.nbio.ontology.Term.Impl;
 import org.biojava.nbio.ontology.Triple;
 import org.biojava.nbio.ontology.io.OboParser;
+import org.biojava.nbio.ontology.utils.Annotation;
 
 /**
  * A wrapper class that parses the Sequence Ontology and exposes useful access
@@ -26,7 +27,7 @@ import org.biojava.nbio.ontology.io.OboParser;
  */
 public class SequenceOntology
 {
-  private static SequenceOntology instance = new SequenceOntology();
+  private static SequenceOntology instance;
 
   private Ontology ontology;
 
@@ -43,8 +44,12 @@ public class SequenceOntology
    */
   private Map<Term, List<Term>> termIsA;
 
-  public static SequenceOntology getInstance()
+  /**
+   * Returns the shared SequenceOntology instance, constructing it on the first
+   * call. The method is synchronized so that the null check and construction
+   * cannot be interleaved between threads calling it.
+   * 
+   * @return the single cached instance
+   */
+  public synchronized static SequenceOntology getInstance()
   {
+    if (instance == null)
+    {
+      instance = new SequenceOntology();
+    }
     return instance;
   }
 
@@ -127,8 +132,10 @@ public class SequenceOntology
   }
 
   /**
-   * Store a lookup table of terms by their description. Note that description
-   * is not guaranteed unique - currently reporting 8 duplicates.
+   * Stores a lookup table of terms by description. Note that description is not
+   * guaranteed unique. Where duplicate descriptions are found, try to discard
+   * the term that is flagged as obsolete. However we do store obsolete terms
+   * where there is no duplication of description.
    */
   protected void storeTermNames()
   {
@@ -139,20 +146,63 @@ public class SequenceOntology
         String description = term.getDescription();
         if (description != null)
         {
-          // System.out.println(term.getName() + "=" + term.getDescription());
-          Term replaced = termsByDescription.put(description, term);
+          Term replaced = termsByDescription.get(description);
           if (replaced != null)
           {
+            boolean newTermIsObsolete = isObsolete(term);
+            boolean oldTermIsObsolete = isObsolete(replaced);
+            if (newTermIsObsolete && !oldTermIsObsolete)
+            {
+              System.err.println("Ignoring " + term.getName()
+                      + " as obsolete and duplicated by "
+                      + replaced.getName());
+              term = replaced;
+            }
+            else if (!newTermIsObsolete && oldTermIsObsolete)
+            {
+              System.err.println("Ignoring " + replaced.getName()
+                      + " as obsolete and duplicated by " + term.getName());
+            }
+            else
+            {
             System.err.println("Warning: " + term.getName()
                     + " has replaced " + replaced.getName()
                     + " for lookup of '" + description + "'");
+            }
           }
+          termsByDescription.put(description, term);
         }
       }
     }
   }
 
   /**
+   * Answers true if the term has property "is_obsolete" with value true, else
+   * false
+   * 
+   * @param term
+   * @return
+   */
+  public static boolean isObsolete(Term term)
+  {
+    Annotation ann = term.getAnnotation();
+    if (ann != null)
+    {
+      try
+      {
+      if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
+      {
+          return true;
+        }
+      } catch (NoSuchElementException e)
+      {
+        // fall through to false
+      }
+    }
+    return false;
+  }
+
+  /**
    * Test whether the given Sequence Ontology term is nucleotide_match (either
    * directly or via is_a relationship)
    * 
index f8ff6f2..6c9226f 100644 (file)
@@ -12,7 +12,11 @@ public class SequenceOntologyTest
 
+  /**
+   * Fetches the shared SequenceOntology before each test method and prints how
+   * long the call took. getInstance() only constructs the ontology when no
+   * instance is cached yet, so calls after the first are expected to report
+   * only the cost of returning the cached instance.
+   */
   @BeforeMethod
   public void setUp() {
+    long now = System.currentTimeMillis();
     so = SequenceOntology.getInstance();
+    long elapsed = System.currentTimeMillis() - now;
+    System.out.println("Load and cache of Sequence Ontology took "
+            + elapsed + "ms");
   }
 
   @Test(groups = "Functional")