JAL-1191 zipped SO OBO file added to /resources, more tests added
author gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 23 Dec 2015 09:47:52 +0000 (09:47 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Wed, 23 Dec 2015 09:47:52 +0000 (09:47 +0000)
resources/so-xp-simple.obo.zip [new file with mode: 0644]
src/jalview/io/gff/SequenceOntology.java
test/jalview/io/gff/SequenceOntologyTest.java

diff --git a/resources/so-xp-simple.obo.zip b/resources/so-xp-simple.obo.zip
new file mode 100644 (file)
index 0000000..d150da0
Binary files /dev/null and b/resources/so-xp-simple.obo.zip differ
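diff --git a/src/jalview/io/gff/SequenceOntology.java b/src/jalview/io/gff/SequenceOntology.java
index 1d6e35b..a999410 100644 (file)
--- a/src/jalview/io/gff/SequenceOntology.java
+++ b/src/jalview/io/gff/SequenceOntology.java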
@@ -1,14 +1,18 @@
 package jalview.io.gff;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
 
 import org.biojava.nbio.ontology.Ontology;
 import org.biojava.nbio.ontology.Term;
@@ -53,36 +57,79 @@ public class SequenceOntology
     termsByDescription = new HashMap<String, Term>();
     termIsA = new HashMap<Term, List<Term>>();
 
-    OboParser parser = new OboParser();
-    InputStream inStream = null;
+    loadOntologyZipFile("so-xp-simple.obo");
+  }
+
+  /**
+   * Loads the given ontology file from a zip file with ".zip" appended
+   * 
+   * @param ontologyFile
+   */
+  protected void loadOntologyZipFile(String ontologyFile)
+  {
+    ZipInputStream zipStream = null;
     try
     {
-      inStream = this.getClass().getResourceAsStream("/so-xp-simple.obo");
-
-      BufferedReader oboFile = new BufferedReader(new InputStreamReader(
-              inStream));
-      ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
-      isA = ontology.getTerm("is_a");
-
-      storeTermNames();
+      InputStream inStream = this.getClass().getResourceAsStream(
+              "/" + ontologyFile + ".zip");
+      zipStream = new ZipInputStream(new BufferedInputStream(inStream));
+      ZipEntry entry;
+      while ((entry = zipStream.getNextEntry()) != null)
+      {
+        if (entry.getName().equals(ontologyFile))
+        {
+          loadOboFile(zipStream);
+        }
+      }
     } catch (Exception e)
     {
       e.printStackTrace();
     } finally
     {
-      if (inStream != null)
+      closeStream(zipStream);
+    }
+  }
+
+  /**
+   * Closes the input stream, swallowing all exceptions
+   * 
+   * @param is
+   */
+  protected void closeStream(InputStream is)
+  {
+    if (is != null)
+    {
+      try
       {
-        try
-        {
-          inStream.close();
-        } catch (IOException e)
-        {
-          // ignore
-        }
+        is.close();
+      } catch (IOException e)
+      {
+        // ignore
       }
     }
   }
 
+  /**
+   * Reads, parses and stores the OBO file data
+   * 
+   * @param is
+   * @throws ParseException
+   * @throws IOException
+   */
+  protected void loadOboFile(InputStream is) throws ParseException,
+          IOException
+  {
+    BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
+    OboParser parser = new OboParser();
+    ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
+    isA = ontology.getTerm("is_a");
+    storeTermNames();
+  }
+
+  /**
+   * Store a lookup table of terms by their description. Note that description
+   * is not guaranteed unique - currently reporting 8 duplicates.
+   */
   protected void storeTermNames()
   {
     for (Term term : ontology.getTerms())
@@ -98,8 +145,7 @@ public class SequenceOntology
           {
             System.err.println("Warning: " + term.getName()
                     + " has replaced " + replaced.getName()
-                    + " for lookup of description "
-                    + description);
+                    + " for lookup of '" + description + "'");
           }
         }
       }
@@ -111,6 +157,7 @@ public class SequenceOntology
    * directly or via is_a relationship)
    * 
    * @param soTerm
+   *          SO name or description
    * @return
    */
   public boolean isNucleotideMatch(String soTerm)
@@ -123,6 +170,7 @@ public class SequenceOntology
    * directly or via is_a relationship)
    * 
    * @param soTerm
+   *          SO name or description
    * @return
    */
   public boolean isProteinMatch(String soTerm)
@@ -130,6 +178,14 @@ public class SequenceOntology
     return isA(soTerm, "protein_match");
   }
 
+  /**
+   * Test whether the given Sequence Ontology term is polypeptide (either
+   * directly or via is_a relationship)
+   * 
+   * @param soTerm
+   *          SO name or description
+   * @return
+   */
   public boolean isPolypeptide(String soTerm)
   {
     return isA(soTerm, "polypeptide");
@@ -161,7 +217,7 @@ public class SequenceOntology
   protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
   {
     /*
-     * null child term arises from a misspelled SO description
+     * null term could arise from a misspelled SO description
      */
     if (childTerm == null || parentTerm == null)
     {
@@ -175,9 +231,10 @@ public class SequenceOntology
     {
       return true;
     }
+
     /*
-     * lazy initialisation - find all of a term's parents the first
-     * time this is called, and save them in a map.
+     * lazy initialisation - find all of a term's parents (recursively) 
+     * the first time this is called, and save them in a map.
      */
     if (!termIsA.containsKey(childTerm))
     {
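diff --git a/test/jalview/io/gff/SequenceOntologyTest.java b/test/jalview/io/gff/SequenceOntologyTest.java
index 2fd3865..f8ff6f2 100644 (file)
--- a/test/jalview/io/gff/SequenceOntologyTest.java
+++ b/test/jalview/io/gff/SequenceOntologyTest.java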
@@ -18,10 +18,17 @@ public class SequenceOntologyTest
   @Test(groups = "Functional")
   public void testIsA()
   {
+    assertFalse(so.isA(null, null));
+    assertFalse(so.isA(null, "SO:0000087"));
+    assertFalse(so.isA("SO:0000087", null));
+    assertFalse(so.isA("complete", "garbage"));
+
     assertTrue(so.isA("SO:0000087", "SO:0000704"));
     assertFalse(so.isA("SO:0000704", "SO:0000087"));
     assertTrue(so.isA("SO:0000736", "SO:0000735"));
 
+    // same thing:
+    assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence"));
     // direct parent:
     assertTrue(so.isA("micronuclear_sequence", "organelle_sequence"));
     // grandparent:
@@ -29,8 +36,7 @@ public class SequenceOntologyTest
     // great-grandparent:
     assertTrue(so.isA("micronuclear_sequence", "sequence_attribute"));
 
-    // same thing:
-    assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence"));
+    // same thing by name / description:
     assertTrue(so.isA("micronuclear_sequence", "SO:0000084"));
     assertTrue(so.isA("SO:0000084", "micronuclear_sequence"));
     assertTrue(so.isA("SO:0000084", "SO:0000084"));