From: gmungoc Date: Wed, 23 Dec 2015 09:47:52 +0000 (+0000) Subject: JAL-1191 zipped SO OBO file added to /resources, more tests added X-Git-Tag: Release_2_10_0~296^2~94 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=fb1ee2952cd17c79c0226c9f79a7a78e9df778a3;p=jalview.git JAL-1191 zipped SO OBO file added to /resources, more tests added --- diff --git a/resources/so-xp-simple.obo.zip b/resources/so-xp-simple.obo.zip new file mode 100644 index 0000000..d150da0 Binary files /dev/null and b/resources/so-xp-simple.obo.zip differ diff --git a/src/jalview/io/gff/SequenceOntology.java b/src/jalview/io/gff/SequenceOntology.java index 1d6e35b..a999410 100644 --- a/src/jalview/io/gff/SequenceOntology.java +++ b/src/jalview/io/gff/SequenceOntology.java @@ -1,14 +1,18 @@ package jalview.io.gff; +import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; import org.biojava.nbio.ontology.Ontology; import org.biojava.nbio.ontology.Term; @@ -53,36 +57,79 @@ public class SequenceOntology termsByDescription = new HashMap(); termIsA = new HashMap>(); - OboParser parser = new OboParser(); - InputStream inStream = null; + loadOntologyZipFile("so-xp-simple.obo"); + } + + /** + * Loads the given ontology file from a zip file with ".zip" appended + * + * @param ontologyFile + */ + protected void loadOntologyZipFile(String ontologyFile) + { + ZipInputStream zipStream = null; try { - inStream = this.getClass().getResourceAsStream("/so-xp-simple.obo"); - - BufferedReader oboFile = new BufferedReader(new InputStreamReader( - inStream)); - ontology = parser.parseOBO(oboFile, "SO", "the SO ontology"); - isA = ontology.getTerm("is_a"); - - storeTermNames(); + InputStream inStream = this.getClass().getResourceAsStream( + "/" + ontologyFile + ".zip"); + zipStream = new ZipInputStream(new BufferedInputStream(inStream)); + ZipEntry entry; + while ((entry = zipStream.getNextEntry()) != null) + { + if (entry.getName().equals(ontologyFile)) + { + loadOboFile(zipStream); + } + } } catch (Exception e) { e.printStackTrace(); } finally { - if (inStream != null) + closeStream(zipStream); + } + } + + /** + * Closes the input stream, swallowing all exceptions + * + * @param is + */ + protected void closeStream(InputStream is) + { + if (is != null) + { + try { - try - { - inStream.close(); - } catch (IOException e) - { - // ignore - } + is.close(); + } catch (IOException e) + { + // ignore } } } + /** + * Reads, parses and stores the OBO file data + * + * @param is + * @throws ParseException + * @throws IOException + */ + protected void loadOboFile(InputStream is) throws ParseException, + IOException + { + BufferedReader oboFile = new BufferedReader(new InputStreamReader(is)); + OboParser parser = new OboParser(); + ontology = parser.parseOBO(oboFile, "SO", "the SO ontology"); + isA = ontology.getTerm("is_a"); + storeTermNames(); + } + + /** + * Store a lookup table of terms by their description. Note that description + * is not guaranteed unique - currently reporting 8 duplicates. + */ protected void storeTermNames() { for (Term term : ontology.getTerms()) @@ -98,8 +145,7 @@ public class SequenceOntology { System.err.println("Warning: " + term.getName() + " has replaced " + replaced.getName() - + " for lookup of description " - + description); + + " for lookup of '" + description + "'"); } } } @@ -111,6 +157,7 @@ public class SequenceOntology * directly or via is_a relationship) * * @param soTerm + * SO name or description * @return */ public boolean isNucleotideMatch(String soTerm) @@ -123,6 +170,7 @@ public class SequenceOntology * directly or via is_a relationship) * * @param soTerm + * SO name or description * @return */ public boolean isProteinMatch(String soTerm) @@ -130,6 +178,14 @@ public class SequenceOntology return isA(soTerm, "protein_match"); } + /** + * Test whether the given Sequence Ontology term is polypeptide (either + * directly or via is_a relationship) + * + * @param soTerm + * SO name or description + * @return + */ public boolean isPolypeptide(String soTerm) { return isA(soTerm, "polypeptide"); @@ -161,7 +217,7 @@ public class SequenceOntology protected synchronized boolean termIsA(Term childTerm, Term parentTerm) { /* - * null child term arises from a misspelled SO description + * null term could arise from a misspelled SO description */ if (childTerm == null || parentTerm == null) { @@ -175,9 +231,10 @@ public class SequenceOntology { return true; } + /* - * lazy initialisation - find all of a term's parents the first - * time this is called, and save them in a map. + * lazy initialisation - find all of a term's parents (recursively) + * the first time this is called, and save them in a map. */ if (!termIsA.containsKey(childTerm)) { diff --git a/test/jalview/io/gff/SequenceOntologyTest.java b/test/jalview/io/gff/SequenceOntologyTest.java index 2fd3865..f8ff6f2 100644 --- a/test/jalview/io/gff/SequenceOntologyTest.java +++ b/test/jalview/io/gff/SequenceOntologyTest.java @@ -18,10 +18,17 @@ public class SequenceOntologyTest @Test(groups = "Functional") public void testIsA() { + assertFalse(so.isA(null, null)); + assertFalse(so.isA(null, "SO:0000087")); + assertFalse(so.isA("SO:0000087", null)); + assertFalse(so.isA("complete", "garbage")); + assertTrue(so.isA("SO:0000087", "SO:0000704")); assertFalse(so.isA("SO:0000704", "SO:0000087")); assertTrue(so.isA("SO:0000736", "SO:0000735")); + // same thing: + assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence")); // direct parent: assertTrue(so.isA("micronuclear_sequence", "organelle_sequence")); // grandparent: @@ -29,8 +36,7 @@ public class SequenceOntologyTest // great-grandparent: assertTrue(so.isA("micronuclear_sequence", "sequence_attribute")); - // same thing: - assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence")); + // same thing by name / description: assertTrue(so.isA("micronuclear_sequence", "SO:0000084")); assertTrue(so.isA("SO:0000084", "micronuclear_sequence")); assertTrue(so.isA("SO:0000084", "SO:0000084"));