package jalview.io.gff;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+import org.biojava.nbio.ontology.Ontology;
+import org.biojava.nbio.ontology.Term;
+import org.biojava.nbio.ontology.Term.Impl;
+import org.biojava.nbio.ontology.Triple;
+import org.biojava.nbio.ontology.io.OboParser;
+
/**
* A wrapper class that parses the Sequence Ontology and exposes useful access
- * methods
+ * methods. This version uses the BioJava parser.
*/
public class SequenceOntology
{
private static SequenceOntology instance = new SequenceOntology();
+ private Ontology ontology; // the parsed ontology; stays null if the constructor failed to load it
+
+ private Term isA; // the ontology's "is_a" term, passed to getTriples() when looking up a term's parents
+
+ /*
+ * lookup of terms by user readable name (NB not guaranteed unique)
+ * - keyed by Term.getDescription() and filled by storeTermNames();
+ * when two terms share a description the one stored last wins and a
+ * warning is written to stderr
+ */
+ private Map<String, Term> termsByDescription;
+
+ /*
+ * Map where key is a Term and value is a (possibly empty) list of
+ * all Terms to which the key has a direct 'isA' relationship
+ * - filled lazily by findParents() the first time termIsA() meets a
+ * term; both of those methods are synchronized
+ */
+ private Map<Term, List<Term>> termIsA;
+ // accessor for the single shared instance, which is created by the static field initialiser above
public static SequenceOntology getInstance()
{
return instance;
}
/**
- * Private constructor to enforce use of singleton.
+ * Private constructor to enforce use of singleton. Parses and caches the SO
+ * OBO data file.
+ * <p>
+ * The file location is taken from the system property
+ * <code>jalview.so.obo</code>; when that property is not set the original
+ * development path is used, so existing behaviour is unchanged. Any failure
+ * to open or parse the file is reported on stderr (with the path that was
+ * tried) and is not rethrown, which leaves the ontology field null.
*/
private SequenceOntology()
{
- // TODO: parse and cache so.obo data file e.g. using BioJava
+   termsByDescription = new HashMap<String, Term>();
+   termIsA = new HashMap<Term, List<Term>>();
+
+   /*
+    * the default is a path on one developer's machine, so allow it to be
+    * overridden without a code change
+    */
+   String oboPath = System.getProperty("jalview.so.obo",
+           "/Users/gmcarstairs/Documents/ontologies/so-xp-simple.obo");
+
+   OboParser parser = new OboParser();
+   InputStream inStream = null;
+   try
+   {
+     inStream = new FileInputStream(oboPath);
+
+     /*
+      * decode explicitly as UTF-8 so that parsing does not depend on the
+      * platform default charset
+      */
+     BufferedReader oboFile = new BufferedReader(new InputStreamReader(
+             inStream, "UTF-8"));
+     ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
+     isA = ontology.getTerm("is_a");
+
+     storeTermNames();
+   } catch (Exception e)
+   {
+     // say which file was being read, then keep the original stack trace
+     System.err.println("Failed to load Sequence Ontology from "
+             + oboPath);
+     e.printStackTrace();
+   } finally
+   {
+     if (inStream != null)
+     {
+       try
+       {
+         inStream.close();
+       } catch (IOException e)
+       {
+         // nothing useful can be done if close fails
+       }
+     }
+   }
+ }
+
+ /**
+  * Builds the description lookup. Every ontology term that is a Term.Impl
+  * and has a non-null description is stored in termsByDescription under that
+  * description. If a description was already present, the newer term
+  * replaces the older one and a warning naming both is printed to stderr.
+  */
+ protected void storeTermNames()
+ {
+   for (Term candidate : ontology.getTerms())
+   {
+     if (!(candidate instanceof Impl))
+     {
+       continue;
+     }
+
+     String label = candidate.getDescription();
+     if (label == null)
+     {
+       continue;
+     }
+
+     Term previous = termsByDescription.put(label, candidate);
+     if (previous != null)
+     {
+       System.err.println("Warning: " + candidate.getName()
+               + " has replaced " + previous.getName()
+               + " for lookup of description " + label);
+     }
+   }
}
/**
*/
public boolean isNucleotideMatch(String soTerm)
{
- // temporary until OBO parser is in place!
- // (which should also match SO ids e.g. "SO:0000347")
- String[] nucMatch = { "nucleotide_match", "primer_match",
- "cross_genome_match", "expressed_sequence_match",
- "translated_nucleotide_match", "UST_match", "RSF_match",
- "cDNA_match", "EST_match" };
- for (int i = 0; i < nucMatch.length; i++)
- {
- if (nucMatch[i].equals(soTerm))
- {
- return true;
- }
- }
- return false;
+ return isA(soTerm, "nucleotide_match"); // true for nucleotide_match itself or any term with a direct or indirect is_a path to it
}
/**
*/
public boolean isProteinMatch(String soTerm)
{
- // temporary until OBO parser is in place!
- return "protein_match".equals(soTerm)
- || "protein_hmm_match".equals(soTerm);
+ return isA(soTerm, "protein_match"); // true for protein_match itself or any term with a direct or indirect is_a path to it
}
public boolean isPolypeptide(String soTerm)
{
- return "polypeptide".equals(soTerm);
+ return isA(soTerm, "polypeptide"); // true for polypeptide itself or any term with a direct or indirect is_a path to it
+ }
+
+ /**
+  * Tests whether one ontology term is the same as, or is linked by a chain
+  * of 'isA' relationships to, another. Each argument is resolved with
+  * getTerm(), so it may be either a term name or a term description.
+  *
+  * @param child
+  *          the term to start from
+  * @param parent
+  *          the term to look for among the child and its ancestors
+  * @return true if the relationship holds; false otherwise, including when
+  *         either argument does not resolve to a term
+  */
+ public boolean isA(String child, String parent)
+ {
+   return termIsA(getTerm(child), getTerm(parent));
+ }
+
+ /**
+  * Answers whether childTerm is parentTerm, or reaches it by following
+  * 'isA' links upwards. The direct parents of a term are looked up in the
+  * termIsA map; if the term has no entry yet, findParents() is called first
+  * to create one.
+  *
+  * @param childTerm
+  *          the term to start from (may be null)
+  * @param parentTerm
+  *          the term to search for (may be null)
+  * @return true if parentTerm is childTerm or one of its ancestors; false
+  *         otherwise, or if either argument is null
+  */
+ protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
+ {
+   // a null term (e.g. from a misspelled SO description) never matches
+   if (childTerm == null || parentTerm == null)
+   {
+     return false;
+   }
+
+   // the upward walk has arrived at the term being looked for
+   if (childTerm == parentTerm)
+   {
+     return true;
+   }
+
+   // first time this term is seen: work out its parents and cache them
+   List<Term> directParents = termIsA.get(childTerm);
+   if (directParents == null)
+   {
+     findParents(childTerm);
+     directParents = termIsA.get(childTerm);
+   }
+
+   // try each direct parent in turn, stopping at the first success
+   boolean found = false;
+   for (int i = 0; i < directParents.size() && !found; i++)
+   {
+     found = termIsA(directParents.get(i), parentTerm);
+   }
+   return found;
+ }
+
+ /**
+  * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
+  * empty) list in the termIsA map, then does the same for every parent that
+  * does not yet have an entry in that map.
+  * <p>
+  * Parents that are already in the map are not visited again, so an ancestor
+  * shared by several paths is resolved once only, and a cyclic relationship
+  * in the data cannot cause unbounded recursion.
+  *
+  * @param childTerm
+  *          the term whose parents are to be found and cached
+  */
+ protected synchronized void findParents(Term childTerm)
+ {
+   List<Term> result = new ArrayList<Term>();
+
+   /*
+    * register the list before recursing, so that this term counts as
+    * 'already handled' if it is reached again further up the hierarchy
+    */
+   termIsA.put(childTerm, result);
+
+   for (Triple triple : ontology.getTriples(childTerm, null, isA))
+   {
+     Term parent = triple.getObject();
+     result.add(parent);
+
+     /*
+      * and search for the parent's parents recursively, unless that
+      * has already been done (or is in progress)
+      */
+     if (!termIsA.containsKey(parent))
+     {
+       findParents(parent);
+     }
+   }
+ }
+
+ /**
+  * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
+  * "sequence_location"), or null if not found. The description lookup is
+  * tried first, then the ontology's lookup by name. Also returns null,
+  * rather than failing, if the ontology was not loaded.
+  *
+  * @param nameOrDescription
+  *          a term name or a term description
+  * @return the matching term, or null if there is none
+  */
+ protected Term getTerm(String nameOrDescription)
+ {
+   Term t = termsByDescription.get(nameOrDescription);
+
+   /*
+    * ontology is null if the constructor could not read or parse the
+    * data file; treat that as 'not found' instead of throwing
+    */
+   if (t == null && ontology != null)
+   {
+     try
+     {
+       t = ontology.getTerm(nameOrDescription);
+     } catch (NoSuchElementException e)
+     {
+       // not found - fall through and return null
+     }
+   }
+   return t;
}
}
--- /dev/null
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Functional tests for the 'isA' queries of SequenceOntology. They use the
+ * shared singleton, so they depend on the ontology data file having been
+ * loaded successfully.
+ */
+public class SequenceOntologyTest
+{
+  private SequenceOntology so;
+
+  /**
+   * Fetches the shared SequenceOntology before each test. alwaysRun is set
+   * because this method belongs to no group while every test is in group
+   * "Functional"; without it a run that selects tests by group skips this
+   * method and leaves 'so' null.
+   */
+  @BeforeMethod(alwaysRun = true)
+  public void setUp()
+  {
+    so = SequenceOntology.getInstance();
+  }
+
+  /**
+   * Checks isA() with term ids, with descriptions, and with a mixture.
+   */
+  @Test(groups = "Functional")
+  public void testTermIsA()
+  {
+    assertTrue(so.isA("SO:0000087", "SO:0000704"));
+    assertFalse(so.isA("SO:0000704", "SO:0000087"));
+    assertTrue(so.isA("SO:0000736", "SO:0000735"));
+
+    // direct parent:
+    assertTrue(so.isA("micronuclear_sequence", "organelle_sequence"));
+    // grandparent:
+    assertTrue(so.isA("micronuclear_sequence", "sequence_location"));
+    // great-grandparent:
+    assertTrue(so.isA("micronuclear_sequence", "sequence_attribute"));
+
+    // same thing:
+    assertTrue(so.isA("micronuclear_sequence", "SO:0000084"));
+  }
+
+  /**
+   * Checks that protein_match and a subtype of it are recognised, and that
+   * the lookup is case-sensitive.
+   */
+  @Test(groups = "Functional")
+  public void testIsProteinMatch()
+  {
+    assertTrue(so.isProteinMatch("protein_match"));
+    assertTrue(so.isProteinMatch("protein_hmm_match"));
+    assertFalse(so.isProteinMatch("Protein_match")); // case-sensitive
+  }
+
+  /**
+   * Checks that nucleotide_match and its subtypes are recognised, but not
+   * its parent term.
+   */
+  @Test(groups = "Functional")
+  public void testIsNucleotideMatch()
+  {
+    assertTrue(so.isNucleotideMatch("nucleotide_match"));
+    assertTrue(so.isNucleotideMatch("primer_match"));
+    assertTrue(so.isNucleotideMatch("cross_genome_match"));
+    assertTrue(so.isNucleotideMatch("expressed_sequence_match"));
+    assertTrue(so.isNucleotideMatch("translated_nucleotide_match"));
+    assertTrue(so.isNucleotideMatch("UST_match"));
+    assertTrue(so.isNucleotideMatch("RST_match"));
+    assertTrue(so.isNucleotideMatch("cDNA_match"));
+    assertTrue(so.isNucleotideMatch("EST_match"));
+    assertFalse(so.isNucleotideMatch("match")); // parent
+  }
+
+  /**
+   * Checks that only 'isA' links count: terms related to CDS by part_of or
+   * derives_from must not be reported as a CDS.
+   */
+  @Test(groups = "Functional")
+  public void testIsCDS()
+  {
+    assertTrue(so.isA("CDS", "CDS"));
+    assertTrue(so.isA("CDS_predicted", "CDS"));
+    assertTrue(so.isA("transposable_element_CDS", "CDS"));
+    assertTrue(so.isA("edited_CDS", "CDS"));
+    assertTrue(so.isA("CDS_independently_known", "CDS"));
+    assertTrue(so.isA("CDS_fragment", "CDS"));
+    assertFalse(so.isA("CDS_region", "CDS")); // part_of
+    assertFalse(so.isA("polypeptide", "CDS")); // derives_from
+  }
+}