1 package jalview.ext.so;
3 import jalview.io.gff.SequenceOntologyI;
5 import java.io.BufferedInputStream;
6 import java.io.BufferedReader;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.text.ParseException;
11 import java.util.ArrayList;
12 import java.util.Collections;
13 import java.util.HashMap;
14 import java.util.List;
16 import java.util.NoSuchElementException;
17 import java.util.zip.ZipEntry;
18 import java.util.zip.ZipInputStream;
20 import org.biojava.nbio.ontology.Ontology;
21 import org.biojava.nbio.ontology.Term;
22 import org.biojava.nbio.ontology.Term.Impl;
23 import org.biojava.nbio.ontology.Triple;
24 import org.biojava.nbio.ontology.io.OboParser;
25 import org.biojava.nbio.ontology.utils.Annotation;
28 * A wrapper class that parses the Sequence Ontology and exposes useful access
29 * methods. This version uses the BioJava parser.
31 public class SequenceOntology implements SequenceOntologyI
34 * the parsed Ontology data as modelled by BioJava
36 private Ontology ontology;
39 * the ontology term for the isA relationship
44 * lookup of terms by user readable name (NB not guaranteed unique)
46 private Map<String, Term> termsByDescription;
49 * Map where key is a Term and value is a (possibly empty) list of
50 * all Terms to which the key has an 'isA' relationship, either
51 * directly or indirectly (A isA B isA C)
53 private Map<Term, List<Term>> termIsA;
55 private List<String> termsFound;
57 private List<String> termsNotFound;
60 * Constructor. NB it is declared public, so it does not by itself enforce
61 * use as a singleton. Parses and caches the SO OBO data file.
63 public SequenceOntology()
65 termsFound = new ArrayList<String>();
66 termsNotFound = new ArrayList<String>();
67 termsByDescription = new HashMap<String, Term>();
68 termIsA = new HashMap<Term, List<Term>>();
70 loadOntologyZipFile("so-xp-simple.obo");
74 * Loads the named ontology file from a zip resource whose name is the ontology file name with ".zip" appended
78 protected void loadOntologyZipFile(String ontologyFile)
80 long now = System.currentTimeMillis();
81 ZipInputStream zipStream = null;
84 String zipFile = ontologyFile + ".zip";
85 InputStream inStream = this.getClass().getResourceAsStream(
87 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
89 while ((entry = zipStream.getNextEntry()) != null)
91 if (entry.getName().equals(ontologyFile))
93 loadOboFile(zipStream);
96 long elapsed = System.currentTimeMillis() - now;
97 System.out.println("Loaded Sequence Ontology from " + zipFile + " ("
104 closeStream(zipStream);
109 * Closes the input stream, swallowing any IOException
113 protected void closeStream(InputStream is)
120 } catch (IOException e)
128 * Reads, parses and stores the OBO file data
131 * @throws ParseException
132 * @throws IOException
134 protected void loadOboFile(InputStream is) throws ParseException,
137 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
138 OboParser parser = new OboParser();
139 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
140 isA = ontology.getTerm("is_a");
145 * Stores a lookup table of terms by description. Note that description is not
146 * guaranteed unique. Where duplicate descriptions are found, try to discard
147 * the term that is flagged as obsolete. However we do store obsolete terms
148 * where there is no duplication of description.
150 protected void storeTermNames()
152 for (Term term : ontology.getTerms())
154 if (term instanceof Impl)
156 String description = term.getDescription();
157 if (description != null)
159 Term replaced = termsByDescription.get(description);
160 if (replaced != null)
162 boolean newTermIsObsolete = isObsolete(term);
163 boolean oldTermIsObsolete = isObsolete(replaced);
164 if (newTermIsObsolete && !oldTermIsObsolete)
166 System.err.println("Ignoring " + term.getName()
167 + " as obsolete and duplicated by "
168 + replaced.getName());
171 else if (!newTermIsObsolete && oldTermIsObsolete)
173 System.err.println("Ignoring " + replaced.getName()
174 + " as obsolete and duplicated by " + term.getName());
178 System.err.println("Warning: " + term.getName()
179 + " has replaced " + replaced.getName()
180 + " for lookup of '" + description + "'");
183 termsByDescription.put(description, term);
190 * Answers true if the term has property "is_obsolete" with value true, else
196 public static boolean isObsolete(Term term)
198 Annotation ann = term.getAnnotation();
203 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
207 } catch (NoSuchElementException e)
209 // fall through to false
216 * Test whether the given Sequence Ontology term is nucleotide_match (either
217 * directly or via is_a relationship)
220 * SO name or description
223 public boolean isNucleotideMatch(String soTerm)
225 return isA(soTerm, NUCLEOTIDE_MATCH);
229 * Test whether the given Sequence Ontology term is protein_match (either
230 * directly or via is_a relationship)
233 * SO name or description
236 public boolean isProteinMatch(String soTerm)
238 return isA(soTerm, PROTEIN_MATCH);
242 * Test whether the given Sequence Ontology term is polypeptide (either
243 * directly or via is_a relationship)
246 * SO name or description
249 public boolean isPolypeptide(String soTerm)
251 return isA(soTerm, POLYPEPTIDE);
255 * Returns true if the given term has a (direct or indirect) 'isA'
256 * relationship with the parent
263 public boolean isA(String child, String parent)
265 if (child == null || parent == null)
270 * optimise trivial checks like isA("CDS", "CDS")
272 if (child.equals(parent))
278 Term childTerm = getTerm(child);
279 if (childTerm != null)
287 Term parentTerm = getTerm(parent);
289 return termIsA(childTerm, parentTerm);
293 * Records a valid term queried for, for reporting purposes
297 private void termFound(String term)
299 synchronized (termsFound)
301 if (!termsFound.contains(term))
303 termsFound.add(term);
309 * Records an invalid term queried for, for reporting purposes
313 private void termNotFound(String term)
315 synchronized (termsNotFound)
317 if (!termsNotFound.contains(term))
319 System.err.println("SO term " + term + " invalid");
320 termsNotFound.add(term);
326 * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
332 protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
335 * null term could arise from a misspelled SO description
337 if (childTerm == null || parentTerm == null)
343 * recursive search endpoint:
345 if (childTerm == parentTerm)
351 * lazy initialisation - find all of a term's parents (recursively)
352 * the first time this is called, and save them in a map.
354 if (!termIsA.containsKey(childTerm))
356 findParents(childTerm);
359 List<Term> parents = termIsA.get(childTerm);
360 for (Term parent : parents)
362 if (termIsA(parent, parentTerm))
365 * add (great-)grandparents to parents list as they are discovered,
366 * for faster lookup next time
368 if (!parents.contains(parentTerm))
370 parents.add(parentTerm);
380 * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
385 protected synchronized void findParents(Term childTerm)
387 List<Term> result = new ArrayList<Term>();
388 for (Triple triple : ontology.getTriples(childTerm, null, isA))
390 Term parent = triple.getObject();
394 * and search for the parent's parents recursively
398 termIsA.put(childTerm, result);
402 * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
403 * "sequence_location"), or null if not found.
408 protected Term getTerm(String nameOrDescription)
410 Term t = termsByDescription.get(nameOrDescription);
415 t = ontology.getTerm(nameOrDescription);
416 } catch (NoSuchElementException e)
424 public boolean isSequenceVariant(String soTerm)
426 return isA(soTerm, SEQUENCE_VARIANT);
430 * Sorts (case-insensitive, in place) and returns the list of valid terms queried for
433 public List<String> termsFound()
435 synchronized (termsFound)
437 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
443 * Sorts (case-insensitive, in place) and returns the internal list of invalid terms queried for
446 public List<String> termsNotFound()
448 synchronized (termsNotFound)
450 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
451 return termsNotFound;