2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.so;
import jalview.datamodel.ontology.OntologyBase;
import jalview.io.gff.SequenceOntologyI;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.biojava.nbio.ontology.Ontology;
import org.biojava.nbio.ontology.Term;
import org.biojava.nbio.ontology.Term.Impl;
import org.biojava.nbio.ontology.Triple;
import org.biojava.nbio.ontology.io.OboParser;
import org.biojava.nbio.ontology.utils.Annotation;
50 * A wrapper class that parses the Sequence Ontology and exposes useful access
51 * methods. This version uses the BioJava parser.
53 public class SequenceOntology extends OntologyBase
54 implements SequenceOntologyI
57 * the parsed Ontology data as modelled by BioJava
59 private Ontology ontology;
62 * the ontology term for the isA relationship
67 * lookup of terms by user readable name (NB not guaranteed unique)
69 private Map<String, Term> termsByDescription;
72 * Map where key is a Term and value is a (possibly empty) list of
73 * all Terms to which the key has an 'isA' relationship, either
74 * directly or indirectly (A isA B isA C)
76 private Map<Term, List<Term>> termIsA;
78 private List<String> termsFound;
80 private List<String> termsNotFound;
/**
 * Creates the (initially empty) lookup caches and then parses and caches the
 * SO OBO data file "so-xp-simple.obo".
 * <p>
 * NOTE(review): this constructor was previously documented as package private
 * (to enforce use of a singleton) but it is declared public - confirm which
 * is intended.
 */
public SequenceOntology()
{
  this.termIsA = new HashMap<>();
  this.termsByDescription = new HashMap<>();
  this.termsNotFound = new ArrayList<>();
  this.termsFound = new ArrayList<>();

  loadOntologyZipFile("so-xp-simple.obo");
}
97 * Loads the given ontology file from a zip file with ".zip" appended
101 protected void loadOntologyZipFile(String ontologyFile)
103 long now = System.currentTimeMillis();
104 ZipInputStream zipStream = null;
107 String zipFile = ontologyFile + ".zip";
108 InputStream inStream = this.getClass()
109 .getResourceAsStream("/" + zipFile);
110 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
112 while ((entry = zipStream.getNextEntry()) != null)
114 if (entry.getName().equals(ontologyFile))
116 loadOboFile(zipStream);
119 long elapsed = System.currentTimeMillis() - now;
120 System.out.println("Loaded Sequence Ontology from " + zipFile + " ("
122 } catch (Exception e)
127 closeStream(zipStream);
132 * Closes the input stream, swallowing all exceptions
136 protected void closeStream(InputStream is)
143 } catch (IOException e)
151 * Reads, parses and stores the OBO file data
154 * @throws ParseException
155 * @throws IOException
157 protected void loadOboFile(InputStream is)
158 throws ParseException, IOException
160 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
161 OboParser parser = new OboParser();
162 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
163 isA = ontology.getTerm("is_a");
168 * Stores a lookup table of terms by description. Note that description is not
169 * guaranteed unique. Where duplicate descriptions are found, try to discard
170 * the term that is flagged as obsolete. However we do store obsolete terms
171 * where there is no duplication of description.
173 protected void storeTermNames()
175 for (Term term : ontology.getTerms())
177 if (term instanceof Impl)
179 String description = term.getDescription();
180 if (description != null)
182 Term replaced = termsByDescription.get(description);
183 if (replaced != null)
185 boolean newTermIsObsolete = isObsolete(term);
186 boolean oldTermIsObsolete = isObsolete(replaced);
187 if (newTermIsObsolete && !oldTermIsObsolete)
189 System.err.println("Ignoring " + term.getName()
190 + " as obsolete and duplicated by "
191 + replaced.getName());
194 else if (!newTermIsObsolete && oldTermIsObsolete)
196 System.err.println("Ignoring " + replaced.getName()
197 + " as obsolete and duplicated by " + term.getName());
201 System.err.println("Warning: " + term.getName()
202 + " has replaced " + replaced.getName()
203 + " for lookup of '" + description + "'");
206 termsByDescription.put(description, term);
213 * Answers true if the term has property "is_obsolete" with value true, else
219 public static boolean isObsolete(Term term)
221 Annotation ann = term.getAnnotation();
226 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
230 } catch (NoSuchElementException e)
232 // fall through to false
239 * Test whether the given Sequence Ontology term is nucleotide_match (either
240 * directly or via is_a relationship)
243 * SO name or description
246 public boolean isNucleotideMatch(String soTerm)
248 return isA(soTerm, NUCLEOTIDE_MATCH);
252 * Test whether the given Sequence Ontology term is protein_match (either
253 * directly or via is_a relationship)
256 * SO name or description
259 public boolean isProteinMatch(String soTerm)
261 return isA(soTerm, PROTEIN_MATCH);
265 * Test whether the given Sequence Ontology term is polypeptide (either
266 * directly or via is_a relationship)
269 * SO name or description
272 public boolean isPolypeptide(String soTerm)
274 return isA(soTerm, POLYPEPTIDE);
278 * Returns true if the given term has a (direct or indirect) 'isA'
279 * relationship with the parent
286 public boolean isA(String child, String parent)
288 if (child == null || parent == null)
293 * optimise trivial checks like isA("CDS", "CDS")
295 if (child.equals(parent))
301 Term childTerm = getTerm(child);
302 if (childTerm != null)
310 Term parentTerm = getTerm(parent);
312 return termIsA(childTerm, parentTerm);
316 * Records a valid term queried for, for reporting purposes
320 private void termFound(String term)
322 synchronized (termsFound)
324 if (!termsFound.contains(term))
326 termsFound.add(term);
332 * Records an invalid term queried for, for reporting purposes
336 private void termNotFound(String term)
338 synchronized (termsNotFound)
340 if (!termsNotFound.contains(term))
342 System.err.println("SO term " + term + " invalid");
343 termsNotFound.add(term);
349 * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
355 protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
358 * null term could arise from a misspelled SO description
360 if (childTerm == null || parentTerm == null)
366 * recursive search endpoint:
368 if (childTerm == parentTerm)
374 * lazy initialisation - find all of a term's parents (recursively)
375 * the first time this is called, and save them in a map.
377 if (!termIsA.containsKey(childTerm))
379 findParents(childTerm);
382 List<Term> parents = termIsA.get(childTerm);
383 for (Term parent : parents)
385 if (termIsA(parent, parentTerm))
388 * add (great-)grandparents to parents list as they are discovered,
389 * for faster lookup next time
391 if (!parents.contains(parentTerm))
393 parents.add(parentTerm);
403 * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
408 protected synchronized void findParents(Term childTerm)
410 List<Term> result = new ArrayList<>();
411 for (Triple triple : ontology.getTriples(childTerm, null, isA))
413 Term parent = triple.getObject();
417 * and search for the parent's parents recursively
421 termIsA.put(childTerm, result);
425 * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
426 * "sequence_location"), or null if not found.
431 protected Term getTerm(String nameOrDescription)
433 Term t = termsByDescription.get(nameOrDescription);
438 t = ontology.getTerm(nameOrDescription);
439 } catch (NoSuchElementException e)
447 public boolean isSequenceVariant(String soTerm)
449 return isA(soTerm, SEQUENCE_VARIANT);
453 * Sorts (case-insensitive) and returns the list of valid terms queried for
456 public List<String> termsFound()
458 synchronized (termsFound)
460 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
466 * Sorts (case-insensitive) and returns the list of invalid terms queried for
469 public List<String> termsNotFound()
471 synchronized (termsNotFound)
473 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
474 return termsNotFound;
481 * @throws IllegalStateException
482 * if a loop is detected in the ontology
485 public List<String> getRootParents(final String term)
488 * check in cache first
490 if (rootParents.containsKey(term))
492 return rootParents.get(term);
494 Term t = getTerm(term);
501 * todo: check for loops using 'seen', allowing for alternate paths e.g.
502 * stop_gained isA feature_truncation isA feature_variant
503 * " isA nonsynonymous_variant ... isA geneVariant isA feature_variant
505 List<Term> seen = new ArrayList<>();
506 List<Term> top = new ArrayList<>();
507 List<Term> query = new ArrayList<>();
510 while (!query.isEmpty())
512 List<Term> nextQuery = new ArrayList<>();
515 Set<Triple> parents = ontology.getTriples(q, null, isA);
516 if (parents.isEmpty())
519 * q has no parents so is a top level term
526 * search all parent terms
528 for (Triple triple : parents)
530 Term parent = triple.getObject();
531 nextQuery.add(parent);
538 List<String> result = new ArrayList<>();
539 for (Term found : top)
541 String desc = found.getDescription();
542 if (!result.contains(desc))
549 * save result in cache
551 rootParents.put(term, result);