/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.ext.so;

import jalview.io.gff.SequenceOntologyI;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.biojava.nbio.ontology.Ontology;
import org.biojava.nbio.ontology.Term;
import org.biojava.nbio.ontology.Term.Impl;
import org.biojava.nbio.ontology.Triple;
import org.biojava.nbio.ontology.io.OboParser;
import org.biojava.nbio.ontology.utils.Annotation;

48 * A wrapper class that parses the Sequence Ontology and exposes useful access
49 * methods. This version uses the BioJava parser.
51 public class SequenceOntology implements SequenceOntologyI
54 * the parsed Ontology data as modelled by BioJava
56 private Ontology ontology;
59 * the ontology term for the isA relationship
64 * lookup of terms by user readable name (NB not guaranteed unique)
66 private Map<String, Term> termsByDescription;
69 * Map where key is a Term and value is a (possibly empty) list of
70 * all Terms to which the key has an 'isA' relationship, either
71 * directly or indirectly (A isA B isA C)
73 private Map<Term, List<Term>> termIsA;
75 private List<String> termsFound;
77 private List<String> termsNotFound;
80 * Package private constructor to enforce use of singleton. Parses and caches
81 * the SO OBO data file.
83 public SequenceOntology()
85 termsFound = new ArrayList<String>();
86 termsNotFound = new ArrayList<String>();
87 termsByDescription = new HashMap<String, Term>();
88 termIsA = new HashMap<Term, List<Term>>();
90 loadOntologyZipFile("so-xp-simple.obo");
94 * Loads the given ontology file from a zip file with ".zip" appended
98 protected void loadOntologyZipFile(String ontologyFile)
100 long now = System.currentTimeMillis();
101 ZipInputStream zipStream = null;
104 String zipFile = ontologyFile + ".zip";
105 InputStream inStream = this.getClass()
106 .getResourceAsStream("/" + zipFile);
107 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
109 while ((entry = zipStream.getNextEntry()) != null)
111 if (entry.getName().equals(ontologyFile))
113 loadOboFile(zipStream);
116 long elapsed = System.currentTimeMillis() - now;
117 System.out.println("Loaded Sequence Ontology from " + zipFile + " ("
119 } catch (Exception e)
124 closeStream(zipStream);
129 * Closes the input stream, swallowing all exceptions
133 protected void closeStream(InputStream is)
140 } catch (IOException e)
148 * Reads, parses and stores the OBO file data
151 * @throws ParseException
152 * @throws IOException
154 protected void loadOboFile(InputStream is)
155 throws ParseException, IOException
157 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
158 OboParser parser = new OboParser();
159 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
160 isA = ontology.getTerm("is_a");
165 * Stores a lookup table of terms by description. Note that description is not
166 * guaranteed unique. Where duplicate descriptions are found, try to discard
167 * the term that is flagged as obsolete. However we do store obsolete terms
168 * where there is no duplication of description.
170 protected void storeTermNames()
172 for (Term term : ontology.getTerms())
174 if (term instanceof Impl)
176 String description = term.getDescription();
177 if (description != null)
179 Term replaced = termsByDescription.get(description);
180 if (replaced != null)
182 boolean newTermIsObsolete = isObsolete(term);
183 boolean oldTermIsObsolete = isObsolete(replaced);
184 if (newTermIsObsolete && !oldTermIsObsolete)
186 System.err.println("Ignoring " + term.getName()
187 + " as obsolete and duplicated by "
188 + replaced.getName());
191 else if (!newTermIsObsolete && oldTermIsObsolete)
193 System.err.println("Ignoring " + replaced.getName()
194 + " as obsolete and duplicated by " + term.getName());
198 System.err.println("Warning: " + term.getName()
199 + " has replaced " + replaced.getName()
200 + " for lookup of '" + description + "'");
203 termsByDescription.put(description, term);
210 * Answers true if the term has property "is_obsolete" with value true, else
216 public static boolean isObsolete(Term term)
218 Annotation ann = term.getAnnotation();
223 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
227 } catch (NoSuchElementException e)
229 // fall through to false
236 * Test whether the given Sequence Ontology term is nucleotide_match (either
237 * directly or via is_a relationship)
240 * SO name or description
243 public boolean isNucleotideMatch(String soTerm)
245 return isA(soTerm, NUCLEOTIDE_MATCH);
249 * Test whether the given Sequence Ontology term is protein_match (either
250 * directly or via is_a relationship)
253 * SO name or description
256 public boolean isProteinMatch(String soTerm)
258 return isA(soTerm, PROTEIN_MATCH);
262 * Test whether the given Sequence Ontology term is polypeptide (either
263 * directly or via is_a relationship)
266 * SO name or description
269 public boolean isPolypeptide(String soTerm)
271 return isA(soTerm, POLYPEPTIDE);
275 * Returns true if the given term has a (direct or indirect) 'isA'
276 * relationship with the parent
283 public boolean isA(String child, String parent)
285 if (child == null || parent == null)
290 * optimise trivial checks like isA("CDS", "CDS")
292 if (child.equals(parent))
298 Term childTerm = getTerm(child);
299 if (childTerm != null)
307 Term parentTerm = getTerm(parent);
309 return termIsA(childTerm, parentTerm);
313 * Records a valid term queried for, for reporting purposes
317 private void termFound(String term)
319 synchronized (termsFound)
321 if (!termsFound.contains(term))
323 termsFound.add(term);
329 * Records an invalid term queried for, for reporting purposes
333 private void termNotFound(String term)
335 synchronized (termsNotFound)
337 if (!termsNotFound.contains(term))
339 System.err.println("SO term " + term + " invalid");
340 termsNotFound.add(term);
346 * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
352 protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
355 * null term could arise from a misspelled SO description
357 if (childTerm == null || parentTerm == null)
363 * recursive search endpoint:
365 if (childTerm == parentTerm)
371 * lazy initialisation - find all of a term's parents (recursively)
372 * the first time this is called, and save them in a map.
374 if (!termIsA.containsKey(childTerm))
376 findParents(childTerm);
379 List<Term> parents = termIsA.get(childTerm);
380 for (Term parent : parents)
382 if (termIsA(parent, parentTerm))
385 * add (great-)grandparents to parents list as they are discovered,
386 * for faster lookup next time
388 if (!parents.contains(parentTerm))
390 parents.add(parentTerm);
400 * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
405 protected synchronized void findParents(Term childTerm)
407 List<Term> result = new ArrayList<Term>();
408 for (Triple triple : ontology.getTriples(childTerm, null, isA))
410 Term parent = triple.getObject();
414 * and search for the parent's parents recursively
418 termIsA.put(childTerm, result);
422 * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
423 * "sequence_location"), or null if not found.
428 protected Term getTerm(String nameOrDescription)
430 Term t = termsByDescription.get(nameOrDescription);
435 t = ontology.getTerm(nameOrDescription);
436 } catch (NoSuchElementException e)
444 public boolean isSequenceVariant(String soTerm)
446 return isA(soTerm, SEQUENCE_VARIANT);
450 * Sorts (case-insensitive) and returns the list of valid terms queried for
453 public List<String> termsFound()
455 synchronized (termsFound)
457 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
463 * Sorts (case-insensitive) and returns the list of invalid terms queried for
466 public List<String> termsNotFound()
468 synchronized (termsNotFound)
470 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
471 return termsNotFound;