1 package jalview.io.gff;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.biojava.nbio.ontology.Ontology;
import org.biojava.nbio.ontology.Term;
import org.biojava.nbio.ontology.Term.Impl;
import org.biojava.nbio.ontology.Triple;
import org.biojava.nbio.ontology.io.OboParser;
import org.biojava.nbio.ontology.utils.Annotation;

/**
25 * A wrapper class that parses the Sequence Ontology and exposes useful access
26 * methods. This version uses the BioJava parser.
28 public class SequenceOntology
32 * selected commonly used values for quick reference
35 public static final String CDS = "CDS";
38 public static final String SEQUENCE_VARIANT = "sequence_variant";
41 public static final String EXON = "exon";
44 public static final String TRANSCRIPT = "transcript";
47 public static final String GENE = "gene";
50 * singleton instance of this class
52 private static SequenceOntology instance;
55 * the parsed Ontology data as modelled by BioJava
57 private Ontology ontology;
60 * the ontology term for the isA relationship
65 * lookup of terms by user readable name (NB not guaranteed unique)
67 private Map<String, Term> termsByDescription;
70 * Map where key is a Term and value is a (possibly empty) list of
71 * all Terms to which the key has an 'isA' relationship, either
72 * directly or indirectly (A isA B isA C)
74 private Map<Term, List<Term>> termIsA;
77 * Returns singleton instance
81 public synchronized static SequenceOntology getInstance()
85 instance = new SequenceOntology();
91 * Private constructor to enforce use of singleton. Parses and caches the SO
94 private SequenceOntology()
96 termsByDescription = new HashMap<String, Term>();
97 termIsA = new HashMap<Term, List<Term>>();
99 loadOntologyZipFile("so-xp-simple.obo");
103 * Loads the given ontology file from a zip file with ".zip" appended
105 * @param ontologyFile
107 protected void loadOntologyZipFile(String ontologyFile)
109 ZipInputStream zipStream = null;
112 String zipFile = ontologyFile + ".zip";
113 System.out.println("Loading Sequence Ontology from " + zipFile);
114 InputStream inStream = this.getClass().getResourceAsStream(
116 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
118 while ((entry = zipStream.getNextEntry()) != null)
120 if (entry.getName().equals(ontologyFile))
122 loadOboFile(zipStream);
125 } catch (Exception e)
130 closeStream(zipStream);
135 * Closes the input stream, swallowing all exceptions
139 protected void closeStream(InputStream is)
146 } catch (IOException e)
154 * Reads, parses and stores the OBO file data
157 * @throws ParseException
158 * @throws IOException
160 protected void loadOboFile(InputStream is) throws ParseException,
163 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
164 OboParser parser = new OboParser();
165 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
166 isA = ontology.getTerm("is_a");
171 * Stores a lookup table of terms by description. Note that description is not
172 * guaranteed unique. Where duplicate descriptions are found, try to discard
173 * the term that is flagged as obsolete. However we do store obsolete terms
174 * where there is no duplication of description.
176 protected void storeTermNames()
178 for (Term term : ontology.getTerms())
180 if (term instanceof Impl)
182 String description = term.getDescription();
183 if (description != null)
185 Term replaced = termsByDescription.get(description);
186 if (replaced != null)
188 boolean newTermIsObsolete = isObsolete(term);
189 boolean oldTermIsObsolete = isObsolete(replaced);
190 if (newTermIsObsolete && !oldTermIsObsolete)
192 System.err.println("Ignoring " + term.getName()
193 + " as obsolete and duplicated by "
194 + replaced.getName());
197 else if (!newTermIsObsolete && oldTermIsObsolete)
199 System.err.println("Ignoring " + replaced.getName()
200 + " as obsolete and duplicated by " + term.getName());
204 System.err.println("Warning: " + term.getName()
205 + " has replaced " + replaced.getName()
206 + " for lookup of '" + description + "'");
209 termsByDescription.put(description, term);
216 * Answers true if the term has property "is_obsolete" with value true, else
222 public static boolean isObsolete(Term term)
224 Annotation ann = term.getAnnotation();
229 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
233 } catch (NoSuchElementException e)
235 // fall through to false
242 * Test whether the given Sequence Ontology term is nucleotide_match (either
243 * directly or via is_a relationship)
246 * SO name or description
249 public boolean isNucleotideMatch(String soTerm)
251 return isA(soTerm, "nucleotide_match");
255 * Test whether the given Sequence Ontology term is protein_match (either
256 * directly or via is_a relationship)
259 * SO name or description
262 public boolean isProteinMatch(String soTerm)
264 return isA(soTerm, "protein_match");
268 * Test whether the given Sequence Ontology term is polypeptide (either
269 * directly or via is_a relationship)
272 * SO name or description
275 public boolean isPolypeptide(String soTerm)
277 return isA(soTerm, "polypeptide");
281 * Returns true if the given term has a (direct or indirect) 'isA'
282 * relationship with the parent
288 public boolean isA(String child, String parent)
291 * optimise trivial checks like isA("CDS", "CDS")
293 if (child.equals(parent))
298 Term childTerm = getTerm(child);
299 Term parentTerm = getTerm(parent);
301 return termIsA(childTerm, parentTerm);
305 * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
311 protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
314 * null term could arise from a misspelled SO description
316 if (childTerm == null || parentTerm == null)
322 * recursive search endpoint:
324 if (childTerm == parentTerm)
330 * lazy initialisation - find all of a term's parents (recursively)
331 * the first time this is called, and save them in a map.
333 if (!termIsA.containsKey(childTerm))
335 findParents(childTerm);
338 List<Term> parents = termIsA.get(childTerm);
339 for (Term parent : parents)
341 if (termIsA(parent, parentTerm))
344 * add (great-)grandparents to parents list as they are discovered,
345 * for faster lookup next time
347 if (!parents.contains(parentTerm))
349 parents.add(parentTerm);
359 * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
364 protected synchronized void findParents(Term childTerm)
366 List<Term> result = new ArrayList<Term>();
367 for (Triple triple : ontology.getTriples(childTerm, null, isA))
369 Term parent = triple.getObject();
373 * and search for the parent's parents recursively
377 termIsA.put(childTerm, result);
381 * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
382 * "sequence_location"), or null if not found.
387 protected Term getTerm(String nameOrDescription)
389 Term t = termsByDescription.get(nameOrDescription);
394 t = ontology.getTerm(nameOrDescription);
395 } catch (NoSuchElementException e)
403 public boolean isSequenceVariant(String soTerm)
405 return isA(soTerm, "sequence_variant");