1 package jalview.io.gff;
3 import java.io.BufferedInputStream;
4 import java.io.BufferedReader;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.text.ParseException;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.List;
13 import java.util.NoSuchElementException;
14 import java.util.zip.ZipEntry;
15 import java.util.zip.ZipInputStream;
17 import org.biojava.nbio.ontology.Ontology;
18 import org.biojava.nbio.ontology.Term;
19 import org.biojava.nbio.ontology.Term.Impl;
20 import org.biojava.nbio.ontology.Triple;
21 import org.biojava.nbio.ontology.io.OboParser;
24 * A wrapper class that parses the Sequence Ontology and exposes useful access
25 * methods. This version uses the BioJava parser.
27 public class SequenceOntology
29 private static SequenceOntology instance = new SequenceOntology();
31 private Ontology ontology;
36 * lookup of terms by user readable name (NB not guaranteed unique)
38 private Map<String, Term> termsByDescription;
41 * Map where key is a Term and value is a (possibly empty) list of
42 * all Terms to which the key has a direct 'isA' relationship
44 private Map<Term, List<Term>> termIsA;
46 public static SequenceOntology getInstance()
52 * Private constructor to enforce use of singleton. Parses and caches the SO
55 private SequenceOntology()
57 termsByDescription = new HashMap<String, Term>();
58 termIsA = new HashMap<Term, List<Term>>();
60 loadOntologyZipFile("so-xp-simple.obo");
64 * Loads the given ontology file from a zip file with ".zip" appended
68 protected void loadOntologyZipFile(String ontologyFile)
70 ZipInputStream zipStream = null;
73 InputStream inStream = this.getClass().getResourceAsStream(
74 "/" + ontologyFile + ".zip");
75 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
77 while ((entry = zipStream.getNextEntry()) != null)
79 if (entry.getName().equals(ontologyFile))
81 loadOboFile(zipStream);
89 closeStream(zipStream);
94 * Closes the input stream, swallowing all exceptions
98 protected void closeStream(InputStream is)
105 } catch (IOException e)
113 * Reads, parses and stores the OBO file data
116 * @throws ParseException
117 * @throws IOException
119 protected void loadOboFile(InputStream is) throws ParseException,
122 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
123 OboParser parser = new OboParser();
124 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
125 isA = ontology.getTerm("is_a");
130 * Store a lookup table of terms by their description. Note that description
131 * is not guaranteed unique - currently reporting 8 duplicates.
133 protected void storeTermNames()
135 for (Term term : ontology.getTerms())
137 if (term instanceof Impl)
139 String description = term.getDescription();
140 if (description != null)
142 // System.out.println(term.getName() + "=" + term.getDescription());
143 Term replaced = termsByDescription.put(description, term);
144 if (replaced != null)
146 System.err.println("Warning: " + term.getName()
147 + " has replaced " + replaced.getName()
148 + " for lookup of '" + description + "'");
156 * Test whether the given Sequence Ontology term is nucleotide_match (either
157 * directly or via is_a relationship)
160 * SO name or description
163 public boolean isNucleotideMatch(String soTerm)
165 return isA(soTerm, "nucleotide_match");
169 * Test whether the given Sequence Ontology term is protein_match (either
170 * directly or via is_a relationship)
173 * SO name or description
176 public boolean isProteinMatch(String soTerm)
178 return isA(soTerm, "protein_match");
182 * Test whether the given Sequence Ontology term is polypeptide (either
183 * directly or via is_a relationship)
186 * SO name or description
189 public boolean isPolypeptide(String soTerm)
191 return isA(soTerm, "polypeptide");
195 * Returns true if the given term has a (direct or indirect) 'isA'
196 * relationship with the parent
202 public boolean isA(String child, String parent)
204 Term childTerm = getTerm(child);
205 Term parentTerm = getTerm(parent);
207 return termIsA(childTerm, parentTerm);
211 * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
217 protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
220 * null term could arise from a misspelled SO description
222 if (childTerm == null || parentTerm == null)
228 * recursive search endpoint:
230 if (childTerm == parentTerm)
236 * lazy initialisation - find all of a term's parents (recursively)
237 * the first time this is called, and save them in a map.
239 if (!termIsA.containsKey(childTerm))
241 findParents(childTerm);
244 List<Term> parents = termIsA.get(childTerm);
245 for (Term parent : parents)
247 if (termIsA(parent, parentTerm))
257 * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
262 protected synchronized void findParents(Term childTerm)
264 List<Term> result = new ArrayList<Term>();
265 for (Triple triple : ontology.getTriples(childTerm, null, isA))
267 Term parent = triple.getObject();
271 * and search for the parent's parents recursively
275 termIsA.put(childTerm, result);
279 * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
280 * "sequence_location"), or null if not found.
285 protected Term getTerm(String nameOrDescription)
287 Term t = termsByDescription.get(nameOrDescription);
292 t = ontology.getTerm(nameOrDescription);
293 } catch (NoSuchElementException e)