package jalview.io.gff;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
import org.biojava.nbio.ontology.Ontology;
import org.biojava.nbio.ontology.Term;
import org.biojava.nbio.ontology.Term.Impl;
import org.biojava.nbio.ontology.Triple;
import org.biojava.nbio.ontology.io.OboParser;
+import org.biojava.nbio.ontology.utils.Annotation;
/**
* A wrapper class that parses the Sequence Ontology and exposes useful access
*/
public class SequenceOntology
{
- private static SequenceOntology instance = new SequenceOntology();
+ /*
+ * selected commonly used values for quick reference; the comment above
+ * each constant gives its Sequence Ontology (SO) identifier
+ */
+ // SO:0000316
+ public static final String CDS = "CDS";
+
+ // SO:0001060
+ public static final String SEQUENCE_VARIANT = "sequence_variant";
+
+ // SO:0000147
+ public static final String EXON = "exon";
+
+ // SO:0000673
+ public static final String TRANSCRIPT = "transcript";
+
+ /*
+ * singleton instance of this class; null until first requested through
+ * getInstance(), which constructs it on demand
+ */
+ private static SequenceOntology instance;
+
+ /*
+ * the parsed Ontology data as modelled by BioJava
+ */
private Ontology ontology;
+ /*
+ * the ontology term for the isA relationship
+ */
private Term isA;
/*
/*
* Map where key is a Term and value is a (possibly empty) list of
- * all Terms to which the key has a direct 'isA' relationship
+ * all Terms to which the key has an 'isA' relationship, either
+ * directly or indirectly (A isA B isA C)
*/
private Map<Term, List<Term>> termIsA;
- public static SequenceOntology getInstance()
+ /**
+ * Returns the singleton instance of this class, constructing it on the
+ * first call (when the static instance field is still null). The method is
+ * synchronized, so only one thread at a time can perform the null check
+ * and construction.
+ *
+ * @return the shared SequenceOntology instance
+ */
+ public synchronized static SequenceOntology getInstance()
{
+ if (instance == null)
+ {
+ instance = new SequenceOntology();
+ }
return instance;
}
termsByDescription = new HashMap<String, Term>();
termIsA = new HashMap<Term, List<Term>>();
- OboParser parser = new OboParser();
- InputStream inStream = null;
+ loadOntologyZipFile("so-xp-simple.obo");
+ }
+
+ /**
+ * Loads the given ontology file from a zip file with ".zip" appended. The
+ * zip file is looked up as a resource (via getResourceAsStream, with a
+ * leading slash prepended to its name). Every entry of the zip is visited,
+ * and each entry whose name equals ontologyFile is passed to loadOboFile.
+ * A progress message naming the zip file is written to System.out.
+ * <p>
+ * Any exception raised while loading is caught here and its stack trace
+ * printed, so nothing is propagated to the caller. The zip stream is closed
+ * (via closeStream) whether or not loading succeeded.
+ * <p>
+ * NOTE(review): the stream returned by getResourceAsStream is not checked
+ * for null before being wrapped - confirm the behaviour wanted when the
+ * resource is missing.
+ *
+ * @param ontologyFile
+ * name of the OBO file held inside the zip (for example
+ * "so-xp-simple.obo"); the zip resource name is this value with
+ * ".zip" appended
+ */
+ protected void loadOntologyZipFile(String ontologyFile)
+ {
+ ZipInputStream zipStream = null;
try
{
- inStream = this.getClass().getResourceAsStream("/so-xp-simple.obo");
-
- BufferedReader oboFile = new BufferedReader(new InputStreamReader(
- inStream));
- ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
- isA = ontology.getTerm("is_a");
-
- storeTermNames();
+ String zipFile = ontologyFile + ".zip";
+ System.out.println("Loading Sequence Ontology from " + zipFile);
+ InputStream inStream = this.getClass().getResourceAsStream(
+ "/" + zipFile);
+ zipStream = new ZipInputStream(new BufferedInputStream(inStream));
+ ZipEntry entry;
+ while ((entry = zipStream.getNextEntry()) != null)
+ {
+ if (entry.getName().equals(ontologyFile))
+ {
+ loadOboFile(zipStream);
+ }
+ }
} catch (Exception e)
{
e.printStackTrace();
} finally
{
- if (inStream != null)
+ closeStream(zipStream);
+ }
+ }
+
+ /**
+ * Closes the input stream, silently ignoring any IOException thrown by
+ * close(). Does nothing if the stream is null. Note that only IOException
+ * is caught; any other exception type would still propagate.
+ *
+ * @param is
+ * the stream to close; may be null
+ */
+ protected void closeStream(InputStream is)
+ {
+ if (is != null)
+ {
+ try
{
- try
- {
- inStream.close();
- } catch (IOException e)
- {
- // ignore
- }
+ is.close();
+ } catch (IOException e)
+ {
+ // ignore
}
}
}
+ /**
+ * Reads, parses and stores the OBO file data. The stream is wrapped in a
+ * BufferedReader and parsed with a new OboParser; the resulting Ontology is
+ * saved in the ontology field, its "is_a" term is saved in the isA field,
+ * and storeTermNames() is then called to build the lookup of terms by
+ * description. This method does not itself close the supplied stream.
+ *
+ * @param is
+ * stream supplying the OBO format data
+ * @throws ParseException
+ * declared by this method; presumably raised by
+ * OboParser.parseOBO on malformed data - confirm
+ * @throws IOException
+ * declared by this method; presumably raised on a read failure -
+ * confirm
+ */
+ protected void loadOboFile(InputStream is) throws ParseException,
+ IOException
+ {
+ BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
+ OboParser parser = new OboParser();
+ ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
+ isA = ontology.getTerm("is_a");
+ storeTermNames();
+ }
+
+ /**
+ * Stores a lookup table of terms by description. Note that description is not
+ * guaranteed unique. Where duplicate descriptions are found, try to discard
+ * the term that is flagged as obsolete. However we do store obsolete terms
+ * where there is no duplication of description.
+ */
protected void storeTermNames()
{
for (Term term : ontology.getTerms())
String description = term.getDescription();
if (description != null)
{
- // System.out.println(term.getName() + "=" + term.getDescription());
- Term replaced = termsByDescription.put(description, term);
+ Term replaced = termsByDescription.get(description);
if (replaced != null)
{
+ boolean newTermIsObsolete = isObsolete(term);
+ boolean oldTermIsObsolete = isObsolete(replaced);
+ if (newTermIsObsolete && !oldTermIsObsolete)
+ {
+ System.err.println("Ignoring " + term.getName()
+ + " as obsolete and duplicated by "
+ + replaced.getName());
+ term = replaced;
+ }
+ else if (!newTermIsObsolete && oldTermIsObsolete)
+ {
+ System.err.println("Ignoring " + replaced.getName()
+ + " as obsolete and duplicated by " + term.getName());
+ }
+ else
+ {
System.err.println("Warning: " + term.getName()
+ " has replaced " + replaced.getName()
- + " for lookup of description "
- + description);
+ + " for lookup of '" + description + "'");
+ }
}
+ termsByDescription.put(description, term);
+ }
+ }
+ }
+ }
+
+ /**
+ * Answers true if the term has property "is_obsolete" with value true, else
+ * false. More precisely: true only when the term's annotation is not null
+ * and its "is_obsolete" property equals Boolean.TRUE. Answers false when
+ * the term has no annotation, when the property holds any other value, or
+ * when looking up the property throws NoSuchElementException.
+ *
+ * @param term
+ * the ontology term to test; must not be null, as its annotation
+ * is read unconditionally
+ * @return true if the term is flagged as obsolete, else false
+ */
+ public static boolean isObsolete(Term term)
+ {
+ Annotation ann = term.getAnnotation();
+ if (ann != null)
+ {
+ try
+ {
+ if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
+ {
+ return true;
}
+ } catch (NoSuchElementException e)
+ {
+ // fall through to false
}
}
+ return false;
}
/**
* directly or via is_a relationship)
*
* @param soTerm
+ * SO name or description
* @return
*/
public boolean isNucleotideMatch(String soTerm)
* directly or via is_a relationship)
*
* @param soTerm
+ * SO name or description
* @return
*/
public boolean isProteinMatch(String soTerm)
return isA(soTerm, "protein_match");
}
+ /**
+ * Test whether the given Sequence Ontology term is polypeptide (either
+ * directly or via is_a relationship)
+ *
+ * @param soTerm
+ * SO name or description
+ * @return
+ */
public boolean isPolypeptide(String soTerm)
{
return isA(soTerm, "polypeptide");
*/
public boolean isA(String child, String parent)
{
+ /*
+ * optimise trivial checks like isA("CDS", "CDS")
+ */
+ if (child.equals(parent))
+ {
+ return true;
+ }
+
Term childTerm = getTerm(child);
Term parentTerm = getTerm(parent);
protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
{
/*
- * null child term arises from a misspelled SO description
+ * null term could arise from a misspelled SO description
*/
if (childTerm == null || parentTerm == null)
{
{
return true;
}
+
/*
- * lazy initialisation - find all of a term's parents the first
- * time this is called, and save them in a map.
+ * lazy initialisation - find all of a term's parents (recursively)
+ * the first time this is called, and save them in a map.
*/
if (!termIsA.containsKey(childTerm))
{
{
if (termIsA(parent, parentTerm))
{
+ /*
+ * add (great-)grandparents to parents list as they are discovered,
+ * for faster lookup next time
+ */
+ if (!parents.contains(parentTerm))
+ {
+ parents.add(parentTerm);
+ }
return true;
}
}
}
return t;
}
+ /**
+ * Test whether the given Sequence Ontology term is sequence_variant (either
+ * directly or via is_a relationship). Delegates to
+ * isA(soTerm, "sequence_variant").
+ *
+ * @param soTerm
+ * SO name or description
+ * @return the result of the isA check against "sequence_variant"
+ */
+ public boolean isSequenceVariant(String soTerm)
+ {
+ return isA(soTerm, "sequence_variant");
+ }
}