import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.biojava.nbio.ontology.Term.Impl;
import org.biojava.nbio.ontology.Triple;
import org.biojava.nbio.ontology.io.OboParser;
+import org.biojava.nbio.ontology.utils.Annotation;
/**
* A wrapper class that parses the Sequence Ontology and exposes useful access
* methods. This version uses the BioJava parser.
*/
-public class SequenceOntology
+class SequenceOntology implements SequenceOntologyI
{
- private static SequenceOntology instance = new SequenceOntology();
-
+ /*
+ * the parsed Ontology data as modelled by BioJava
+ */
private Ontology ontology;
+ /*
+ * the ontology term for the isA relationship
+ */
private Term isA;
/*
/*
* Map where key is a Term and value is a (possibly empty) list of
- * all Terms to which the key has a direct 'isA' relationship
+ * all Terms to which the key has an 'isA' relationship, either
+ * directly or indirectly (A isA B isA C)
*/
private Map<Term, List<Term>> termIsA;
- public static SequenceOntology getInstance()
- {
- return instance;
- }
+ private List<String> termsFound;
+
+ private List<String> termsNotFound;
/**
- * Private constructor to enforce use of singleton. Parses and caches the SO
- * OBO data file.
+ * Package-private constructor to enforce use of singleton. Parses and caches
+ * the SO OBO data file.
*/
- private SequenceOntology()
+ SequenceOntology()
{
+ termsFound = new ArrayList<String>();
+ termsNotFound = new ArrayList<String>();
termsByDescription = new HashMap<String, Term>();
termIsA = new HashMap<Term, List<Term>>();
ZipInputStream zipStream = null;
try
{
+ String zipFile = ontologyFile + ".zip";
+ System.out.println("Loading Sequence Ontology from " + zipFile);
InputStream inStream = this.getClass().getResourceAsStream(
- "/" + ontologyFile + ".zip");
+ "/" + zipFile);
zipStream = new ZipInputStream(new BufferedInputStream(inStream));
ZipEntry entry;
while ((entry = zipStream.getNextEntry()) != null)
}
/**
- * Store a lookup table of terms by their description. Note that description
- * is not guaranteed unique - currently reporting 8 duplicates.
+ * Stores a lookup table of terms by description. Note that description is not
+ * guaranteed unique. Where duplicate descriptions are found, try to discard
+ * the term that is flagged as obsolete. However we do store obsolete terms
+ * where there is no duplication of description.
*/
protected void storeTermNames()
{
String description = term.getDescription();
if (description != null)
{
- // System.out.println(term.getName() + "=" + term.getDescription());
- Term replaced = termsByDescription.put(description, term);
+ Term replaced = termsByDescription.get(description);
if (replaced != null)
{
+ boolean newTermIsObsolete = isObsolete(term);
+ boolean oldTermIsObsolete = isObsolete(replaced);
+ if (newTermIsObsolete && !oldTermIsObsolete)
+ {
+ System.err.println("Ignoring " + term.getName()
+ + " as obsolete and duplicated by "
+ + replaced.getName());
+ term = replaced;
+ }
+ else if (!newTermIsObsolete && oldTermIsObsolete)
+ {
+ System.err.println("Ignoring " + replaced.getName()
+ + " as obsolete and duplicated by " + term.getName());
+ }
+ else
+ {
System.err.println("Warning: " + term.getName()
+ " has replaced " + replaced.getName()
+ " for lookup of '" + description + "'");
+ }
}
+ termsByDescription.put(description, term);
}
}
}
}
/**
+   * Answers true if the term's annotation holds the property "is_obsolete"
+   * with the value Boolean.TRUE, else false. A null term, a term with no
+   * annotation, or a term whose annotation lacks the property all answer
+   * false.
+   *
+   * @param term
+   *          the ontology term to inspect (may be null)
+   * @return true only if the term is explicitly flagged as obsolete
+   */
+  public static boolean isObsolete(Term term)
+  {
+    if (term == null)
+    {
+      return false;
+    }
+    Annotation ann = term.getAnnotation();
+    /*
+     * test for the property before reading it, rather than relying on
+     * catching the exception getProperty throws for a missing key
+     */
+    return ann != null && ann.containsProperty("is_obsolete")
+            && Boolean.TRUE.equals(ann.getProperty("is_obsolete"));
+  }
+
+ /**
* Test whether the given Sequence Ontology term is nucleotide_match (either
* directly or via is_a relationship)
*
+ * @param soTerm
+ *          the Sequence Ontology term to test
+ * @return the result of isA(soTerm, NUCLEOTIDE_MATCH)
*/
public boolean isNucleotideMatch(String soTerm)
{
- return isA(soTerm, "nucleotide_match");
+ return isA(soTerm, NUCLEOTIDE_MATCH);
}
/**
*/
public boolean isProteinMatch(String soTerm)
{
- return isA(soTerm, "protein_match");
+ return isA(soTerm, PROTEIN_MATCH); // delegates to the general isA test, with PROTEIN_MATCH as the parent term
}
/**
*/
public boolean isPolypeptide(String soTerm)
{
- return isA(soTerm, "polypeptide");
+ return isA(soTerm, POLYPEPTIDE); // delegates to the general isA test, with POLYPEPTIDE as the parent term
}
/**
* @param parent
* @return
*/
+ @Override
public boolean isA(String child, String parent)
{
+ if (child == null || parent == null)
+ {
+ return false; // null arguments are answered false without any lookup
+ }
+ /*
+ * optimise trivial checks like isA("CDS", "CDS")
+ * note: this shortcut records the term as found without looking it up,
+ * so any string compared with itself is answered true and recorded
+ */
+ if (child.equals(parent))
+ {
+ termFound(child);
+ return true;
+ }
+
Term childTerm = getTerm(child);
+ if (childTerm != null)
+ {
+ termFound(child); // the lookup returned a term
+ }
+ else
+ {
+ termNotFound(child); // the lookup returned null
+ } // only the child lookup is recorded; the parent lookup below is not
Term parentTerm = getTerm(parent);
return termIsA(childTerm, parentTerm);
}
/**
+   * Notes, for later reporting, that a valid term was queried for. Each
+   * distinct term is held once only; repeat queries leave the list unchanged.
+   *
+   * @param term
+   *          the term that was queried
+   */
+  private void termFound(String term)
+  {
+    synchronized (termsFound)
+    {
+      boolean alreadyRecorded = termsFound.contains(term);
+      if (alreadyRecorded)
+      {
+        return;
+      }
+      termsFound.add(term);
+    }
+  }
+
+  /**
+   * Notes, for later reporting, that an invalid term was queried for. The
+   * first time a given term is seen, a message is written to System.err and
+   * the term is added to the list; repeat queries do nothing.
+   *
+   * @param term
+   *          the term that was queried
+   */
+  private void termNotFound(String term)
+  {
+    synchronized (termsNotFound)
+    {
+      boolean alreadyReported = termsNotFound.contains(term);
+      if (alreadyReported)
+      {
+        return;
+      }
+      System.err.println("SO term " + term + " invalid");
+      termsNotFound.add(term);
+    }
+  }
+
+ /**
* Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
*
* @param childTerm
{
if (termIsA(parent, parentTerm))
{
+ /*
+ * add (great-)grandparents to parents list as they are discovered,
+ * for faster lookup next time
+ */
+ if (!parents.contains(parentTerm))
+ {
+ parents.add(parentTerm);
+ }
return true;
}
}
}
return t;
}
+
+  /**
+   * Tests whether the given Sequence Ontology term is the SEQUENCE_VARIANT
+   * term, or has an isA relationship to it.
+   *
+   * @param soTerm
+   *          the Sequence Ontology term to test
+   * @return the result of isA(soTerm, SEQUENCE_VARIANT)
+   */
+  public boolean isSequenceVariant(String soTerm)
+  {
+    final boolean isVariant = isA(soTerm, SEQUENCE_VARIANT);
+    return isVariant;
+  }
+
+  /**
+   * Returns a case-insensitively sorted snapshot of the valid terms queried
+   * for. A copy is returned rather than the internal list, so the caller can
+   * iterate over or modify the result without holding this object's lock and
+   * without affecting the terms recorded here.
+   *
+   * @return a new sorted list of the terms found (never null)
+   */
+  @Override
+  public List<String> termsFound()
+  {
+    synchronized (termsFound)
+    {
+      List<String> sorted = new ArrayList<String>(termsFound);
+      Collections.sort(sorted, String.CASE_INSENSITIVE_ORDER);
+      return sorted;
+    }
+  }
+
+  /**
+   * Returns a case-insensitively sorted snapshot of the invalid terms queried
+   * for. A copy is returned rather than the internal list, so the caller can
+   * iterate over or modify the result without holding this object's lock and
+   * without affecting the terms recorded here.
+   *
+   * @return a new sorted list of the terms not found (never null)
+   */
+  @Override
+  public List<String> termsNotFound()
+  {
+    synchronized (termsNotFound)
+    {
+      List<String> sorted = new ArrayList<String>(termsNotFound);
+      Collections.sort(sorted, String.CASE_INSENSITIVE_ORDER);
+      return sorted;
+    }
+  }
}