import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
* A wrapper class that parses the Sequence Ontology and exposes useful access
* methods. This version uses the BioJava parser.
*/
-public class SequenceOntology
+class SequenceOntology implements SequenceOntologyI
{
-
- /*
- * selected commonly used values for quick reference
- */
- // SO:0000316
- public static final String CDS = "CDS";
-
- // SO:0001060
- public static final String SEQUENCE_VARIANT = "sequence_variant";
-
- // SO:0000147
- public static final String EXON = "exon";
-
- // SO:0000673
- public static final String TRANSCRIPT = "transcript";
-
- // SO:0000704
- public static final String GENE = "gene";
-
- /*
- * singleton instance of this class
- */
- private static SequenceOntology instance;
-
/*
* the parsed Ontology data as modelled by BioJava
*/
*/
private Map<Term, List<Term>> termIsA;
- /**
- * Returns singleton instance
- *
- * @return
- */
- public synchronized static SequenceOntology getInstance()
- {
- if (instance == null)
- {
- instance = new SequenceOntology();
- }
- return instance;
- }
+ private List<String> termsFound; // terms recorded by termFound(), each stored once
+
+ private List<String> termsNotFound; // terms recorded by termNotFound(), each stored once
/**
- * Private constructor to enforce use of singleton. Parses and caches the SO
- * OBO data file.
+ * Package private constructor, so that instances can only be created from
+ * within this package. Parses and caches the SO OBO data file.
*/
- private SequenceOntology()
+ SequenceOntology()
{
+ termsFound = new ArrayList<String>();
+ termsNotFound = new ArrayList<String>();
termsByDescription = new HashMap<String, Term>();
termIsA = new HashMap<Term, List<Term>>();
*/
public boolean isNucleotideMatch(String soTerm)
{
- return isA(soTerm, "nucleotide_match");
+ return isA(soTerm, NUCLEOTIDE_MATCH); // isA yields false for a null soTerm
}
/**
*/
public boolean isProteinMatch(String soTerm)
{
- return isA(soTerm, "protein_match");
+ return isA(soTerm, PROTEIN_MATCH); // isA yields false for a null soTerm
}
/**
*/
public boolean isPolypeptide(String soTerm)
{
- return isA(soTerm, "polypeptide");
+ return isA(soTerm, POLYPEPTIDE); // isA yields false for a null soTerm
}
/**
* @param parent
* @return
*/
+ @Override
public boolean isA(String child, String parent)
{
+ if (child == null || parent == null)
+ {
+ return false; // null arguments are rejected before anything is recorded
+ }
/*
* optimise trivial checks like isA("CDS", "CDS")
*/
if (child.equals(parent))
{
+ termFound(child); // NOTE: recorded as found without looking the term up
return true;
}
Term childTerm = getTerm(child);
+ if (childTerm != null)
+ {
+ termFound(child);
+ }
+ else
+ {
+ termNotFound(child); // getTerm returned null for the child term
+ }
Term parentTerm = getTerm(parent); // the parent lookup outcome is not recorded either way
return termIsA(childTerm, parentTerm);
}
/**
+ * Remembers a term that was queried for and treated as valid, so that it can
+ * be reported later. A term is stored once, however often it is queried.
+ *
+ * @param term
+ *          the term to remember
+ */
+ private void termFound(String term)
+ {
+ synchronized (termsFound)
+ {
+ boolean alreadyRecorded = termsFound.contains(term);
+ if (alreadyRecorded)
+ {
+ return;
+ }
+ termsFound.add(term);
+ }
+ }
+
+ /**
+ * Remembers a term that was queried for but could not be resolved, so that it
+ * can be reported later. The first time a given term is seen a message is
+ * also written to standard error; repeat queries for it are ignored.
+ *
+ * @param term
+ *          the unrecognised term
+ */
+ private void termNotFound(String term)
+ {
+ synchronized (termsNotFound)
+ {
+ if (termsNotFound.contains(term))
+ {
+ return;
+ }
+ System.err.println("SO term " + term + " invalid");
+ termsNotFound.add(term);
+ }
+ }
+
+ /**
* Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
*
* @param childTerm
public boolean isSequenceVariant(String soTerm)
{
- return isA(soTerm, "sequence_variant");
+ return isA(soTerm, SEQUENCE_VARIANT); // constant no longer declared here; presumably from SequenceOntologyI - confirm
+ }
+
+ /**
+ * Returns the valid terms queried for so far, sorted case-insensitively.
+ * The internal list is sorted in place while its lock is held, and a
+ * snapshot copy is returned, so that the caller can iterate over (or modify)
+ * the result without holding the lock while other threads record terms.
+ *
+ * @return a new list holding the recorded terms in case-insensitive order
+ */
+ @Override
+ public List<String> termsFound()
+ {
+ synchronized (termsFound)
+ {
+ Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+ // copy inside the lock: handing out the live list would let it escape
+ // the synchronization that guards every other access to it
+ return new ArrayList<String>(termsFound);
+ }
+ }
+
+ /**
+ * Returns the invalid terms queried for so far, sorted case-insensitively.
+ * The internal list is sorted in place while its lock is held, and a
+ * snapshot copy is returned, so that the caller can iterate over (or modify)
+ * the result without holding the lock while other threads record terms.
+ *
+ * @return a new list holding the recorded terms in case-insensitive order
+ */
+ @Override
+ public List<String> termsNotFound()
+ {
+ synchronized (termsNotFound)
+ {
+ Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+ // copy inside the lock: handing out the live list would let it escape
+ // the synchronization that guards every other access to it
+ return new ArrayList<String>(termsNotFound);
+ }
}
}