--- /dev/null
+package jalview.io.gff;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An implementation of SequenceOntologyI that hard codes terms of interest.
+ *
+ * Use this in unit testing by calling SequenceOntology.setInstance(new
+ * SequenceOntologyLite()).
+ *
+ * May also become a stand-in for SequenceOntology in the applet if we want to
+ * avoid the additional jars needed for parsing the full SO.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class SequenceOntologyLite implements SequenceOntologyI
+{
+ /*
+ * initial selection of types of interest when processing Ensembl features
+ * NB unlike the full SequenceOntology we don't traverse indirect
+ * child-parent relationships here so e.g. need to list every sub-type
+ * of gene (direct or indirect) that is of interest
+ */
+ // @formatter:off
+ private final String[][] TERMS = new String[][] {
+
+ /*
+ * gene sub-types:
+ */
+ { "gene", "gene" },
+ { "ncRNA_gene", "gene" },
+ { "snRNA_gene", "gene" },
+ { "miRNA_gene", "gene" },
+ { "lincRNA_gene", "gene" },
+ { "rRNA_gene", "gene" },
+
+ /*
+ * transcript sub-types:
+ */
+ { "transcript", "transcript" },
+ { "mature_transcript", "transcript" },
+ { "processed_transcript", "transcript" },
+ { "aberrant_processed_transcript", "transcript" },
+ { "ncRNA", "transcript" },
+ { "snRNA", "transcript" },
+ { "miRNA", "transcript" },
+ { "lincRNA", "transcript" },
+ { "rRNA", "transcript" },
+ // there are many more sub-types of ncRNA...
+
+ /*
+ * sequence_variant sub-types:
+ */
+ { "sequence_variant", "sequence_variant" },
+ { "feature_variant", "sequence_variant" },
+ { "gene_variant", "sequence_variant" },
+ // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
+ // but we model it here correctly as per the SO
+ { "NMD_transcript_variant", "sequence_variant" },
+ { "transcript_variant", "sequence_variant" },
+ { "structural_variant", "sequence_variant" },
+
+ /*
+ * no sub-types of exon or CDS yet seen in Ensembl
+ * some added here for testing purposes
+ */
+ { "exon", "exon" },
+ { "coding_exon", "exon" },
+ { "CDS", "CDS" },
+ { "CDS_predicted", "CDS" },
+
+ /*
+ * terms used in exonerate or PASA GFF
+ */
+ { "protein_match", "protein_match"},
+ { "nucleotide_match", "nucleotide_match"},
+ { "cDNA_match", "nucleotide_match"},
+
+ /*
+ * used in InterProScan GFF
+ */
+ { "polypeptide", "polypeptide" }
+ };
+ // @formatter:on
+
+ /*
+ * hard-coded list of any parents (direct or indirect)
+ * that we care about for a term
+ */
+ private Map<String, List<String>> parents;
+
+ private List<String> termsFound;
+
+ private List<String> termsNotFound;
+
+ public SequenceOntologyLite()
+ {
+ termsFound = new ArrayList<String>();
+ termsNotFound = new ArrayList<String>();
+ loadStaticData();
+ }
+
+ /**
+ * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
+ */
+ private void loadStaticData()
+ {
+ parents = new HashMap<String, List<String>>();
+ for (String [] pair : TERMS) {
+ List<String> p = parents.get(pair[0]);
+ if (p == null)
+ {
+ p = new ArrayList<String>();
+ parents.put(pair[0], p);
+ }
+ p.add(pair[1]);
+ }
+ }
+
+ /**
+ * Answers true if 'child' isA 'parent' (including equality). In this
+ * implementation, based only on hard-coded values.
+ */
+ @Override
+ public boolean isA(String child, String parent)
+ {
+ if (child == null || parent == null)
+ {
+ return false;
+ }
+ if (child.equals(parent))
+ {
+ termFound(child);
+ return true;
+ }
+
+ List<String> p = parents.get(child);
+ if (p == null)
+ {
+ termNotFound(child);
+ return false;
+ }
+ termFound(child);
+ if (p.contains(parent))
+ {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Records a valid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termFound(String term)
+ {
+ if (!termsFound.contains(term))
+ {
+ synchronized (termsFound)
+ {
+ termsFound.add(term);
+ }
+ }
+ }
+
+ /**
+ * Records an invalid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termNotFound(String term)
+ {
+ synchronized (termsNotFound)
+ {
+ if (!termsNotFound.contains(term))
+ {
+ System.out.println("SO term " + term
+ + " not known - may be invalid, or model if needed in "
+ + getClass().getName());
+ termsNotFound.add(term);
+ }
+ }
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of valid terms queried for
+ */
+ @Override
+ public List<String> termsFound()
+ {
+ synchronized (termsFound)
+ {
+ Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+ return termsFound;
+ }
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of invalid terms queried for
+ */
+ @Override
+ public List<String> termsNotFound()
+ {
+ synchronized (termsNotFound)
+ {
+ Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+ return termsNotFound;
+ }
+ }
+}