Merge branch 'develop' into features/JAL-3010ontologyFeatureSettings

[jalview.git] / src / jalview / io / gff / SequenceOntologyLite.java
diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java

index f989f7b..2cbec36 100644 (file)
--- a/src/jalview/io/gff/SequenceOntologyLite.java
+++ b/src/jalview/io/gff/SequenceOntologyLite.java
@@ -20,6 +20,8 @@
   */
  package jalview.io.gff;
  
+import jalview.datamodel.ontology.OntologyBase;
+
  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.HashMap;
@@ -38,13 +40,14 @@ import java.util.Map;
   * @author gmcarstairs
   *
   */
-public class SequenceOntologyLite implements SequenceOntologyI
+public class SequenceOntologyLite extends OntologyBase
+        implements SequenceOntologyI
  {
    /*
     * initial selection of types of interest when processing Ensembl features
     * NB unlike the full SequenceOntology we don't traverse indirect
     * child-parent relationships here so e.g. need to list every sub-type
-   * of gene (direct or indirect) that is of interest
+   * (direct or indirect) that is of interest
     */
    // @formatter:off
    private final String[][] TERMS = new String[][] {
@@ -70,21 +73,35 @@ public class SequenceOntologyLite implements SequenceOntologyI
      { "snRNA", "transcript" },
      { "miRNA", "transcript" },
      { "lincRNA", "transcript" },
+    { "lnc_RNA", "transcript" },
      { "rRNA", "transcript" },
      { "mRNA", "transcript" },
      // there are many more sub-types of ncRNA...
      
      /*
-     * sequence_variant sub-types:
+     * sequence_variant sub-types
       */
      { "sequence_variant", "sequence_variant" },
+    { "structural_variant", "sequence_variant" },
      { "feature_variant", "sequence_variant" },
+    { "upstream_gene_variant", "sequence_variant" },
      { "gene_variant", "sequence_variant" },
+    { "transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_exon_variant", "sequence_variant" },
      // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
      // but we model it here correctly as per the SO
      { "NMD_transcript_variant", "sequence_variant" },
-    { "transcript_variant", "sequence_variant" },
-    { "structural_variant", "sequence_variant" },
+    { "missense_variant", "sequence_variant" },
+    { "synonymous_variant", "sequence_variant" },
+    { "frameshift_variant", "sequence_variant" },
+    { "5_prime_UTR_variant", "sequence_variant" },
+    { "3_prime_UTR_variant", "sequence_variant" },
+    { "stop_gained", "sequence_variant" },
+    { "stop_lost", "sequence_variant" },
+    { "inframe_deletion", "sequence_variant" },
+    { "inframe_insertion", "sequence_variant" },
+    { "splice_region_variant", "sequence_variant" },
      
      /*
       * no sub-types of exon or CDS yet seen in Ensembl
@@ -121,8 +138,8 @@ public class SequenceOntologyLite implements SequenceOntologyI
  
    public SequenceOntologyLite()
    {
-    termsFound = new ArrayList<String>();
-    termsNotFound = new ArrayList<String>();
+    termsFound = new ArrayList<>();
+    termsNotFound = new ArrayList<>();
      loadStaticData(); // builds the 'parents' lookup from the static TERMS table
    }
  
@@ -131,13 +148,13 @@ public class SequenceOntologyLite implements SequenceOntologyI
     */
    private void loadStaticData()
    {
-    parents = new HashMap<String, List<String>>();
+    parents = new HashMap<>();
      for (String[] pair : TERMS)
      {
        List<String> p = parents.get(pair[0]);
        if (p == null)
        {
-        p = new ArrayList<String>();
+        p = new ArrayList<>();
          parents.put(pair[0], p);
        }
        p.add(pair[1]);
@@ -237,4 +254,70 @@ public class SequenceOntologyLite implements SequenceOntologyI
        return termsNotFound;
      }
    }
+
+  /**
+   * Returns the top-level ('root') terms reachable from the given term by
+   * following parent links, or null if there are none (for example, if the
+   * term has no entry in the parents lookup). A top-level term is one whose
+   * only listed parent is itself. Results are cached per term, and the
+   * cached list itself (not a copy) is returned.
+   * 
+   * @param term
+   *          the term whose root parents are wanted
+   * @return a non-empty list of root terms, or null if none were found
+   */
+  @Override
+  public List<String> getRootParents(final String term)
+  {
+    /*
+     * check in cache first; an empty cached list means 'no roots found'
+     * and is reported as null, exactly as on the first (uncached) lookup
+     */
+    if (rootParents.containsKey(term))
+    {
+      List<String> cached = rootParents.get(term);
+      return (cached == null || cached.isEmpty()) ? null : cached;
+    }
+
+    List<String> top = new ArrayList<>();
+    List<String> visited = new ArrayList<>();
+    List<String> query = new ArrayList<>();
+    query.add(term);
+
+    /*
+     * breadth-first walk up the parent links, one level per iteration
+     */
+    while (!query.isEmpty())
+    {
+      List<String> nextQuery = new ArrayList<>();
+      for (String q : query)
+      {
+        /*
+         * process each term once only: this guarantees termination if
+         * the parent relation ever contains a cycle, and avoids
+         * re-walking ancestors shared by several paths
+         */
+        if (visited.contains(q))
+        {
+          continue;
+        }
+        visited.add(q);
+
+        List<String> theParents = parents.get(q);
+        if (theParents != null)
+        {
+          if (theParents.size() == 1 && theParents.get(0).equals(q))
+          {
+            /*
+             * top-level term (its only parent is itself)
+             */
+            if (!top.contains(q))
+            {
+              top.add(q);
+            }
+          }
+          else
+          {
+            for (String p : theParents)
+            {
+              /*
+               * skip any self-reference so we only move upwards
+               */
+              if (!p.equals(q))
+              {
+                nextQuery.add(p);
+              }
+            }
+          }
+        }
+      }
+      query = nextQuery;
+    }
+
+    rootParents.put(term, top);
+
+    return top.isEmpty() ? null : top;
+  }
+
+  /**
+   * Returns the parent terms held in the lookup for the given term. For a
+   * term with an entry this is the stored list itself (not a copy); for a
+   * term with no entry a new, empty list is returned.
+   * 
+   * @param term
+   *          the term to look up
+   * @return the term's parents, or an empty list if it has no entry
+   */
+  @Override
+  public List<String> getParents(String term)
+  {
+    final List<String> known = parents.get(term);
+    if (known != null)
+    {
+      return known;
+    }
+    return new ArrayList<>();
+  }
+
+  /**
+   * Answers true if the given term has an entry (as a key) in the parents
+   * lookup, else false.
+   * 
+   * @param term
+   *          the term to check
+   * @return true if the term is a key of the parents lookup
+   */
+  @Override
+  public boolean isValidTerm(String term)
+  {
+    final boolean known = parents.containsKey(term);
+    return known;
+  }
  }