Merge branch 'develop' into features/JAL-3010ontologyFeatureSettings
[jalview.git] / src / jalview / io / gff / SequenceOntologyLite.java
index d2e6654..2cbec36 100644 (file)
@@ -1,5 +1,27 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.io.gff;
 
+import jalview.datamodel.ontology.OntologyBase;
+
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -18,13 +40,14 @@ import java.util.Map;
  * @author gmcarstairs
  *
  */
-public class SequenceOntologyLite implements SequenceOntologyI
+public class SequenceOntologyLite extends OntologyBase
+        implements SequenceOntologyI
 {
   /*
    * initial selection of types of interest when processing Ensembl features
    * NB unlike the full SequenceOntology we don't traverse indirect
    * child-parent relationships here so e.g. need to list every sub-type
-   * of gene (direct or indirect) that is of interest
+   * (direct or indirect) that is of interest
    */
   // @formatter:off
   private final String[][] TERMS = new String[][] {
@@ -37,6 +60,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "snRNA_gene", "gene" },
     { "miRNA_gene", "gene" },
     { "lincRNA_gene", "gene" },
+    { "rRNA_gene", "gene" },
     
     /*
      * transcript sub-types:
@@ -49,19 +73,35 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "snRNA", "transcript" },
     { "miRNA", "transcript" },
     { "lincRNA", "transcript" },
+    { "lnc_RNA", "transcript" },
+    { "rRNA", "transcript" },
+    { "mRNA", "transcript" },
     // there are many more sub-types of ncRNA...
     
     /*
-     * sequence_variant sub-types:
+     * sequence_variant sub-types
      */
     { "sequence_variant", "sequence_variant" },
+    { "structural_variant", "sequence_variant" },
     { "feature_variant", "sequence_variant" },
+    { "upstream_gene_variant", "sequence_variant" },
     { "gene_variant", "sequence_variant" },
+    { "transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_variant", "sequence_variant" },
+    { "non_coding_transcript_exon_variant", "sequence_variant" },
     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
     // but we model it here correctly as per the SO
     { "NMD_transcript_variant", "sequence_variant" },
-    { "transcript_variant", "sequence_variant" },
-    { "structural_variant", "sequence_variant" },
+    { "missense_variant", "sequence_variant" },
+    { "synonymous_variant", "sequence_variant" },
+    { "frameshift_variant", "sequence_variant" },
+    { "5_prime_UTR_variant", "sequence_variant" },
+    { "3_prime_UTR_variant", "sequence_variant" },
+    { "stop_gained", "sequence_variant" },
+    { "stop_lost", "sequence_variant" },
+    { "inframe_deletion", "sequence_variant" },
+    { "inframe_insertion", "sequence_variant" },
+    { "splice_region_variant", "sequence_variant" },
     
     /*
      * no sub-types of exon or CDS yet seen in Ensembl
@@ -98,8 +138,8 @@ public class SequenceOntologyLite implements SequenceOntologyI
 
   public SequenceOntologyLite()
   {
-    termsFound = new ArrayList<String>();
-    termsNotFound = new ArrayList<String>();
+    termsFound = new ArrayList<>();
+    termsNotFound = new ArrayList<>();
     loadStaticData();
   }
 
@@ -108,12 +148,13 @@ public class SequenceOntologyLite implements SequenceOntologyI
    */
   private void loadStaticData()
   {
-    parents = new HashMap<String, List<String>>();
-    for (String [] pair : TERMS) {
+    parents = new HashMap<>();
+    for (String[] pair : TERMS)
+    {
       List<String> p = parents.get(pair[0]);
       if (p == null)
       {
-        p = new ArrayList<String>();
+        p = new ArrayList<>();
         parents.put(pair[0], p);
       }
       p.add(pair[1]);
@@ -178,9 +219,11 @@ public class SequenceOntologyLite implements SequenceOntologyI
     {
       if (!termsNotFound.contains(term))
       {
-        System.out.println("SO term " + term
-                + " not known - may be invalid, or model if needed in "
-                + getClass().getName());
+        // suppress logging here as it reports Uniprot sequence features
+        // (which do not use SO terms) when auto-configuring feature colours
+        // System.out.println("SO term " + term
+        // + " not known - add to model if needed in "
+        // + getClass().getName());
         termsNotFound.add(term);
       }
     }
@@ -211,4 +254,70 @@ public class SequenceOntologyLite implements SequenceOntologyI
       return termsNotFound;
     }
   }
+
+  /**
+   * Answers the top-level term(s) reached by following parent links upwards
+   * from the given term. Results are cached per term.
+   *
+   * @param term
+   * @return the root terms, or null if none found (also on a cached lookup)
+   */
+  @Override
+  public List<String> getRootParents(final String term)
+  {
+    if (rootParents.containsKey(term))
+    {
+      // cache hit: report 'no roots' as null, exactly as the first lookup did
+      List<String> cached = rootParents.get(term);
+      return cached == null || cached.isEmpty() ? null : cached;
+    }
+
+    List<String> top = new ArrayList<>();
+    List<String> visited = new ArrayList<>();
+    List<String> query = new ArrayList<>();
+    query.add(term);
+
+    while (!query.isEmpty())
+    {
+      List<String> nextQuery = new ArrayList<>();
+      for (String q : query)
+      {
+        List<String> theParents = parents.get(q);
+        if (theParents == null || visited.contains(q))
+        {
+          continue; // unknown term, or already followed (guards cycles)
+        }
+        visited.add(q);
+        if (theParents.size() == 1 && theParents.get(0).equals(q))
+        {
+          top.add(q); // a term that is its own sole parent is top-level
+          continue;
+        }
+        for (String p : theParents)
+        {
+          if (!p.equals(q))
+          {
+            nextQuery.add(p);
+          }
+        }
+      }
+      query = nextQuery;
+    }
+
+    rootParents.put(term, top);
+    return top.isEmpty() ? null : top;
+  }
+
+  /**
+   * Answers the parent terms held for the given term, or a new empty list if
+   * the term has no entry in the parents lookup.
+   *
+   * @param term
+   * @return the list held for the term, or a new empty list (never null)
+   */
+  @Override
+  public List<String> getParents(String term)
+  {
+    final List<String> known = parents.get(term);
+    if (known != null)
+    {
+      return known;
+    }
+    return new ArrayList<>();
+  }
+
+  /**
+   * Answers true if the given term has an entry in the parents lookup, else
+   * false.
+   *
+   * @param term
+   * @return true if the term is a key of the parents lookup
+   */
+  @Override
+  public boolean isValidTerm(String term)
+  {
+    final boolean hasEntry = parents.containsKey(term);
+    return hasEntry;
+  }
 }