X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fgff%2FSequenceOntologyLite.java;h=72e906c96cf8abf05a407ed0e299a3211d025602;hb=b5889c572976c81f068d9743363695ca84e7d413;hp=6719ae6e749235eeaa4fe6d0a91d077108bb84e3;hpb=b96accb6a3904ea9b424f1dbd8b2b3b4164c521b;p=jalview.git

diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java
index 6719ae6..72e906c 100644
--- a/src/jalview/io/gff/SequenceOntologyLite.java
+++ b/src/jalview/io/gff/SequenceOntologyLite.java
@@ -1,3 +1,23 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.io.gff;
 
 import java.util.ArrayList;
@@ -22,6 +42,9 @@ public class SequenceOntologyLite implements SequenceOntologyI
 {
   /*
    * initial selection of types of interest when processing Ensembl features
+   * NB unlike the full SequenceOntology, indirect child-parent relationships
+   * are not traversed here, so every sub-type of interest (whether direct
+   * or indirect) has to be listed explicitly
    */
   // @formatter:off
   private final String[][] TERMS = new String[][] {
@@ -32,39 +55,62 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "gene", "gene" }, 
     { "ncRNA_gene", "gene" }, 
     { "snRNA_gene", "gene" },
+    { "miRNA_gene", "gene" },
+    { "lincRNA_gene", "gene" },
+    { "rRNA_gene", "gene" },
     
     /*
      * transcript sub-types:
      */
     { "transcript", "transcript" }, 
     { "mature_transcript", "transcript" }, 
+    { "processed_transcript", "transcript" }, 
+    { "aberrant_processed_transcript", "transcript" },
     { "ncRNA", "transcript" },
     { "snRNA", "transcript" },
-    { "aberrant_processed_transcript", "transcript" },
+    { "miRNA", "transcript" },
+    { "lincRNA", "transcript" },
+    { "rRNA", "transcript" },
+    { "mRNA", "transcript" },
+    // there are many more sub-types of ncRNA...
     
     /*
-     * sequence_variant sub-types:
+     * sequence_variant sub-types
      */
     { "sequence_variant", "sequence_variant" },
+    { "structural_variant", "sequence_variant" },
     { "feature_variant", "sequence_variant" },
     { "gene_variant", "sequence_variant" },
+    { "transcript_variant", "sequence_variant" },
     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
     // but we model it here correctly as per the SO
     { "NMD_transcript_variant", "sequence_variant" },
-    { "transcript_variant", "sequence_variant" },
-    { "structural_variant", "sequence_variant" },
+    { "missense_variant", "sequence_variant" },
+    { "synonymous_variant", "sequence_variant" },
+    { "frameshift_variant", "sequence_variant" },
+    { "5_prime_UTR_variant", "sequence_variant" },
+    { "3_prime_UTR_variant", "sequence_variant" },
+    { "stop_gained", "sequence_variant" },
+    { "stop_lost", "sequence_variant" },
+    { "inframe_deletion", "sequence_variant" },
+    { "inframe_insertion", "sequence_variant" },
+    { "splice_region_variant", "sequence_variant" },
     
     /*
-     * no sub-types of exon or CDS yet encountered; add if needed
+     * no sub-types of exon or CDS have yet been seen in Ensembl data;
+     * some are added here for testing purposes
      */
     { "exon", "exon" },
+    { "coding_exon", "exon" },
     { "CDS", "CDS" },
+    { "CDS_predicted", "CDS" },
     
     /*
-     * used in exonerate GFF
+     * terms used in exonerate or PASA GFF
      */
     { "protein_match", "protein_match"},
     { "nucleotide_match", "nucleotide_match"},
+    { "cDNA_match", "nucleotide_match"},
     
     /*
      * used in InterProScan GFF
@@ -85,8 +131,8 @@ public class SequenceOntologyLite implements SequenceOntologyI
 
   public SequenceOntologyLite()
   {
-    termsFound = new ArrayList<String>();
-    termsNotFound = new ArrayList<String>();
+    termsFound = new ArrayList<>();
+    termsNotFound = new ArrayList<>();
     loadStaticData();
   }
 
@@ -95,12 +141,13 @@ public class SequenceOntologyLite implements SequenceOntologyI
    */
   private void loadStaticData()
   {
-    parents = new HashMap<String, List<String>>();
-    for (String [] pair : TERMS) {
+    parents = new HashMap<>();
+    for (String[] pair : TERMS)
+    {
       List<String> p = parents.get(pair[0]);
       if (p == null)
       {
-        p = new ArrayList<String>();
+        p = new ArrayList<>();
         parents.put(pair[0], p);
       }
       p.add(pair[1]);
@@ -165,9 +212,11 @@ public class SequenceOntologyLite implements SequenceOntologyI
     {
       if (!termsNotFound.contains(term))
       {
-        System.out.println("SO term " + term
-                + " not known - either invalid or needs modelled in "
-                + getClass().getName());
+        // logging is suppressed here: it would report UniProt sequence features
+        // (which do not use SO terms) when auto-configuring feature colours
+        // System.out.println("SO term " + term
+        // + " not known - add to model if needed in "
+        // + getClass().getName());
         termsNotFound.add(term);
       }
     }