JAL-1191 additions to SOLite, tweak to test
[jalview.git] / src / jalview / io / gff / SequenceOntologyLite.java
index 173dea6..d2e6654 100644 (file)
@@ -22,6 +22,9 @@ public class SequenceOntologyLite implements SequenceOntologyI
 {
   /*
    * initial selection of types of interest when processing Ensembl features
+   * NB unlike the full SequenceOntology, indirect child-parent
+   * relationships are not traversed here, so every sub-type of interest
+   * (direct or indirect), e.g. of gene, must be listed explicitly
    */
   // @formatter:off
   private final String[][] TERMS = new String[][] {
@@ -32,15 +35,21 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "gene", "gene" }, 
     { "ncRNA_gene", "gene" }, 
     { "snRNA_gene", "gene" },
+    { "miRNA_gene", "gene" },
+    { "lincRNA_gene", "gene" },
     
     /*
      * transcript sub-types:
      */
     { "transcript", "transcript" }, 
     { "mature_transcript", "transcript" }, 
+    { "processed_transcript", "transcript" }, 
+    { "aberrant_processed_transcript", "transcript" },
     { "ncRNA", "transcript" },
     { "snRNA", "transcript" },
-    { "aberrant_processed_transcript", "transcript" },
+    { "miRNA", "transcript" },
+    { "lincRNA", "transcript" },
+    // there are many more sub-types of ncRNA...
     
     /*
      * sequence_variant sub-types:
@@ -55,10 +64,25 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "structural_variant", "sequence_variant" },
     
     /*
-     * no sub-types of exon or CDS yet encountered; add if needed
+     * no sub-types of exon or CDS have yet been seen in Ensembl;
+     * some are added here for testing purposes
      */
     { "exon", "exon" },
-    { "CDS", "CDS" }
+    { "coding_exon", "exon" },
+    { "CDS", "CDS" },
+    { "CDS_predicted", "CDS" },
+    
+    /*
+     * terms used in exonerate or PASA GFF
+     */
+    { "protein_match", "protein_match"},
+    { "nucleotide_match", "nucleotide_match"},
+    { "cDNA_match", "nucleotide_match"},
+    
+    /*
+     * used in InterProScan GFF
+     */
+    { "polypeptide", "polypeptide" }
   };
   // @formatter:on
 
@@ -155,7 +179,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
       if (!termsNotFound.contains(term))
       {
         System.out.println("SO term " + term
-                + " not known - either invalid or needs modelled in "
+                + " not known - may be invalid, or model if needed in "
                 + getClass().getName());
         termsNotFound.add(term);
       }