JAL-1191 additions to SOLite, tweak to test
[jalview.git] / src / jalview / io / gff / SequenceOntologyLite.java
index c0ae971..d2e6654 100644 (file)
@@ -22,6 +22,9 @@ public class SequenceOntologyLite implements SequenceOntologyI
 {
   /*
    * initial selection of types of interest when processing Ensembl features
+   * NB unlike the full SequenceOntology, indirect child-parent
+   * relationships are not traversed here, so every sub-type (direct or
+   * indirect) of e.g. gene that is of interest must be listed explicitly
    */
   // @formatter:off
   private final String[][] TERMS = new String[][] {
@@ -32,15 +35,21 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "gene", "gene" }, 
     { "ncRNA_gene", "gene" }, 
     { "snRNA_gene", "gene" },
+    { "miRNA_gene", "gene" },
+    { "lincRNA_gene", "gene" },
     
     /*
      * transcript sub-types:
      */
     { "transcript", "transcript" }, 
     { "mature_transcript", "transcript" }, 
+    { "processed_transcript", "transcript" }, 
+    { "aberrant_processed_transcript", "transcript" },
     { "ncRNA", "transcript" },
     { "snRNA", "transcript" },
-    { "aberrant_processed_transcript", "transcript" },
+    { "miRNA", "transcript" },
+    { "lincRNA", "transcript" },
+    // there are many more sub-types of ncRNA...
     
     /*
      * sequence_variant sub-types:
@@ -55,8 +64,8 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "structural_variant", "sequence_variant" },
     
     /*
-     * no sub-types of exon or CDS encountered in Ensembl
-     * a few added here for testing purposes
+     * no sub-types of exon or CDS yet seen in Ensembl
+     * some added here for testing purposes
      */
     { "exon", "exon" },
     { "coding_exon", "exon" },
@@ -64,10 +73,11 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "CDS_predicted", "CDS" },
     
     /*
-     * used in exonerate GFF
+     * terms used in exonerate or PASA GFF
      */
     { "protein_match", "protein_match"},
     { "nucleotide_match", "nucleotide_match"},
+    { "cDNA_match", "nucleotide_match"},
     
     /*
      * used in InterProScan GFF
@@ -169,7 +179,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
       if (!termsNotFound.contains(term))
       {
         System.out.println("SO term " + term
-                + " not known - either invalid or needs modelled in "
+                + " not known - may be invalid, or model if needed in "
                 + getClass().getName());
         termsNotFound.add(term);
       }