JAL-2835 additional sub-terms of sequence_variant added
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 1 Dec 2017 15:51:29 +0000 (15:51 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 1 Dec 2017 15:51:29 +0000 (15:51 +0000)
src/jalview/io/gff/SequenceOntologyLite.java
test/jalview/ext/so/SequenceOntologyTest.java
test/jalview/io/gff/SequenceOntologyLiteTest.java [new file with mode: 0644]

index f989f7b..72e906c 100644 (file)
@@ -44,7 +44,7 @@ public class SequenceOntologyLite implements SequenceOntologyI
    * initial selection of types of interest when processing Ensembl features
    * NB unlike the full SequenceOntology we don't traverse indirect
    * child-parent relationships here so e.g. need to list every sub-type
-   * of gene (direct or indirect) that is of interest
+   * (direct or indirect) that is of interest
    */
   // @formatter:off
   private final String[][] TERMS = new String[][] {
@@ -75,16 +75,26 @@ public class SequenceOntologyLite implements SequenceOntologyI
     // there are many more sub-types of ncRNA...
     
     /*
-     * sequence_variant sub-types:
+     * sequence_variant sub-types
      */
     { "sequence_variant", "sequence_variant" },
+    { "structural_variant", "sequence_variant" },
     { "feature_variant", "sequence_variant" },
     { "gene_variant", "sequence_variant" },
+    { "transcript_variant", "sequence_variant" },
     // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
     // but we model it here correctly as per the SO
     { "NMD_transcript_variant", "sequence_variant" },
-    { "transcript_variant", "sequence_variant" },
-    { "structural_variant", "sequence_variant" },
+    { "missense_variant", "sequence_variant" },
+    { "synonymous_variant", "sequence_variant" },
+    { "frameshift_variant", "sequence_variant" },
+    { "5_prime_UTR_variant", "sequence_variant" },
+    { "3_prime_UTR_variant", "sequence_variant" },
+    { "stop_gained", "sequence_variant" },
+    { "stop_lost", "sequence_variant" },
+    { "inframe_deletion", "sequence_variant" },
+    { "inframe_insertion", "sequence_variant" },
+    { "splice_region_variant", "sequence_variant" },
     
     /*
      * no sub-types of exon or CDS yet seen in Ensembl
@@ -121,8 +131,8 @@ public class SequenceOntologyLite implements SequenceOntologyI
 
   public SequenceOntologyLite()
   {
-    termsFound = new ArrayList<String>();
-    termsNotFound = new ArrayList<String>();
+    termsFound = new ArrayList<>();
+    termsNotFound = new ArrayList<>();
     loadStaticData();
   }
 
@@ -131,13 +141,13 @@ public class SequenceOntologyLite implements SequenceOntologyI
    */
   private void loadStaticData()
   {
-    parents = new HashMap<String, List<String>>();
+    parents = new HashMap<>();
     for (String[] pair : TERMS)
     {
       List<String> p = parents.get(pair[0]);
       if (p == null)
       {
-        p = new ArrayList<String>();
+        p = new ArrayList<>();
         parents.put(pair[0], p);
       }
       p.add(pair[1]);
index b76a295..31e1887 100644 (file)
@@ -107,4 +107,29 @@ public class SequenceOntologyTest
     assertFalse(so.isA("CDS_region", "CDS"));// part_of
     assertFalse(so.isA("polypeptide", "CDS")); // derives_from
   }
+
+  @Test(groups = "Functional")
+  public void testIsSequenceVariant()
+  {
+    assertFalse(so.isA("CDS", "sequence_variant"));
+    assertTrue(so.isA("sequence_variant", "sequence_variant"));
+
+    /*
+     * these should all be sub-types of sequence_variant
+     */
+    assertTrue(so.isA("structural_variant", "sequence_variant"));
+    assertTrue(so.isA("feature_variant", "sequence_variant"));
+    assertTrue(so.isA("gene_variant", "sequence_variant"));
+    assertTrue(so.isA("transcript_variant", "sequence_variant"));
+    assertTrue(so.isA("NMD_transcript_variant", "sequence_variant"));
+    assertTrue(so.isA("missense_variant", "sequence_variant"));
+    assertTrue(so.isA("synonymous_variant", "sequence_variant"));
+    assertTrue(so.isA("frameshift_variant", "sequence_variant"));
+    assertTrue(so.isA("5_prime_UTR_variant", "sequence_variant"));
+    assertTrue(so.isA("3_prime_UTR_variant", "sequence_variant"));
+    assertTrue(so.isA("stop_gained", "sequence_variant"));
+    assertTrue(so.isA("stop_lost", "sequence_variant"));
+    assertTrue(so.isA("inframe_deletion", "sequence_variant"));
+    assertTrue(so.isA("inframe_insertion", "sequence_variant"));
+  }
 }
diff --git a/test/jalview/io/gff/SequenceOntologyLiteTest.java b/test/jalview/io/gff/SequenceOntologyLiteTest.java
new file mode 100644 (file)
index 0000000..0766666
--- /dev/null
@@ -0,0 +1,37 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import org.testng.annotations.Test;
+
+public class SequenceOntologyLiteTest
+{
+  @Test(groups = "Functional")
+  public void testIsA_sequenceVariant()
+  {
+    SequenceOntologyI so = new SequenceOntologyLite();
+
+    assertFalse(so.isA("CDS", "sequence_variant"));
+    assertTrue(so.isA("sequence_variant", "sequence_variant"));
+
+    /*
+     * these should all be sub-types of sequence_variant
+     */
+    assertTrue(so.isA("structural_variant", "sequence_variant"));
+    assertTrue(so.isA("feature_variant", "sequence_variant"));
+    assertTrue(so.isA("gene_variant", "sequence_variant"));
+    assertTrue(so.isA("transcript_variant", "sequence_variant"));
+    assertTrue(so.isA("NMD_transcript_variant", "sequence_variant"));
+    assertTrue(so.isA("missense_variant", "sequence_variant"));
+    assertTrue(so.isA("synonymous_variant", "sequence_variant"));
+    assertTrue(so.isA("frameshift_variant", "sequence_variant"));
+    assertTrue(so.isA("5_prime_UTR_variant", "sequence_variant"));
+    assertTrue(so.isA("3_prime_UTR_variant", "sequence_variant"));
+    assertTrue(so.isA("stop_gained", "sequence_variant"));
+    assertTrue(so.isA("stop_lost", "sequence_variant"));
+    assertTrue(so.isA("inframe_deletion", "sequence_variant"));
+    assertTrue(so.isA("inframe_insertion", "sequence_variant"));
+    assertTrue(so.isA("splice_region_variant", "sequence_variant"));
+  }
+}