JAL-1705 further unit tests
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 1 Feb 2016 16:44:17 +0000 (16:44 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 1 Feb 2016 16:44:17 +0000 (16:44 +0000)
src/jalview/ext/ensembl/EnsemblGenome.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/io/gff/SequenceOntologyI.java
src/jalview/io/gff/SequenceOntologyLite.java
test/jalview/ext/ensembl/EnsemblCdnaTest.java
test/jalview/ext/ensembl/EnsemblCdsTest.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblGeneTest.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblGenomeTest.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java

index b7db2bc..6bbc3e9 100644 (file)
@@ -37,9 +37,10 @@ public class EnsemblGenome extends EnsemblSeqProxy
 
   /**
    * Answers true unless the feature type is 'transcript' (or a sub-type of
-   * transcript in the Sequence Ontology). Transcript features are only
-   * retrieved in order to identify the transcript sequence range, and are
-   * redundant information on the transcript sequence itself.
+   * transcript in the Sequence Ontology), or has a parent other than the given
+   * accession id. Transcript features are only retrieved in order to identify
+   * the transcript sequence range, and are redundant information on the
+   * transcript sequence itself.
    */
   @Override
   protected boolean retainFeature(SequenceFeature sf, String accessionId)
index 8c1e972..b2804f2 100644 (file)
@@ -46,12 +46,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
   protected static final String ID = "ID";
 
-  /*
-   * this needs special handling, as it isA sequence_variant in the
-   * Sequence Ontology, but behaves in Ensembl as if it isA transcript
-   */
-  protected static final String NMD_VARIANT = "NMD_transcript_variant";
-
   protected static final String NAME = "Name";
 
   public enum EnsemblSeqType
@@ -1105,7 +1099,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    */
   public static boolean isTranscript(String featureType)
   {
-    return NMD_VARIANT.equals(featureType)
+    return SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(featureType)
             || SequenceOntologyFactory.getInstance().isA(featureType,
                     SequenceOntologyI.TRANSCRIPT);
   }
index 8128177..c7a5baa 100644 (file)
@@ -7,10 +7,13 @@ public interface SequenceOntologyI
   /*
    * selected commonly used values for quick reference
    */
+  // SO:0000104
   public static final String POLYPEPTIDE = "polypeptide";
 
+  // SO:0000349
   public static final String PROTEIN_MATCH = "protein_match";
 
+  // SO:0000347
   public static final String NUCLEOTIDE_MATCH = "nucleotide_match";
 
   // SO:0000316
@@ -25,6 +28,9 @@ public interface SequenceOntologyI
   // SO:0000673
   public static final String TRANSCRIPT = "transcript";
 
+  // SO:0001621 isA sequence_variant but used in Ensembl as a transcript
+  public static final String NMD_TRANSCRIPT_VARIANT = "NMD_transcript_variant";
+
   // SO:0000704
   public static final String GENE = "gene";
 
index 6719ae6..c0ae971 100644 (file)
@@ -55,10 +55,13 @@ public class SequenceOntologyLite implements SequenceOntologyI
     { "structural_variant", "sequence_variant" },
     
     /*
-     * no sub-types of exon or CDS yet encountered; add if needed
+     * no sub-types of exon or CDS encountered in Ensembl
+     * a few added here for testing purposes
      */
     { "exon", "exon" },
+    { "coding_exon", "exon" },
     { "CDS", "CDS" },
+    { "CDS_predicted", "CDS" },
     
     /*
      * used in exonerate GFF
index 30bc81b..2d99a52 100644 (file)
@@ -1,7 +1,9 @@
 package jalview.ext.ensembl;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
@@ -12,16 +14,29 @@ import jalview.util.MapList;
 
 import java.util.List;
 
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 public class EnsemblCdnaTest
 {
+  @BeforeClass
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
   /**
    * Test that the cdna part of genomic sequence is correctly identified by
    * 'exon' features (or subtypes) - reverse strand case.
    */
   @Test(groups = "Functional")
-  public void getGenomicRangesFromFeatures_reverseStrand()
+  public void testGetGenomicRangesFromFeatures_reverseStrand()
   {
     EnsemblCdna testee = new EnsemblCdna();
     SequenceI genomic = new SequenceDummy("chr7");
@@ -51,8 +66,6 @@ public class EnsemblCdnaTest
     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
     genomic.addSequenceFeature(sf);
 
-    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-
     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
             transcriptId, 23);
     List<int[]> fromRanges = ranges.getFromRanges();
@@ -75,10 +88,10 @@ public class EnsemblCdnaTest
 
   /**
    * Test that the cdna part of genomic sequence is correctly identified by
-   * 'exon' features (or subtypes).
+   * 'exon' features (or subtypes) with the desired transcript as parent
    */
   @Test(groups = "Functional")
-  public void getGenomicRangesFromFeatures()
+  public void testGetGenomicRangesFromFeatures()
   {
     EnsemblCdna testee = new EnsemblCdna();
     SequenceI genomic = new SequenceDummy("chr7");
@@ -109,8 +122,6 @@ public class EnsemblCdnaTest
     sf.setStrand("-"); // weird but ignored
     genomic.addSequenceFeature(sf);
   
-    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-  
     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
             transcriptId, 23);
     List<int[]> fromRanges = ranges.getFromRanges();
@@ -132,7 +143,7 @@ public class EnsemblCdnaTest
    * reverse strands are present in the features of interest
    */
   @Test(groups = "Functional")
-  public void getGenomicRangesFromFeatures_mixedStrand()
+  public void testGetGenomicRangesFromFeatures_mixedStrand()
   {
     EnsemblCdna testee = new EnsemblCdna();
     SequenceI genomic = new SequenceDummy("chr7");
@@ -151,10 +162,76 @@ public class EnsemblCdnaTest
     sf.setStrand("+");
     genomic.addSequenceFeature(sf);
   
-    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
-  
     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
             transcriptId, 23);
     assertNull(ranges);
   }
+
+  /**
+   * Test the method that retains features except for 'transcript' (or
+   * subtypes), or features with parent other than the given id
+   */
+  @Test(groups = "Functional")
+  public void testRetainFeature()
+  {
+    String accId = "ABC123";
+    EnsemblCdna testee = new EnsemblCdna();
+
+    SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
+            20500, 0f, null);
+    assertFalse(testee.retainFeature(sf, accId));
+
+    sf.setType("aberrant_processed_transcript");
+    assertFalse(testee.retainFeature(sf, accId));
+
+    sf.setType("NMD_transcript_variant");
+    assertFalse(testee.retainFeature(sf, accId));
+
+    // other feature with no parent is retained
+    sf.setType("sequence_variant");
+    assertTrue(testee.retainFeature(sf, accId));
+
+    // other feature with desired parent is retained
+    sf.setValue("Parent", "transcript:" + accId);
+    assertTrue(testee.retainFeature(sf, accId));
+
+    // feature with wrong parent is not retained
+    sf.setValue("Parent", "transcript:XYZ");
+    assertFalse(testee.retainFeature(sf, accId));
+  }
+
+  /**
+   * Test the method that picks out 'exon' (or subtype) features with the
+   * accession id as parent
+   */
+  @Test(groups = "Functional")
+  public void testIdentifiesSequence()
+  {
+    String accId = "ABC123";
+    EnsemblCdna testee = new EnsemblCdna();
+
+    // exon with no parent not valid
+    SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
+    assertFalse(testee.identifiesSequence(sf, accId));
+
+    // exon with wrong parent not valid
+    sf.setValue("Parent", "transcript:XYZ");
+    assertFalse(testee.identifiesSequence(sf, accId));
+
+    // exon with right parent is valid
+    sf.setValue("Parent", "transcript:" + accId);
+    assertTrue(testee.identifiesSequence(sf, accId));
+
+    // exon sub-type with right parent is valid
+    sf.setType("coding_exon");
+    assertTrue(testee.identifiesSequence(sf, accId));
+
+    // transcript not valid:
+    sf.setType("transcript");
+    assertFalse(testee.identifiesSequence(sf, accId));
+
+    // CDS not valid:
+    sf.setType("CDS");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  }
 }
diff --git a/test/jalview/ext/ensembl/EnsemblCdsTest.java b/test/jalview/ext/ensembl/EnsemblCdsTest.java
new file mode 100644 (file)
index 0000000..fb17845
--- /dev/null
@@ -0,0 +1,154 @@
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class EnsemblCdsTest
+{
+  @BeforeClass
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
+
+  /**
+   * Test that the CDS part of genomic sequence is correctly identified by
+   * 'CDS' features (or subtypes) with the desired transcript as parent
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures()
+  {
+    EnsemblCds testee = new EnsemblCds();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+    String transcriptId = "ABC123";
+  
+    // CDS at (start+10000) length 501
+    SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
+            null);
+    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+  
+    // CDS (sub-type) at (start + 500) length 101
+    sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
+    sf.setValue("Parent", "transcript:" + transcriptId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+  
+    // CDS belonging to a different transcript doesn't count
+    sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
+    sf.setValue("Parent", "transcript:anotherOne");
+    genomic.addSequenceFeature(sf);
+  
+    // exon feature doesn't count
+    sf = new SequenceFeature("exon", "", 10000, 50000, 0f, null);
+    genomic.addSequenceFeature(sf);
+
+    // mRNA_region feature doesn't count (parent of CDS)
+    sf = new SequenceFeature("mRNA_region", "", 10000, 50000, 0f, null);
+    genomic.addSequenceFeature(sf);
+  
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
+            transcriptId, 23);
+    List<int[]> fromRanges = ranges.getFromRanges();
+    assertEquals(2, fromRanges.size());
+    // from ranges should be sorted by start order
+    assertEquals(10500, fromRanges.get(0)[0]);
+    assertEquals(10600, fromRanges.get(0)[1]);
+    assertEquals(20000, fromRanges.get(1)[0]);
+    assertEquals(20500, fromRanges.get(1)[1]);
+    // to range should start from given start numbering
+    List<int[]> toRanges = ranges.getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(23, toRanges.get(0)[0]);
+    assertEquals(624, toRanges.get(0)[1]);
+  }
+
+  /**
+   * Test the method that retains features except for 'CDS' (or subtypes), or
+   * features with parent other than the given id
+   */
+  @Test(groups = "Functional")
+  public void testRetainFeature()
+  {
+    String accId = "ABC123";
+    EnsemblCds testee = new EnsemblCds();
+  
+    SequenceFeature sf = new SequenceFeature("CDS", "", 20000,
+            20500, 0f, null);
+    assertFalse(testee.retainFeature(sf, accId));
+  
+    sf.setType("CDS_predicted");
+    assertFalse(testee.retainFeature(sf, accId));
+  
+    // other feature with no parent is retained
+    sf.setType("sequence_variant");
+    assertTrue(testee.retainFeature(sf, accId));
+  
+    // other feature with desired parent is retained
+    sf.setValue("Parent", "transcript:" + accId);
+    assertTrue(testee.retainFeature(sf, accId));
+  
+    // feature with wrong parent is not retained
+    sf.setValue("Parent", "transcript:XYZ");
+    assertFalse(testee.retainFeature(sf, accId));
+  }
+
+  /**
+   * Test the method that picks out 'CDS' (or subtype) features with the
+   * accession id as parent
+   */
+  @Test(groups = "Functional")
+  public void testIdentifiesSequence()
+  {
+    String accId = "ABC123";
+    EnsemblCds testee = new EnsemblCds();
+  
+    // cds with no parent not valid
+    SequenceFeature sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // cds with wrong parent not valid
+    sf.setValue("Parent", "transcript:XYZ");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // cds with right parent is valid
+    sf.setValue("Parent", "transcript:" + accId);
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // cds sub-type with right parent is valid
+    sf.setType("CDS_predicted");
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // transcript not valid:
+    sf.setType("transcript");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // exon not valid:
+    sf.setType("exon");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  }
+
+}
diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java
new file mode 100644 (file)
index 0000000..1f1a84e
--- /dev/null
@@ -0,0 +1,228 @@
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class EnsemblGeneTest
+{
+  @BeforeClass
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
+
+  /**
+   * Test that the gene part of genomic sequence is uniquely identified by a
+   * 'gene' feature (or subtype) with the correct gene ID
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures()
+  {
+    EnsemblGene testee = new EnsemblGene();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+    String geneId = "ABC123";
+
+    // gene at (start+10000) length 501
+    SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
+            null);
+    sf.setValue("ID", "gene:" + geneId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+
+    // gene at (start + 500) length 101
+    // should be ignored - the first 'gene' found defines the whole range
+    sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+    sf.setValue("ID", "gene:" + geneId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+            23);
+    List<int[]> fromRanges = ranges.getFromRanges();
+    assertEquals(1, fromRanges.size());
+    assertEquals(20000, fromRanges.get(0)[0]);
+    assertEquals(20500, fromRanges.get(0)[1]);
+    // to range should start from given start numbering
+    List<int[]> toRanges = ranges.getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(23, toRanges.get(0)[0]);
+    assertEquals(523, toRanges.get(0)[1]);
+  }
+
+  /**
+   * Test variant using a sub-type of gene from the Sequence Ontology
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
+  {
+    EnsemblGene testee = new EnsemblGene();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+    String geneId = "ABC123";
+
+    // gene at (start+10000) length 501
+    SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
+            20500, 0f, null);
+    sf.setValue("ID", "gene:" + geneId);
+    sf.setStrand("-");
+    genomic.addSequenceFeature(sf);
+
+    // gene at (start + 500) length 101
+    // should be ignored - the first 'gene' found defines the whole range
+    // (real data would only have one such feature)
+    sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
+    sf.setValue("ID", "gene:" + geneId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+            23);
+    List<int[]> fromRanges = ranges.getFromRanges();
+    assertEquals(1, fromRanges.size());
+    // from range on reverse strand:
+    assertEquals(20500, fromRanges.get(0)[0]);
+    assertEquals(20000, fromRanges.get(0)[1]);
+    // to range should start from given start numbering
+    List<int[]> toRanges = ranges.getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(23, toRanges.get(0)[0]);
+    assertEquals(523, toRanges.get(0)[1]);
+  }
+
+  /**
+   * Test the method that extracts transcript (or subtype) features with a
+   * specified gene as parent
+   */
+  @Test(groups = "Functional")
+  public void testGetTranscriptFeatures()
+  {
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+    String geneId = "ABC123";
+
+    // transcript feature
+    SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
+            20500, 0f, null);
+    sf1.setValue("Parent", "gene:" + geneId);
+    genomic.addSequenceFeature(sf1);
+
+    // transcript sub-type feature
+    SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
+            20500, 0f, null);
+    sf2.setValue("Parent", "gene:" + geneId);
+    genomic.addSequenceFeature(sf2);
+
+    // NMD_transcript_variant treated like transcript in Ensembl
+    SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
+            20000, 20500, 0f, null);
+    sf3.setValue("Parent", "gene:" + geneId);
+    genomic.addSequenceFeature(sf3);
+
+    // transcript for a different gene - ignored
+    SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
+            0f, null);
+    sf4.setValue("Parent", "gene:XYZ");
+    genomic.addSequenceFeature(sf4);
+
+    EnsemblGene testee = new EnsemblGene();
+    List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
+            genomic);
+    assertEquals(3, features.size());
+    assertSame(sf1, features.get(0));
+    assertSame(sf2, features.get(1));
+    assertSame(sf3, features.get(2));
+  }
+
+  /**
+   * Test the method that retains features except for 'gene', or 'transcript'
+   * with parent other than the given id
+   */
+  @Test(groups = "Functional")
+  public void testRetainFeature()
+  {
+    String geneId = "ABC123";
+    EnsemblGene testee = new EnsemblGene();
+    SequenceFeature sf = new SequenceFeature("gene", "", 20000,
+            20500, 0f, null);
+    sf.setValue("ID", "gene:" + geneId);
+    assertFalse(testee.retainFeature(sf, geneId));
+
+    sf.setType("transcript");
+    sf.setValue("Parent", "gene:" + geneId);
+    assertTrue(testee.retainFeature(sf, geneId));
+
+    sf.setType("mature_transcript");
+    sf.setValue("Parent", "gene:" + geneId);
+    assertTrue(testee.retainFeature(sf, geneId));
+
+    sf.setType("NMD_transcript_variant");
+    sf.setValue("Parent", "gene:" + geneId);
+    assertTrue(testee.retainFeature(sf, geneId));
+
+    sf.setValue("Parent", "gene:XYZ");
+    assertFalse(testee.retainFeature(sf, geneId));
+
+    sf.setType("anything");
+    assertTrue(testee.retainFeature(sf, geneId));
+  }
+
+  /**
+   * Test the method that picks out 'gene' (or subtype) features with the
+   * accession id as ID
+   */
+  @Test(groups = "Functional")
+  public void testIdentifiesSequence()
+  {
+    String accId = "ABC123";
+    EnsemblGene testee = new EnsemblGene();
+  
+    // gene with no ID not valid
+    SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // gene with wrong ID not valid
+    sf.setValue("ID", "gene:XYZ");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // gene with right ID is valid
+    sf.setValue("ID", "gene:" + accId);
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // gene sub-type with right ID is valid
+    sf.setType("snRNA_gene");
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // transcript not valid:
+    sf.setType("transcript");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // exon not valid:
+    sf.setType("exon");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  }
+}
diff --git a/test/jalview/ext/ensembl/EnsemblGenomeTest.java b/test/jalview/ext/ensembl/EnsemblGenomeTest.java
new file mode 100644 (file)
index 0000000..daad8b1
--- /dev/null
@@ -0,0 +1,170 @@
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class EnsemblGenomeTest
+{
+  @BeforeClass
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
+
+  /**
+   * Test that the transcript part of genomic sequence is correctly identified
+   * by 'transcript' features (or subtypes) with the correct transcript ID
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures()
+  {
+    EnsemblGenome testee = new EnsemblGenome();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+    String transcriptId = "ABC123";
+  
+    // transcript at (start+10000) length 501
+    SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
+            20500, 0f,
+            null);
+    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+  
+    // transcript (sub-type) at (start + 500) length 101
+    sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
+    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+
+    // Ensembl treats NMD_transcript_variant as if transcript
+    // although strictly it is a sequence_variant in SO
+    sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
+            0f, null);
+    sf.setValue("ID", "transcript:" + transcriptId);
+    sf.setStrand("+");
+    genomic.addSequenceFeature(sf);
+  
+    // transcript with a different ID doesn't count
+    sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
+    sf.setValue("ID", "transcript:anotherOne");
+    genomic.addSequenceFeature(sf);
+  
+    // parent of transcript feature doesn't count
+    sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f,
+            null);
+    genomic.addSequenceFeature(sf);
+
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
+            transcriptId, 23);
+    List<int[]> fromRanges = ranges.getFromRanges();
+    assertEquals(3, fromRanges.size());
+    // from ranges should be sorted by start order
+    assertEquals(10500, fromRanges.get(0)[0]);
+    assertEquals(10600, fromRanges.get(0)[1]);
+    assertEquals(11000, fromRanges.get(1)[0]);
+    assertEquals(12000, fromRanges.get(1)[1]);
+    assertEquals(20000, fromRanges.get(2)[0]);
+    assertEquals(20500, fromRanges.get(2)[1]);
+    // to range should start from given start numbering
+    List<int[]> toRanges = ranges.getToRanges();
+    assertEquals(1, toRanges.size());
+    assertEquals(23, toRanges.get(0)[0]);
+    assertEquals(1625, toRanges.get(0)[1]);
+  }
+
+  /**
+   * Test the method that retains features except for 'transcript' (or
+   * sub-type), or those with parent other than the given id
+   */
+  @Test(groups = "Functional")
+  public void testRetainFeature()
+  {
+    String accId = "ABC123";
+    EnsemblGenome testee = new EnsemblGenome();
+
+    SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
+            20500, 0f, null);
+    assertFalse(testee.retainFeature(sf, accId));
+  
+    sf.setType("mature_transcript");
+    assertFalse(testee.retainFeature(sf, accId));
+  
+    sf.setType("NMD_transcript_variant");
+    assertFalse(testee.retainFeature(sf, accId));
+  
+    // other feature with no parent is kept
+    sf.setType("anything");
+    assertTrue(testee.retainFeature(sf, accId));
+
+    // other feature with correct parent is kept
+    sf.setValue("Parent", "transcript:" + accId);
+    assertTrue(testee.retainFeature(sf, accId));
+
+    // other feature with wrong parent is not kept
+    sf.setValue("Parent", "transcript:XYZ");
+    assertFalse(testee.retainFeature(sf, accId));
+  }
+
+  /**
+   * Test the method that picks out 'transcript' (or subtype) features with the
+   * accession id as ID
+   */
+  @Test(groups = "Functional")
+  public void testIdentifiesSequence()
+  {
+    String accId = "ABC123";
+    EnsemblGenome testee = new EnsemblGenome();
+  
+    // transcript with no ID not valid
+    SequenceFeature sf = new SequenceFeature("transcript", "", 1, 2, 0f,
+            null);
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // transcript with wrong ID not valid
+    sf.setValue("ID", "transcript");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // transcript with right ID is valid
+    sf.setValue("ID", "transcript:" + accId);
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // transcript sub-type with right ID is valid
+    sf.setType("ncRNA");
+    assertTrue(testee.identifiesSequence(sf, accId));
+
+    // Ensembl treats NMD_transcript_variant as if a transcript
+    sf.setType("NMD_transcript_variant");
+    assertTrue(testee.identifiesSequence(sf, accId));
+  
+    // gene not valid:
+    sf.setType("gene");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  
+    // exon not valid:
+    sf.setType("exon");
+    assertFalse(testee.identifiesSequence(sf, accId));
+  }
+
+}
index 31745e5..ed936d5 100644 (file)
@@ -21,6 +21,8 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
@@ -106,6 +108,18 @@ public class EnsemblSeqProxyTest
                   + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
                   + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
 
+  @BeforeClass
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  @AfterClass
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
+
   @DataProvider(name = "queries")
   public Object[][] createQueryData(Method m)
   {
@@ -306,10 +320,9 @@ public class EnsemblSeqProxyTest
     sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
     ds.addSequenceFeature(sf);
     // CDS for dna 10-12
-    sf = new SequenceFeature("some_cds", "", 10, 12, 0f, null);
+    sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
     ds.addSequenceFeature(sf);
 
-    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
     List<int[]> ranges = new ArrayList<int[]>();
     int mappedLength = testee.getCdsRanges(dnaSeq, ranges);
     assertEquals(6, mappedLength);
@@ -346,10 +359,9 @@ public class EnsemblSeqProxyTest
     ds.addSequenceFeature(sf);
     ds.addSequenceFeature(sf);
     // CDS for dna 13-15
-    sf = new SequenceFeature("some_cds", "", 13, 15, 0f, null);
+    sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
     ds.addSequenceFeature(sf);
   
-    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
     List<int[]> ranges = new ArrayList<int[]>();
     int mappedLength = testee.getCdsRanges(dnaSeq, ranges);