--- /dev/null
+package jalview.ext.ensembl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
+import jalview.util.MapList;
+
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class EnsemblGeneTest
+{
+  /**
+   * Installs a SequenceOntologyLite instance as the ontology served by
+   * SequenceOntologyFactory before any test in this class runs.
+   * 
+   * alwaysRun is set because this method belongs to no TestNG group, whereas
+   * every test here is in group "Functional"; without it the method would be
+   * skipped whenever the run is restricted to selected groups.
+   */
+  @BeforeClass(alwaysRun = true)
+  public void setUp()
+  {
+    SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+  }
+
+  /**
+   * Resets the SequenceOntologyFactory instance to null once all tests in
+   * this class have run, so the ontology set by this class is not left in
+   * place for later test classes.
+   * 
+   * alwaysRun is set so that the reset still happens when the run is
+   * restricted to selected groups (this method belongs to none) or when an
+   * earlier method failed or was skipped.
+   */
+  @AfterClass(alwaysRun = true)
+  public void tearDown()
+  {
+    SequenceOntologyFactory.setInstance(null);
+  }
+
+  /**
+   * Checks that the genomic range of a gene is taken from a 'gene' feature
+   * (or sub-type) whose ID matches the requested gene id, and that the mapped
+   * ('to') range is numbered from the supplied start position
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures()
+  {
+    final String geneId = "ABC123";
+    final String featureId = "gene:" + geneId;
+    EnsemblGene testee = new EnsemblGene();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+
+    // first gene feature: forward strand, at (start + 10000), length 501
+    SequenceFeature firstGene = new SequenceFeature("gene", "", 20000,
+            20500, 0f, null);
+    firstGene.setValue("ID", featureId);
+    firstGene.setStrand("+");
+    genomic.addSequenceFeature(firstGene);
+
+    // second gene feature with the same ID: at (start + 500), length 101
+    // expected to be ignored - the first 'gene' found defines the whole range
+    SequenceFeature secondGene = new SequenceFeature("gene", "", 10500,
+            10600, 0f, null);
+    secondGene.setValue("ID", featureId);
+    secondGene.setStrand("+");
+    genomic.addSequenceFeature(secondGene);
+
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+            23);
+
+    // a single 'from' range, matching the first gene feature only
+    List<int[]> from = ranges.getFromRanges();
+    assertEquals(1, from.size());
+    int[] fromRange = from.get(0);
+    assertEquals(20000, fromRange[0]);
+    assertEquals(20500, fromRange[1]);
+
+    // a single 'to' range of the same length, numbered from the given start
+    List<int[]> to = ranges.getToRanges();
+    assertEquals(1, to.size());
+    int[] toRange = to.get(0);
+    assertEquals(23, toRange[0]);
+    assertEquals(523, toRange[1]);
+  }
+
+  /**
+   * Variant of the genomic ranges test in which the identifying feature is a
+   * Sequence Ontology sub-type of gene ('ncRNA_gene') on the reverse strand;
+   * the 'from' range is then expected in descending order
+   */
+  @Test(groups = "Functional")
+  public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
+  {
+    final String geneId = "ABC123";
+    final String featureId = "gene:" + geneId;
+    EnsemblGene testee = new EnsemblGene();
+    SequenceI genomic = new SequenceDummy("chr7");
+    genomic.setStart(10000);
+    genomic.setEnd(50000);
+
+    // gene sub-type feature: reverse strand, at (start + 10000), length 501
+    SequenceFeature ncRnaGene = new SequenceFeature("ncRNA_gene", "",
+            20000, 20500, 0f, null);
+    ncRnaGene.setValue("ID", featureId);
+    ncRnaGene.setStrand("-");
+    genomic.addSequenceFeature(ncRnaGene);
+
+    // second feature with the same ID: at (start + 500), length 101
+    // expected to be ignored - the first 'gene' found defines the whole range
+    // (real data would only have one such feature)
+    SequenceFeature extraGene = new SequenceFeature("gene", "", 10500,
+            10600, 0f, null);
+    extraGene.setValue("ID", featureId);
+    extraGene.setStrand("+");
+    genomic.addSequenceFeature(extraGene);
+
+    MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
+            23);
+
+    // a single 'from' range, running end-to-start for the reverse strand
+    List<int[]> from = ranges.getFromRanges();
+    assertEquals(1, from.size());
+    int[] fromRange = from.get(0);
+    assertEquals(20500, fromRange[0]);
+    assertEquals(20000, fromRange[1]);
+
+    // a single 'to' range, numbered upwards from the given start
+    List<int[]> to = ranges.getToRanges();
+    assertEquals(1, to.size());
+    int[] toRange = to.get(0);
+    assertEquals(23, toRange[0]);
+    assertEquals(523, toRange[1]);
+  }
+
+ /**
+ * Test the method that extracts transcript (or subtype) features with a
+ * specified gene as parent
+ */
+ @Test(groups = "Functional")
+ public void testGetTranscriptFeatures()
+ {
+ SequenceI genomic = new SequenceDummy("chr7");
+ genomic.setStart(10000);
+ genomic.setEnd(50000);
+ String geneId = "ABC123";
+
+ // transcript feature
+ SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
+ 20500, 0f, null);
+ sf1.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf1);
+
+ // transcript sub-type feature
+ SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
+ 20500, 0f, null);
+ sf2.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf2);
+
+ // NMD_transcript_variant treated like transcript in Ensembl
+ SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
+ 20000, 20500, 0f, null);
+ sf3.setValue("Parent", "gene:" + geneId);
+ genomic.addSequenceFeature(sf3);
+
+ // transcript for a different gene - ignored
+ SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
+ 0f, null);
+ sf4.setValue("Parent", "gene:XYZ");
+ genomic.addSequenceFeature(sf4);
+
+ EnsemblGene testee = new EnsemblGene();
+ List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
+ genomic);
+ assertEquals(3, features.size());
+ assertSame(sf1, features.get(0));
+ assertSame(sf2, features.get(1));
+ assertSame(sf3, features.get(2));
+ }
+
+ /**
+ * Test the method that retains features except for 'gene', or 'transcript'
+ * with parent other than the given id
+ */
+ @Test(groups = "Functional")
+ public void testRetainFeature()
+ {
+ String geneId = "ABC123";
+ EnsemblGene testee = new EnsemblGene();
+ SequenceFeature sf = new SequenceFeature("gene", "", 20000,
+ 20500, 0f, null);
+ sf.setValue("ID", "gene:" + geneId);
+ assertFalse(testee.retainFeature(sf, geneId));
+
+ sf.setType("transcript");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ sf.setType("mature_transcript");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ sf.setType("NMD_transcript_variant");
+ sf.setValue("Parent", "gene:" + geneId);
+ assertTrue(testee.retainFeature(sf, geneId));
+
+ sf.setValue("Parent", "gene:XYZ");
+ assertFalse(testee.retainFeature(sf, geneId));
+
+ sf.setType("anything");
+ assertTrue(testee.retainFeature(sf, geneId));
+ }
+
+ /**
+ * Test the method that picks out 'gene' (or subtype) features with the
+ * accession id as ID
+ */
+ @Test(groups = "Functional")
+ public void testIdentifiesSequence()
+ {
+ String accId = "ABC123";
+ EnsemblGene testee = new EnsemblGene();
+
+ // gene with no ID not valid
+ SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // gene with wrong ID not valid
+ sf.setValue("ID", "gene:XYZ");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // gene with right ID is valid
+ sf.setValue("ID", "gene:" + accId);
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // gene sub-type with right ID is valid
+ sf.setType("snRNA_gene");
+ assertTrue(testee.identifiesSequence(sf, accId));
+
+ // transcript not valid:
+ sf.setType("transcript");
+ assertFalse(testee.identifiesSequence(sf, accId));
+
+ // exon not valid:
+ sf.setType("exon");
+ assertFalse(testee.identifiesSequence(sf, accId));
+ }
+}