1 package jalview.ext.ensembl;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
15 import java.util.List;
17 import org.testng.Assert;
18 import org.testng.annotations.AfterClass;
19 import org.testng.annotations.BeforeClass;
20 import org.testng.annotations.Test;
22 public class EnsemblCdnaTest
// Class-level fixture (TestNG @BeforeClass, alwaysRun = true): installs a
// SequenceOntologyLite as the SequenceOntologyFactory instance before the tests run.
// NOTE(review): the method signature line is not visible in this listing.
// Presumably needed so that feature sub-types such as "coding_exon" can be
// resolved by the tests below - confirm against SequenceOntologyLite.
24 @BeforeClass(alwaysRun = true)
27 SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
// Class-level cleanup (TestNG @AfterClass, alwaysRun = true): sets the
// SequenceOntologyFactory instance back to null so the ontology installed for
// this class is not left in place afterwards.
30 @AfterClass(alwaysRun = true)
31 public void tearDown()
33 SequenceOntologyFactory.setInstance(null);
37 * Test that the cdna part of genomic sequence is correctly identified by
38 * 'exon' features (or subtypes) - reverse strand case.
// NOTE(review): some lines of this method are not visible in this listing (the
// embedded numbering skips 51, 53, 59 and 72), so the strand given to each exon
// and the trailing arguments of the two truncated calls cannot be confirmed here.
40 @Test(groups = "Functional")
41 public void testGetGenomicRangesFromFeatures_reverseStrand()
43 EnsemblCdna testee = new EnsemblCdna();
44 SequenceI genomic = new SequenceDummy("chr7");
45 genomic.setStart(10000);
46 genomic.setEnd(50000);
47 String transcriptId = "ABC123";
49 // exon at (start+10000) length 501
50 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
52 sf.setValue("Parent", "transcript:" + transcriptId);
54 genomic.addSequenceFeature(sf);
56 // exon (sub-type) at (start+500) length 101
57 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
58 sf.setValue("Parent", "transcript:" + transcriptId);
60 genomic.addSequenceFeature(sf);
62 // exon belonging to a different transcript doesn't count
63 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
64 sf.setValue("Parent", "transcript:anotherOne");
65 genomic.addSequenceFeature(sf);
67 // transcript feature doesn't count
68 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
69 genomic.addSequenceFeature(sf);
// only the two features parented by transcriptId are expected in the result
71 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
73 List<int[]> fromRanges = ranges.getFromRanges();
74 assertEquals(2, fromRanges.size());
77 * from ranges should be sorted by start order descending
78 * and hold mappings from reverse strand sense
// i.e. each range is held as [higher position, lower position]
80 assertEquals(20500, fromRanges.get(0)[0]);
81 assertEquals(20000, fromRanges.get(0)[1]);
82 assertEquals(10600, fromRanges.get(1)[0]);
83 assertEquals(10500, fromRanges.get(1)[1]);
84 // to range should start from given start numbering
// 501 + 101 = 602 mapped positions, hence a single to-range of 23..624
// (the start value 23 is presumably passed in the truncated call above)
85 List<int[]> toRanges = ranges.getToRanges();
86 assertEquals(1, toRanges.size());
87 assertEquals(23, toRanges.get(0)[0]);
88 assertEquals(624, toRanges.get(0)[1]);
92 * Test that the cdna part of genomic sequence is correctly identified by
93 * 'exon' features (or subtypes) with the desired transcript as parent
// NOTE(review): some lines of this method are not visible in this listing (the
// embedded numbering skips 106, 108, 114 and 128), so the strand given to each
// exon and the trailing arguments of the two truncated calls cannot be confirmed here.
95 @Test(groups = "Functional")
96 public void testGetGenomicRangesFromFeatures()
98 EnsemblCdna testee = new EnsemblCdna();
99 SequenceI genomic = new SequenceDummy("chr7");
100 genomic.setStart(10000);
101 genomic.setEnd(50000);
102 String transcriptId = "ABC123";
104 // exon at (start+10000) length 501
105 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
107 sf.setValue("Parent", "transcript:" + transcriptId);
109 genomic.addSequenceFeature(sf);
111 // exon (sub-type) at (start+500) length 101
112 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
113 sf.setValue("Parent", "transcript:" + transcriptId);
115 genomic.addSequenceFeature(sf);
117 // exon belonging to a different transcript doesn't count
118 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
119 sf.setValue("Parent", "transcript:anotherOne");
120 genomic.addSequenceFeature(sf);
122 // transcript feature doesn't count
123 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
124 sf.setStrand("-"); // weird but ignored
125 genomic.addSequenceFeature(sf);
// only the two features parented by transcriptId are expected in the result
127 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
129 List<int[]> fromRanges = ranges.getFromRanges();
130 assertEquals(2, fromRanges.size());
131 // from ranges should be sorted by start order
// the features were added as 20000-20500 then 10500-10600, so the expected
// order here differs from insertion order
132 assertEquals(10500, fromRanges.get(0)[0]);
133 assertEquals(10600, fromRanges.get(0)[1]);
134 assertEquals(20000, fromRanges.get(1)[0]);
135 assertEquals(20500, fromRanges.get(1)[1]);
136 // to range should start from given start numbering
// 101 + 501 = 602 mapped positions, hence a single to-range of 23..624
// (the start value 23 is presumably passed in the truncated call above)
137 List<int[]> toRanges = ranges.getToRanges();
138 assertEquals(1, toRanges.size());
139 assertEquals(23, toRanges.get(0)[0]);
140 assertEquals(624, toRanges.get(0)[1]);
144 * The method under test should give up and return null if both forward and
145 * reverse strands are present in the features of interest
// NOTE(review): the lines that would give the two exons their strands, the
// remaining arguments of the final call, and the assertion on its result are
// not visible in this listing (numbering skips 157, 159, 164 and 168-172).
147 @Test(groups = "Functional")
148 public void testGetGenomicRangesFromFeatures_mixedStrand()
150 EnsemblCdna testee = new EnsemblCdna();
151 SequenceI genomic = new SequenceDummy("chr7");
152 genomic.setStart(10000);
153 genomic.setEnd(50000);
154 String transcriptId = "ABC123";
// two features, both parented by transcriptId: an exon and an exon sub-type
156 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
158 sf.setValue("Parent", "transcript:" + transcriptId);
160 genomic.addSequenceFeature(sf);
162 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
163 sf.setValue("Parent", "transcript:" + transcriptId);
165 genomic.addSequenceFeature(sf);
167 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
173 * Test the method that retains features except for 'transcript' (or
174 * subtypes), or features with parent other than the given id
// A single SequenceFeature is reused throughout; its type and "Parent" value
// are changed in place between assertions.
176 @Test(groups = "Functional")
177 public void testRetainFeature()
179 String accId = "ABC123";
180 EnsemblCdna testee = new EnsemblCdna();
// NOTE(review): the rest of this constructor call is not visible in this listing
182 SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
// 'transcript' itself is not retained
184 assertFalse(testee.retainFeature(sf, accId));
// nor are these two types, which the test expects to be treated like transcript
186 sf.setType("aberrant_processed_transcript");
187 assertFalse(testee.retainFeature(sf, accId));
189 sf.setType("NMD_transcript_variant");
190 assertFalse(testee.retainFeature(sf, accId));
192 // other feature with no parent is retained
193 sf.setType("sequence_variant");
194 assertTrue(testee.retainFeature(sf, accId));
196 // other feature with desired parent is retained
197 sf.setValue("Parent", "transcript:" + accId);
198 assertTrue(testee.retainFeature(sf, accId));
200 // feature with wrong parent is not retained
201 sf.setValue("Parent", "transcript:XYZ");
202 assertFalse(testee.retainFeature(sf, accId));
206 * Test the method that picks out 'exon' (or subtype) features with the
207 * accession id as parent
// A single SequenceFeature is reused throughout; its "Parent" value and type
// are changed in place between assertions.
209 @Test(groups = "Functional")
210 public void testIdentifiesSequence()
212 String accId = "ABC123";
213 EnsemblCdna testee = new EnsemblCdna();
215 // exon with no parent not valid
216 SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
217 assertFalse(testee.identifiesSequence(sf, accId));
219 // exon with wrong parent not valid
220 sf.setValue("Parent", "transcript:XYZ");
221 assertFalse(testee.identifiesSequence(sf, accId));
223 // exon with right parent is valid
224 sf.setValue("Parent", "transcript:" + accId);
225 assertTrue(testee.identifiesSequence(sf, accId));
227 // exon sub-type with right parent is valid
228 sf.setType("coding_exon");
229 assertTrue(testee.identifiesSequence(sf, accId));
231 // transcript not valid:
232 sf.setType("transcript");
233 assertFalse(testee.identifiesSequence(sf, accId));
// NOTE(review): the lines between the previous assertion and this one are not
// visible in this listing (numbering skips 234-236); whatever they change on sf
// before this second negative check cannot be confirmed from here.
237 assertFalse(testee.identifiesSequence(sf, accId));
// Checks which accession strings isValidReference accepts when called on an
// EnsemblCdna held through its EnsemblSequenceFetcher type.
240 @Test(groups = "Functional")
241 public void testIsValidReference() throws Exception
243 EnsemblSequenceFetcher esq = new EnsemblCdna();
// accepted: a CCDS id with a ".1" suffix, and ENST / ENSG ids with 11 digits
244 Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
245 Assert.assertTrue(esq.isValidReference("ENST00000288602"));
246 Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
// rejected: ENSP prefix (presumably an Ensembl protein id - confirm)
247 Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
// rejected: only 10 digits follow the ENST prefix, one fewer than the accepted id
248 Assert.assertFalse(esq.isValidReference("ENST0000288602"));
249 // non-human species having a 3 character identifier included:
250 Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));