1 package jalview.ext.ensembl;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
15 import java.util.List;
17 import org.testng.Assert;
18 import org.testng.annotations.AfterClass;
19 import org.testng.annotations.BeforeClass;
20 import org.testng.annotations.Test;
22 public class EnsemblCdnaTest
27 SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
// Teardown: resets the shared SequenceOntologyFactory instance to null,
// undoing the SequenceOntologyLite instance this class installs during setup.
// NOTE(review): the annotation line for this method is not visible in this
// view; AfterClass is imported, so presumably it runs once after all tests in
// the class - confirm.
31 public void tearDown()
33 SequenceOntologyFactory.setInstance(null);
36 * Test that the cdna part of genomic sequence is correctly identified by
37 * 'exon' features (or subtypes) - reverse strand case.
39 @Test(groups = "Functional")
40 public void testGetGenomicRangesFromFeatures_reverseStrand()
// Fixture: a dummy genomic sequence "chr7" spanning 10000-50000 that carries
// two exon-type features for transcript ABC123, one exon for another
// transcript, and one 'transcript' feature covering the whole sequence.
// NOTE(review): nothing visible here sets the features' strand; presumably
// that happens on lines missing from this view - confirm.
42 EnsemblCdna testee = new EnsemblCdna();
43 SequenceI genomic = new SequenceDummy("chr7");
44 genomic.setStart(10000);
45 genomic.setEnd(50000);
46 String transcriptId = "ABC123";
48 // exon at (start+10000) length 501
49 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
51 sf.setValue("Parent", "transcript:" + transcriptId);
53 genomic.addSequenceFeature(sf);
55 // exon (sub-type) at (start + 500) length 101
56 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
57 sf.setValue("Parent", "transcript:" + transcriptId);
59 genomic.addSequenceFeature(sf);
61 // exon belonging to a different transcript doesn't count
62 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
63 sf.setValue("Parent", "transcript:anotherOne");
64 genomic.addSequenceFeature(sf);
66 // transcript feature doesn't count
67 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
68 genomic.addSequenceFeature(sf);
// Call under test. The remaining arguments are on a continuation line that is
// not visible in this view; the 'to' range asserted below starts at 23, so
// presumably the transcript id and a start position of 23 are passed - confirm.
70 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
// Only the two exon-type features parented by ABC123 should yield ranges.
72 List<int[]> fromRanges = ranges.getFromRanges();
73 assertEquals(2, fromRanges.size());
76 * from ranges should be sorted by start order descending
77 * and hold mappings from reverse strand sense
// Each range is a two-element array; here element [0] is the higher
// coordinate and element [1] the lower one, with the 20000-20500 exon first.
79 assertEquals(20500, fromRanges.get(0)[0]);
80 assertEquals(20000, fromRanges.get(0)[1]);
81 assertEquals(10600, fromRanges.get(1)[0]);
82 assertEquals(10500, fromRanges.get(1)[1]);
83 // to range should start from given start numbering
// A single contiguous 'to' range 23-624 is expected: 602 positions, which is
// the combined length of the two exons (501 + 101).
84 List<int[]> toRanges = ranges.getToRanges();
85 assertEquals(1, toRanges.size());
86 assertEquals(23, toRanges.get(0)[0]);
87 assertEquals(624, toRanges.get(0)[1]);
91 * Test that the cdna part of genomic sequence is correctly identified by
92 * 'exon' features (or subtypes) with the desired transcript as parent
94 @Test(groups = "Functional")
95 public void testGetGenomicRangesFromFeatures()
// Fixture: a dummy genomic sequence "chr7" spanning 10000-50000 that carries
// two exon-type features for transcript ABC123, one exon for another
// transcript, and one 'transcript' feature covering the whole sequence.
// NOTE(review): some original lines between the statements below are missing
// from this view (e.g. the tail of the first constructor call) - confirm
// against the full file before editing.
97 EnsemblCdna testee = new EnsemblCdna();
98 SequenceI genomic = new SequenceDummy("chr7");
99 genomic.setStart(10000);
100 genomic.setEnd(50000);
101 String transcriptId = "ABC123";
103 // exon at (start+10000) length 501
104 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
106 sf.setValue("Parent", "transcript:" + transcriptId);
108 genomic.addSequenceFeature(sf);
110 // exon (sub-type) at (start + 500) length 101
111 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
112 sf.setValue("Parent", "transcript:" + transcriptId);
114 genomic.addSequenceFeature(sf);
116 // exon belonging to a different transcript doesn't count
117 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
118 sf.setValue("Parent", "transcript:anotherOne");
119 genomic.addSequenceFeature(sf);
121 // transcript feature doesn't count
122 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
123 sf.setStrand("-"); // weird but ignored
124 genomic.addSequenceFeature(sf);
// Call under test. The remaining arguments are on a continuation line that is
// not visible in this view; the 'to' range asserted below starts at 23, so
// presumably the transcript id and a start position of 23 are passed - confirm.
126 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
// Only the two exon-type features parented by ABC123 should yield ranges.
128 List<int[]> fromRanges = ranges.getFromRanges();
129 assertEquals(2, fromRanges.size());
130 // from ranges should be sorted by start order
// The features were added in the order 20000-20500 then 10500-10600, so these
// assertions check that the result is reordered to ascending start.
131 assertEquals(10500, fromRanges.get(0)[0]);
132 assertEquals(10600, fromRanges.get(0)[1]);
133 assertEquals(20000, fromRanges.get(1)[0]);
134 assertEquals(20500, fromRanges.get(1)[1]);
135 // to range should start from given start numbering
// A single contiguous 'to' range 23-624 is expected: 602 positions, which is
// the combined length of the two exons (501 + 101).
136 List<int[]> toRanges = ranges.getToRanges();
137 assertEquals(1, toRanges.size());
138 assertEquals(23, toRanges.get(0)[0]);
139 assertEquals(624, toRanges.get(0)[1]);
143 * The method under test should give up and return null if both forward and
144 * reverse strands are present in the features of interest
146 @Test(groups = "Functional")
147 public void testGetGenomicRangesFromFeatures_mixedStrand()
// Fixture: a dummy genomic sequence "chr7" spanning 10000-50000 with two
// exon-type features, both parented by transcript ABC123.
// NOTE(review): the statements that give the two features opposite strands
// are not visible in this view; presumably they sit on the missing lines
// between the setValue and addSequenceFeature calls - confirm.
149 EnsemblCdna testee = new EnsemblCdna();
150 SequenceI genomic = new SequenceDummy("chr7");
151 genomic.setStart(10000);
152 genomic.setEnd(50000);
153 String transcriptId = "ABC123";
155 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
157 sf.setValue("Parent", "transcript:" + transcriptId);
159 genomic.addSequenceFeature(sf);
161 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
162 sf.setValue("Parent", "transcript:" + transcriptId);
164 genomic.addSequenceFeature(sf);
// Call under test. Its remaining arguments and the assertion on the result
// are not visible in this view; given the description above, presumably the
// result is asserted to be null - confirm.
166 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
172 * Test the method that retains features except for 'transcript' (or
173 * subtypes), or features with parent other than the given id
175 @Test(groups = "Functional")
176 public void testRetainFeature()
// A single SequenceFeature is reused throughout: its type and its "Parent"
// value are changed between assertions, so the order of statements matters.
178 String accId = "ABC123";
179 EnsemblCdna testee = new EnsemblCdna();
// NOTE(review): the end of this constructor call is on a line that is not
// visible in this view.
181 SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
// 'transcript' and the two types set next are all expected to be rejected.
183 assertFalse(testee.retainFeature(sf, accId));
185 sf.setType("aberrant_processed_transcript");
186 assertFalse(testee.retainFeature(sf, accId));
188 sf.setType("NMD_transcript_variant");
189 assertFalse(testee.retainFeature(sf, accId));
191 // other feature with no parent is retained
192 sf.setType("sequence_variant");
193 assertTrue(testee.retainFeature(sf, accId));
195 // other feature with desired parent is retained
196 sf.setValue("Parent", "transcript:" + accId);
197 assertTrue(testee.retainFeature(sf, accId));
199 // feature with wrong parent is not retained
200 sf.setValue("Parent", "transcript:XYZ");
201 assertFalse(testee.retainFeature(sf, accId));
205 * Test the method that picks out 'exon' (or subtype) features with the
206 * accession id as parent
208 @Test(groups = "Functional")
209 public void testIdentifiesSequence()
// A single SequenceFeature is reused throughout: its "Parent" value and its
// type are changed between assertions, so the order of statements matters.
211 String accId = "ABC123";
212 EnsemblCdna testee = new EnsemblCdna();
214 // exon with no parent not valid
215 SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
216 assertFalse(testee.identifiesSequence(sf, accId));
218 // exon with wrong parent not valid
219 sf.setValue("Parent", "transcript:XYZ");
220 assertFalse(testee.identifiesSequence(sf, accId));
222 // exon with right parent is valid
223 sf.setValue("Parent", "transcript:" + accId);
224 assertTrue(testee.identifiesSequence(sf, accId));
226 // exon sub-type with right parent is valid
227 sf.setType("coding_exon");
228 assertTrue(testee.identifiesSequence(sf, accId));
230 // transcript not valid:
// The parent is still the matching transcript here, so only the type differs
// from the accepted cases above.
231 sf.setType("transcript");
232 assertFalse(testee.identifiesSequence(sf, accId));
// NOTE(review): the original lines just before the next assertion are not
// visible in this view; presumably they change the feature type again, since
// otherwise this repeats the previous check - confirm.
236 assertFalse(testee.identifiesSequence(sf, accId));
// Checks which accession strings EnsemblCdna accepts through
// isValidReference, using the class-qualified Assert methods.
239 @Test(groups = "Functional")
240 public void testIsValidReference() throws Exception
// The fetcher is held under its EnsemblSequenceFetcher type, on which
// isValidReference is called.
242 EnsemblSequenceFetcher esq = new EnsemblCdna();
// Accepted: a versioned CCDS id, and ENST / ENSG ids with 11 digits.
243 Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
244 Assert.assertTrue(esq.isValidReference("ENST00000288602"));
245 Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
// Rejected: an ENSP id, and an ENST id with one digit fewer (10) than the
// accepted ENST id above.
246 Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
247 Assert.assertFalse(esq.isValidReference("ENST0000288602"));
248 // non-human species having a 3 character identifier included:
249 Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));