2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.ensembl;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertTrue;
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.io.gff.SequenceOntologyFactory;
32 import jalview.io.gff.SequenceOntologyLite;
33 import jalview.util.MapList;
35 import java.util.List;
37 import org.testng.Assert;
38 import org.testng.annotations.AfterClass;
39 import org.testng.annotations.BeforeClass;
40 import org.testng.annotations.Test;
42 public class EnsemblCdnaTest
// Class-level fixture: before the tests in this class run, installs a
// SequenceOntologyLite as the instance held by SequenceOntologyFactory.
// alwaysRun = true is set on the annotation.
// NOTE(review): the method declaration line and braces are not visible in
// this view of the file; only the annotation and the body statement are.
// Presumably the lite ontology is what lets the tests below recognise
// feature sub-types such as "coding_exon" - confirm against SequenceOntologyLite.
44 @BeforeClass(alwaysRun = true)
47 SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
// Class-level cleanup: after the tests in this class have run, resets the
// SequenceOntologyFactory instance to null, undoing the instance installed
// by the @BeforeClass fixture in this class.
50 @AfterClass(alwaysRun = true)
51 public void tearDown()
// presumably forces later users of the factory to obtain a fresh instance - confirm
53 SequenceOntologyFactory.setInstance(null);
57 * Test that the cdna part of genomic sequence is correctly identified by
58 * 'exon' features (or subtypes) - reverse strand case.
// Reverse-strand case for EnsemblCdna.getGenomicRangesFromFeatures.
// Builds a dummy genomic sequence "chr7" numbered 10000-50000, adds two exon
// features whose Parent is transcript "ABC123" plus two features that the
// inline comments say must not count, and then checks the returned MapList.
// NOTE(review): this view of the file is lossy - the method braces and some
// statement continuation lines are not visible (the tail of the first
// SequenceFeature constructor call, whatever statement sat between setValue
// and addSequenceFeature for each exon, and the remaining arguments of
// getGenomicRangesFromFeatures). The strand given to the exons therefore
// cannot be confirmed from here; presumably it is the reverse strand.
60 @Test(groups = "Functional")
61 public void testGetGenomicRangesFromFeatures_reverseStrand()
63 EnsemblCdna testee = new EnsemblCdna();
64 SequenceI genomic = new SequenceDummy("chr7");
65 genomic.setStart(10000);
66 genomic.setEnd(50000);
67 String transcriptId = "ABC123";
69 // exon at (start+10000) length 501
70 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
72 sf.setValue("Parent", "transcript:" + transcriptId);
74 genomic.addSequenceFeature(sf);
76 // exon (sub-type) at (start + 500) length 101
77 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
78 sf.setValue("Parent", "transcript:" + transcriptId);
80 genomic.addSequenceFeature(sf);
82 // exon belonging to a different transcript doesn't count
83 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
84 sf.setValue("Parent", "transcript:anotherOne");
85 genomic.addSequenceFeature(sf);
87 // transcript feature doesn't count
88 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
89 genomic.addSequenceFeature(sf);
// NOTE(review): remaining arguments of this call are not visible; presumably
// the transcript id and the start position (23) asserted on the to-range below.
91 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
93 List<int[]> fromRanges = ranges.getFromRanges();
// only the two exons parented by transcriptId are expected to contribute
94 assertEquals(2, fromRanges.size());
97 * from ranges should be sorted by start order descending
98 * and hold mappings from reverse strand sense
// each expected range is written high-to-low: [20500, 20000] then [10600, 10500]
100 assertEquals(20500, fromRanges.get(0)[0]);
101 assertEquals(20000, fromRanges.get(0)[1]);
102 assertEquals(10600, fromRanges.get(1)[0]);
103 assertEquals(10500, fromRanges.get(1)[1]);
104 // to range should start from given start numbering
105 List<int[]> toRanges = ranges.getToRanges();
106 assertEquals(1, toRanges.size());
// 23..624 is 602 positions, i.e. the combined exon lengths 501 + 101
107 assertEquals(23, toRanges.get(0)[0]);
108 assertEquals(624, toRanges.get(0)[1]);
112 * Test that the cdna part of genomic sequence is correctly identified by
113 * 'exon' features (or subtypes) with the desired transcript as parent
// Forward-order case for EnsemblCdna.getGenomicRangesFromFeatures.
// Builds a dummy genomic sequence "chr7" numbered 10000-50000, adds two exon
// features whose Parent is transcript "ABC123" plus two features that the
// inline comments say must not count, and then checks the returned MapList.
// NOTE(review): this view of the file is lossy - the method braces and some
// statement continuation lines are not visible (the tail of the first
// SequenceFeature constructor call, whatever statement sat between setValue
// and addSequenceFeature for each exon, and the remaining arguments of
// getGenomicRangesFromFeatures). Presumably the exons are put on the forward
// strand in the missing lines - confirm against the full file.
115 @Test(groups = "Functional")
116 public void testGetGenomicRangesFromFeatures()
118 EnsemblCdna testee = new EnsemblCdna();
119 SequenceI genomic = new SequenceDummy("chr7");
120 genomic.setStart(10000);
121 genomic.setEnd(50000);
122 String transcriptId = "ABC123";
124 // exon at (start+10000) length 501
125 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
127 sf.setValue("Parent", "transcript:" + transcriptId);
129 genomic.addSequenceFeature(sf);
131 // exon (sub-type) at (start + 500) length 101
132 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
133 sf.setValue("Parent", "transcript:" + transcriptId);
135 genomic.addSequenceFeature(sf);
137 // exon belonging to a different transcript doesn't count
138 sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
139 sf.setValue("Parent", "transcript:anotherOne");
140 genomic.addSequenceFeature(sf);
142 // transcript feature doesn't count
143 sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
144 sf.setStrand("-"); // weird but ignored
145 genomic.addSequenceFeature(sf);
// NOTE(review): remaining arguments of this call are not visible; presumably
// the transcript id and the start position (23) asserted on the to-range below.
147 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
149 List<int[]> fromRanges = ranges.getFromRanges();
// only the two exons parented by transcriptId are expected to contribute
150 assertEquals(2, fromRanges.size());
151 // from ranges should be sorted by start order
// the exon added second (10500-10600) is expected first in the result
152 assertEquals(10500, fromRanges.get(0)[0]);
153 assertEquals(10600, fromRanges.get(0)[1]);
154 assertEquals(20000, fromRanges.get(1)[0]);
155 assertEquals(20500, fromRanges.get(1)[1]);
156 // to range should start from given start numbering
157 List<int[]> toRanges = ranges.getToRanges();
158 assertEquals(1, toRanges.size());
// 23..624 is 602 positions, i.e. the combined exon lengths 501 + 101
159 assertEquals(23, toRanges.get(0)[0]);
160 assertEquals(624, toRanges.get(0)[1]);
164 * The method under test should give up and return null if both forward and
165 * reverse strands are present in the features of interest
// Mixed-strand case for EnsemblCdna.getGenomicRangesFromFeatures.
// Builds a dummy genomic sequence "chr7" numbered 10000-50000 and adds an
// "exon" (20000-20500) and a "coding_exon" (10500-10600), both with Parent
// transcript "ABC123", then calls the method under test.
// NOTE(review): this view of the file is lossy - the statements that would
// give the two exons their (presumably opposite) strands, the remaining
// arguments of getGenomicRangesFromFeatures, and the assertion on the result
// are not visible here. Confirm against the full file before editing.
167 @Test(groups = "Functional")
168 public void testGetGenomicRangesFromFeatures_mixedStrand()
170 EnsemblCdna testee = new EnsemblCdna();
171 SequenceI genomic = new SequenceDummy("chr7");
172 genomic.setStart(10000);
173 genomic.setEnd(50000);
174 String transcriptId = "ABC123";
// first exon of the transcript; constructor tail not visible in this view
176 SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
178 sf.setValue("Parent", "transcript:" + transcriptId);
180 genomic.addSequenceFeature(sf);
// second exon of the same transcript, using an exon sub-type
182 sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
183 sf.setValue("Parent", "transcript:" + transcriptId);
185 genomic.addSequenceFeature(sf);
// NOTE(review): call is truncated here; the check on 'ranges' is not visible
187 MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
193 * Test the method that retains features, except for 'transcript' (or
194 * subtypes) and features whose parent is not the given id
// Exercises EnsemblCdna.retainFeature(sf, accId) by mutating one
// SequenceFeature in place (its type, then its "Parent" value) and asserting
// the result after each change.
// NOTE(review): the tail of the SequenceFeature constructor call and the
// method braces are not visible in this view of the file.
196 @Test(groups = "Functional")
197 public void testRetainFeature()
199 String accId = "ABC123";
200 EnsemblCdna testee = new EnsemblCdna();
// a plain "transcript" feature is expected to be rejected
202 SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
204 assertFalse(testee.retainFeature(sf, accId));
// these two types are expected to be rejected as well; presumably they are
// treated as transcript sub-types by the ontology in use - confirm
206 sf.setType("aberrant_processed_transcript");
207 assertFalse(testee.retainFeature(sf, accId));
209 sf.setType("NMD_transcript_variant");
210 assertFalse(testee.retainFeature(sf, accId));
212 // other feature with no parent is retained
213 sf.setType("sequence_variant");
214 assertTrue(testee.retainFeature(sf, accId));
216 // other feature with desired parent is retained
217 sf.setValue("Parent", "transcript:" + accId);
218 assertTrue(testee.retainFeature(sf, accId));
220 // feature with wrong parent is not retained
221 sf.setValue("Parent", "transcript:XYZ");
222 assertFalse(testee.retainFeature(sf, accId));
226 * Test the method that picks out 'exon' (or subtype) features with the
227 * accession id as parent
// Exercises EnsemblCdna.identifiesSequence(sf, accId) by mutating one
// SequenceFeature in place (its "Parent" value, then its type) and asserting
// the result after each change. The feature's coordinates (1-2) are the same
// throughout; only parent and type vary between assertions.
229 @Test(groups = "Functional")
230 public void testIdentifiesSequence()
232 String accId = "ABC123";
233 EnsemblCdna testee = new EnsemblCdna();
235 // exon with no parent not valid
236 SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
237 assertFalse(testee.identifiesSequence(sf, accId));
239 // exon with wrong parent not valid
240 sf.setValue("Parent", "transcript:XYZ");
241 assertFalse(testee.identifiesSequence(sf, accId));
243 // exon with right parent is valid
244 sf.setValue("Parent", "transcript:" + accId);
245 assertTrue(testee.identifiesSequence(sf, accId));
247 // exon sub-type with right parent is valid
// the Parent value set above is still in place; only the type changes
248 sf.setType("coding_exon");
249 assertTrue(testee.identifiesSequence(sf, accId));
251 // transcript not valid:
252 sf.setType("transcript");
253 assertFalse(testee.identifiesSequence(sf, accId));
// NOTE(review): the lines between the previous assertion and this one are not
// visible in this view; presumably the feature type is changed again before
// this check (as written it repeats the assertion above) - confirm.
257 assertFalse(testee.identifiesSequence(sf, accId));
// Checks which accession strings EnsemblCdna.isValidReference accepts, calling
// it through the EnsemblSequenceFetcher type. Unlike the other tests in this
// class, the assertions here go through org.testng.Assert rather than the
// statically imported AssertJUnit methods.
260 @Test(groups = "Functional")
261 public void testIsValidReference() throws Exception
263 EnsemblSequenceFetcher esq = new EnsemblCdna();
// a CCDS-style id with a version suffix is expected to be accepted
264 Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
// ENST and ENSG prefixes followed by 11 digits are expected to be accepted
265 Assert.assertTrue(esq.isValidReference("ENST00000288602"));
266 Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
// same digits with an ENSP prefix are expected to be rejected
267 Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
// one digit shorter (10 digits) than the accepted ENST id above
268 Assert.assertFalse(esq.isValidReference("ENST0000288602"));
269 // non-human species having a 3 character identifier included:
270 Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));