JAL-3076 refactor for more efficient scan of 'gene' features
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertTrue;
27
28 import jalview.datamodel.Sequence;
29 import jalview.datamodel.SequenceDummy;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceI;
32 import jalview.gui.JvOptionPane;
33 import jalview.io.gff.SequenceOntologyFactory;
34 import jalview.io.gff.SequenceOntologyLite;
35 import jalview.util.MapList;
36
37 import java.util.List;
38
39 import org.testng.Assert;
40 import org.testng.annotations.AfterClass;
41 import org.testng.annotations.BeforeClass;
42 import org.testng.annotations.Test;
43
44 public class EnsemblCdnaTest
45 {
46
47   @BeforeClass(alwaysRun = true)
48   public void setUpJvOptionPane()
49   {
50     JvOptionPane.setInteractiveMode(false);
51     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
52   }
53
54   @BeforeClass(alwaysRun = true)
55   public void setUp()
56   {
57     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
58   }
59
60   @AfterClass(alwaysRun = true)
61   public void tearDown()
62   {
63     SequenceOntologyFactory.setInstance(null);
64   }
65
66   /**
67    * Test that the cdna part of genomic sequence is correctly identified by
68    * 'exon' features (or subtypes) - reverse strand case.
69    */
70   @Test(groups = "Functional")
71   public void testGetGenomicRangesFromFeatures_reverseStrand()
72   {
73     EnsemblCdna testee = new EnsemblCdna();
74     SequenceI genomic = new SequenceDummy("chr7");
75     genomic.setStart(10000);
76     genomic.setEnd(50000);
77     String transcriptId = "ABC123";
78
79     // exon at (start+10000) length 501
80     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
81             null);
82     sf.setValue("Parent", "transcript:" + transcriptId);
83     sf.setStrand("-");
84     genomic.addSequenceFeature(sf);
85
86     // exon (sub-type) at (start + exon_variant) length 101
87     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
88     sf.setValue("Parent", "transcript:" + transcriptId);
89     sf.setStrand("-");
90     genomic.addSequenceFeature(sf);
91
92     // exon belonging to a different transcript doesn't count
93     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
94     sf.setValue("Parent", "transcript:anotherOne");
95     genomic.addSequenceFeature(sf);
96
97     // transcript feature doesn't count
98     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
99     genomic.addSequenceFeature(sf);
100
101     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
102             transcriptId, 23);
103     List<int[]> fromRanges = ranges.getFromRanges();
104     assertEquals(2, fromRanges.size());
105
106     /*
107      * from ranges should be sorted by start order descending
108      * and hold mappings from reverse strand sense
109      */
110     assertEquals(20500, fromRanges.get(0)[0]);
111     assertEquals(20000, fromRanges.get(0)[1]);
112     assertEquals(10600, fromRanges.get(1)[0]);
113     assertEquals(10500, fromRanges.get(1)[1]);
114     // to range should start from given start numbering
115     List<int[]> toRanges = ranges.getToRanges();
116     assertEquals(1, toRanges.size());
117     assertEquals(23, toRanges.get(0)[0]);
118     assertEquals(624, toRanges.get(0)[1]);
119   }
120
121   /**
122    * Test that the cdna part of genomic sequence is correctly identified by
123    * 'exon' features (or subtypes) with the desired transcript as parent
124    */
125   @Test(groups = "Functional")
126   public void testGetGenomicRangesFromFeatures()
127   {
128     EnsemblCdna testee = new EnsemblCdna();
129     SequenceI genomic = new SequenceDummy("chr7");
130     genomic.setStart(10000);
131     genomic.setEnd(50000);
132     String transcriptId = "ABC123";
133
134     // exon at (start+10000) length 501
135     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
136             null);
137     sf.setValue("Parent", "transcript:" + transcriptId);
138     sf.setStrand("+");
139     genomic.addSequenceFeature(sf);
140
141     // exon (sub-type) at (start + exon_variant) length 101
142     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
143     sf.setValue("Parent", "transcript:" + transcriptId);
144     sf.setStrand("+");
145     genomic.addSequenceFeature(sf);
146
147     // exon belonging to a different transcript doesn't count
148     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
149     sf.setValue("Parent", "transcript:anotherOne");
150     genomic.addSequenceFeature(sf);
151
152     // transcript feature doesn't count
153     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
154     sf.setStrand("-"); // weird but ignored
155     genomic.addSequenceFeature(sf);
156
157     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
158             transcriptId, 23);
159     List<int[]> fromRanges = ranges.getFromRanges();
160     assertEquals(2, fromRanges.size());
161     // from ranges should be sorted by start order
162     assertEquals(10500, fromRanges.get(0)[0]);
163     assertEquals(10600, fromRanges.get(0)[1]);
164     assertEquals(20000, fromRanges.get(1)[0]);
165     assertEquals(20500, fromRanges.get(1)[1]);
166     // to range should start from given start numbering
167     List<int[]> toRanges = ranges.getToRanges();
168     assertEquals(1, toRanges.size());
169     assertEquals(23, toRanges.get(0)[0]);
170     assertEquals(624, toRanges.get(0)[1]);
171   }
172
173   /**
174    * The method under test should give up and return null if both forward and
175    * reverse strands are present in the features of interest
176    */
177   @Test(groups = "Functional")
178   public void testGetGenomicRangesFromFeatures_mixedStrand()
179   {
180     EnsemblCdna testee = new EnsemblCdna();
181     SequenceI genomic = new SequenceDummy("chr7");
182     genomic.setStart(10000);
183     genomic.setEnd(50000);
184     String transcriptId = "ABC123";
185
186     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
187             null);
188     sf.setValue("Parent", "transcript:" + transcriptId);
189     sf.setStrand("-");
190     genomic.addSequenceFeature(sf);
191
192     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
193     sf.setValue("Parent", "transcript:" + transcriptId);
194     sf.setStrand("+");
195     genomic.addSequenceFeature(sf);
196
197     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
198             transcriptId, 23);
199     assertNull(ranges);
200   }
201
202   /**
203    * Test the method that retains features except for 'transcript' (or
204    * subtypes), or features with parent other than the given id
205    */
206   @Test(groups = "Functional")
207   public void testRetainFeature()
208   {
209     String accId = "ABC123";
210     EnsemblCdna testee = new EnsemblCdna();
211
212     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
213             20500, 0f, null);
214     assertFalse(testee.retainFeature(sf, accId));
215
216     sf = new SequenceFeature("aberrant_processed_transcript", "", 20000,
217             20500, 0f, null);
218     assertFalse(testee.retainFeature(sf, accId));
219
220     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
221             0f, null);
222     assertFalse(testee.retainFeature(sf, accId));
223
224     // other feature with no parent is retained
225     sf = new SequenceFeature("sequence_variant", "", 20000, 20500, 0f, null);
226     assertTrue(testee.retainFeature(sf, accId));
227
228     // other feature with desired parent is retained
229     sf.setValue("Parent", "transcript:" + accId);
230     assertTrue(testee.retainFeature(sf, accId));
231
232     // test is not case-sensitive
233     assertTrue(testee.retainFeature(sf, accId.toLowerCase()));
234
235     // feature with wrong parent is not retained
236     sf.setValue("Parent", "transcript:XYZ");
237     assertFalse(testee.retainFeature(sf, accId));
238   }
239
240   /**
241    * Test the method that picks out 'exon' (or subtype) features with the
242    * accession id as parent
243    */
244   @Test(groups = "Functional")
245   public void testGetIdentifyingFeatures()
246   {
247     String accId = "ABC123";
248     SequenceI seq = new Sequence(accId, "MKLNFRQIE");
249
250     // exon with no parent: not valid
251     SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null);
252     seq.addSequenceFeature(sf1);
253
254     // exon with wrong parent: not valid
255     SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
256     sf2.setValue("Parent", "transcript:XYZ");
257     seq.addSequenceFeature(sf2);
258
259     // exon with right parent is valid
260     SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
261     sf3.setValue("Parent", "transcript:" + accId);
262     seq.addSequenceFeature(sf3);
263
264     // exon sub-type with right parent is valid
265     SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
266             null);
267     sf4.setValue("Parent", "transcript:" + accId);
268     seq.addSequenceFeature(sf4);
269
270     // transcript not valid:
271     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
272             null);
273     sf5.setValue("Parent", "transcript:" + accId);
274     seq.addSequenceFeature(sf5);
275
276     // CDS not valid:
277     SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
278             null);
279     sf6.setValue("Parent", "transcript:" + accId);
280     seq.addSequenceFeature(sf6);
281
282     List<SequenceFeature> sfs = new EnsemblCdna()
283             .getIdentifyingFeatures(seq, accId);
284     assertFalse(sfs.contains(sf1));
285     assertFalse(sfs.contains(sf2));
286     assertTrue(sfs.contains(sf3));
287     assertTrue(sfs.contains(sf4));
288     assertFalse(sfs.contains(sf5));
289     assertFalse(sfs.contains(sf6));
290   }
291
292   @Test(groups = "Functional")
293   public void testIsValidReference() throws Exception
294   {
295     EnsemblSequenceFetcher esq = new EnsemblCdna();
296     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
297     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
298     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
299     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
300     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
301     // non-human species having a 3 character identifier included:
302     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
303   }
304 }