c927f0464629147d245ef2dd572d4d2d1130574a
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import java.util.Locale;
24
25 import static org.testng.AssertJUnit.assertEquals;
26 import static org.testng.AssertJUnit.assertFalse;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertTrue;
29
30 import jalview.datamodel.Sequence;
31 import jalview.datamodel.SequenceDummy;
32 import jalview.datamodel.SequenceFeature;
33 import jalview.datamodel.SequenceI;
34 import jalview.gui.JvOptionPane;
35 import jalview.io.gff.SequenceOntologyFactory;
36 import jalview.io.gff.SequenceOntologyLite;
37 import jalview.util.MapList;
38
39 import java.util.List;
40
41 import org.testng.Assert;
42 import org.testng.annotations.AfterClass;
43 import org.testng.annotations.BeforeClass;
44 import org.testng.annotations.Test;
45
46 public class EnsemblCdnaTest
47 {
48
49   @BeforeClass(alwaysRun = true)
50   public void setUpJvOptionPane()
51   {
52     JvOptionPane.setInteractiveMode(false);
53     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
54   }
55
56   @BeforeClass(alwaysRun = true)
57   public void setUp()
58   {
59     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
60   }
61
62   @AfterClass(alwaysRun = true)
63   public void tearDown()
64   {
65     SequenceOntologyFactory.setInstance(null);
66   }
67
68   /**
69    * Test that the cdna part of genomic sequence is correctly identified by
70    * 'exon' features (or subtypes) - reverse strand case.
71    */
72   @Test(groups = "Functional")
73   public void testGetGenomicRangesFromFeatures_reverseStrand()
74   {
75     EnsemblCdna testee = new EnsemblCdna();
76     SequenceI genomic = new SequenceDummy("chr7");
77     genomic.setStart(10000);
78     genomic.setEnd(50000);
79     String transcriptId = "ABC123";
80
81     // exon at (start+10000) length 501
82     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
83             null);
84     sf.setValue("Parent", transcriptId);
85     sf.setStrand("-");
86     genomic.addSequenceFeature(sf);
87
88     // exon (sub-type) at (start + exon_variant) length 101
89     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
90     sf.setValue("Parent", transcriptId);
91     sf.setStrand("-");
92     genomic.addSequenceFeature(sf);
93
94     // exon belonging to a different transcript doesn't count
95     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
96     sf.setValue("Parent", "anotherOne");
97     genomic.addSequenceFeature(sf);
98
99     // transcript feature doesn't count
100     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
101     genomic.addSequenceFeature(sf);
102
103     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
104             transcriptId, 23);
105     List<int[]> fromRanges = ranges.getFromRanges();
106     assertEquals(2, fromRanges.size());
107
108     /*
109      * from ranges should be sorted by start order descending
110      * and hold mappings from reverse strand sense
111      */
112     assertEquals(20500, fromRanges.get(0)[0]);
113     assertEquals(20000, fromRanges.get(0)[1]);
114     assertEquals(10600, fromRanges.get(1)[0]);
115     assertEquals(10500, fromRanges.get(1)[1]);
116     // to range should start from given start numbering
117     List<int[]> toRanges = ranges.getToRanges();
118     assertEquals(1, toRanges.size());
119     assertEquals(23, toRanges.get(0)[0]);
120     assertEquals(624, toRanges.get(0)[1]);
121   }
122
123   /**
124    * Test that the cdna part of genomic sequence is correctly identified by
125    * 'exon' features (or subtypes) with the desired transcript as parent
126    */
127   @Test(groups = "Functional")
128   public void testGetGenomicRangesFromFeatures()
129   {
130     EnsemblCdna testee = new EnsemblCdna();
131     SequenceI genomic = new SequenceDummy("chr7");
132     genomic.setStart(10000);
133     genomic.setEnd(50000);
134     String transcriptId = "ABC123";
135
136     // exon at (start+10000) length 501
137     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
138             null);
139     sf.setValue("Parent", transcriptId);
140     sf.setStrand("+");
141     genomic.addSequenceFeature(sf);
142
143     // exon (sub-type) at (start + exon_variant) length 101
144     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
145     sf.setValue("Parent", transcriptId);
146     sf.setStrand("+");
147     genomic.addSequenceFeature(sf);
148
149     // exon belonging to a different transcript doesn't count
150     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
151     sf.setValue("Parent", "anotherOne");
152     genomic.addSequenceFeature(sf);
153
154     // transcript feature doesn't count
155     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
156     sf.setStrand("-"); // weird but ignored
157     genomic.addSequenceFeature(sf);
158
159     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
160             transcriptId, 23);
161     List<int[]> fromRanges = ranges.getFromRanges();
162     assertEquals(2, fromRanges.size());
163     // from ranges should be sorted by start order
164     assertEquals(10500, fromRanges.get(0)[0]);
165     assertEquals(10600, fromRanges.get(0)[1]);
166     assertEquals(20000, fromRanges.get(1)[0]);
167     assertEquals(20500, fromRanges.get(1)[1]);
168     // to range should start from given start numbering
169     List<int[]> toRanges = ranges.getToRanges();
170     assertEquals(1, toRanges.size());
171     assertEquals(23, toRanges.get(0)[0]);
172     assertEquals(624, toRanges.get(0)[1]);
173   }
174
175   /**
176    * The method under test should give up and return null if both forward and
177    * reverse strands are present in the features of interest
178    */
179   @Test(groups = "Functional")
180   public void testGetGenomicRangesFromFeatures_mixedStrand()
181   {
182     EnsemblCdna testee = new EnsemblCdna();
183     SequenceI genomic = new SequenceDummy("chr7");
184     genomic.setStart(10000);
185     genomic.setEnd(50000);
186     String transcriptId = "ABC123";
187
188     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
189             null);
190     sf.setValue("Parent", "transcript:" + transcriptId);
191     sf.setStrand("-");
192     genomic.addSequenceFeature(sf);
193
194     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
195     sf.setValue("Parent", "transcript:" + transcriptId);
196     sf.setStrand("+");
197     genomic.addSequenceFeature(sf);
198
199     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
200             transcriptId, 23);
201     assertNull(ranges);
202   }
203
204   /**
205    * Test the method that retains features except for 'transcript' (or
206    * subtypes), or features with parent other than the given id
207    */
208   @Test(groups = "Functional")
209   public void testRetainFeature()
210   {
211     String accId = "ABC123";
212     EnsemblCdna testee = new EnsemblCdna();
213
214     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
215             20500, 0f, null);
216     assertFalse(testee.retainFeature(sf, accId));
217
218     sf = new SequenceFeature("aberrant_processed_transcript", "", 20000,
219             20500, 0f, null);
220     assertFalse(testee.retainFeature(sf, accId));
221
222     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
223             0f, null);
224     assertFalse(testee.retainFeature(sf, accId));
225
226     // other feature with no parent is retained
227     sf = new SequenceFeature("sequence_variant", "", 20000, 20500, 0f, null);
228     assertTrue(testee.retainFeature(sf, accId));
229
230     // other feature with desired parent is retained
231     sf.setValue("Parent", accId);
232     assertTrue(testee.retainFeature(sf, accId));
233
234     // test is not case-sensitive
235     assertTrue(testee.retainFeature(sf, accId.toLowerCase(Locale.ROOT)));
236
237     // feature with wrong parent is not retained
238     sf.setValue("Parent", "XYZ");
239     assertFalse(testee.retainFeature(sf, accId));
240   }
241
242   /**
243    * Test the method that picks out 'exon' (or subtype) features with the
244    * accession id as parent
245    */
246   @Test(groups = "Functional")
247   public void testGetIdentifyingFeatures()
248   {
249     String accId = "ABC123";
250     SequenceI seq = new Sequence(accId, "MKLNFRQIE");
251
252     // exon with no parent: not valid
253     SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null);
254     seq.addSequenceFeature(sf1);
255
256     // exon with wrong parent: not valid
257     SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
258     sf2.setValue("Parent", "XYZ");
259     seq.addSequenceFeature(sf2);
260
261     // exon with right parent is valid
262     SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
263     sf3.setValue("Parent", accId);
264     seq.addSequenceFeature(sf3);
265
266     // exon sub-type with right parent is valid
267     SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
268             null);
269     sf4.setValue("Parent", accId);
270     seq.addSequenceFeature(sf4);
271
272     // transcript not valid:
273     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
274             null);
275     sf5.setValue("Parent", accId);
276     seq.addSequenceFeature(sf5);
277
278     // CDS not valid:
279     SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
280             null);
281     sf6.setValue("Parent", accId);
282     seq.addSequenceFeature(sf6);
283
284     List<SequenceFeature> sfs = new EnsemblCdna()
285             .getIdentifyingFeatures(seq, accId);
286     assertFalse(sfs.contains(sf1));
287     assertFalse(sfs.contains(sf2));
288     assertTrue(sfs.contains(sf3));
289     assertTrue(sfs.contains(sf4));
290     assertFalse(sfs.contains(sf5));
291     assertFalse(sfs.contains(sf6));
292   }
293
294   @Test(groups = "Functional")
295   public void testIsValidReference() throws Exception
296   {
297     EnsemblSequenceFetcher esq = new EnsemblCdna();
298     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
299     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
300     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
301     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
302     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
303     // non-human species having a 3 character identifier included:
304     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
305   }
306 }