779962c1d187c342fd343cd7fec74a491f3fdf3e
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertTrue;
27
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.io.gff.SequenceOntologyFactory;
33 import jalview.io.gff.SequenceOntologyLite;
34 import jalview.util.MapList;
35
36 import java.util.List;
37
38 import org.testng.Assert;
39 import org.testng.annotations.AfterClass;
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class EnsemblCdnaTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   @BeforeClass(alwaysRun = true)
54   public void setUp()
55   {
56     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
57   }
58
59   @AfterClass(alwaysRun = true)
60   public void tearDown()
61   {
62     SequenceOntologyFactory.setInstance(null);
63   }
64
65   /**
66    * Test that the cdna part of genomic sequence is correctly identified by
67    * 'exon' features (or subtypes) - reverse strand case.
68    */
69   @Test(groups = "Functional")
70   public void testGetGenomicRangesFromFeatures_reverseStrand()
71   {
72     EnsemblCdna testee = new EnsemblCdna();
73     SequenceI genomic = new SequenceDummy("chr7");
74     genomic.setStart(10000);
75     genomic.setEnd(50000);
76     String transcriptId = "ABC123";
77
78     // exon at (start+10000) length 501
79     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
80             null);
81     sf.setValue("Parent", "transcript:" + transcriptId);
82     sf.setStrand("-");
83     genomic.addSequenceFeature(sf);
84
85     // exon (sub-type) at (start + exon_variant) length 101
86     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
87     sf.setValue("Parent", "transcript:" + transcriptId);
88     sf.setStrand("-");
89     genomic.addSequenceFeature(sf);
90
91     // exon belonging to a different transcript doesn't count
92     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
93     sf.setValue("Parent", "transcript:anotherOne");
94     genomic.addSequenceFeature(sf);
95
96     // transcript feature doesn't count
97     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
98     genomic.addSequenceFeature(sf);
99
100     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
101             transcriptId, 23);
102     List<int[]> fromRanges = ranges.getFromRanges();
103     assertEquals(2, fromRanges.size());
104
105     /*
106      * from ranges should be sorted by start order descending
107      * and hold mappings from reverse strand sense
108      */
109     assertEquals(20500, fromRanges.get(0)[0]);
110     assertEquals(20000, fromRanges.get(0)[1]);
111     assertEquals(10600, fromRanges.get(1)[0]);
112     assertEquals(10500, fromRanges.get(1)[1]);
113     // to range should start from given start numbering
114     List<int[]> toRanges = ranges.getToRanges();
115     assertEquals(1, toRanges.size());
116     assertEquals(23, toRanges.get(0)[0]);
117     assertEquals(624, toRanges.get(0)[1]);
118   }
119
120   /**
121    * Test that the cdna part of genomic sequence is correctly identified by
122    * 'exon' features (or subtypes) with the desired transcript as parent
123    */
124   @Test(groups = "Functional")
125   public void testGetGenomicRangesFromFeatures()
126   {
127     EnsemblCdna testee = new EnsemblCdna();
128     SequenceI genomic = new SequenceDummy("chr7");
129     genomic.setStart(10000);
130     genomic.setEnd(50000);
131     String transcriptId = "ABC123";
132
133     // exon at (start+10000) length 501
134     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
135             null);
136     sf.setValue("Parent", "transcript:" + transcriptId);
137     sf.setStrand("+");
138     genomic.addSequenceFeature(sf);
139
140     // exon (sub-type) at (start + exon_variant) length 101
141     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
142     sf.setValue("Parent", "transcript:" + transcriptId);
143     sf.setStrand("+");
144     genomic.addSequenceFeature(sf);
145
146     // exon belonging to a different transcript doesn't count
147     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
148     sf.setValue("Parent", "transcript:anotherOne");
149     genomic.addSequenceFeature(sf);
150
151     // transcript feature doesn't count
152     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
153     sf.setStrand("-"); // weird but ignored
154     genomic.addSequenceFeature(sf);
155
156     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
157             transcriptId, 23);
158     List<int[]> fromRanges = ranges.getFromRanges();
159     assertEquals(2, fromRanges.size());
160     // from ranges should be sorted by start order
161     assertEquals(10500, fromRanges.get(0)[0]);
162     assertEquals(10600, fromRanges.get(0)[1]);
163     assertEquals(20000, fromRanges.get(1)[0]);
164     assertEquals(20500, fromRanges.get(1)[1]);
165     // to range should start from given start numbering
166     List<int[]> toRanges = ranges.getToRanges();
167     assertEquals(1, toRanges.size());
168     assertEquals(23, toRanges.get(0)[0]);
169     assertEquals(624, toRanges.get(0)[1]);
170   }
171
172   /**
173    * The method under test should give up and return null if both forward and
174    * reverse strands are present in the features of interest
175    */
176   @Test(groups = "Functional")
177   public void testGetGenomicRangesFromFeatures_mixedStrand()
178   {
179     EnsemblCdna testee = new EnsemblCdna();
180     SequenceI genomic = new SequenceDummy("chr7");
181     genomic.setStart(10000);
182     genomic.setEnd(50000);
183     String transcriptId = "ABC123";
184
185     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
186             null);
187     sf.setValue("Parent", "transcript:" + transcriptId);
188     sf.setStrand("-");
189     genomic.addSequenceFeature(sf);
190
191     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
192     sf.setValue("Parent", "transcript:" + transcriptId);
193     sf.setStrand("+");
194     genomic.addSequenceFeature(sf);
195
196     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
197             transcriptId, 23);
198     assertNull(ranges);
199   }
200
201   /**
202    * Test the method that retains features except for 'transcript' (or
203    * subtypes), or features with parent other than the given id
204    */
205   @Test(groups = "Functional")
206   public void testRetainFeature()
207   {
208     String accId = "ABC123";
209     EnsemblCdna testee = new EnsemblCdna();
210
211     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
212             20500, 0f, null);
213     assertFalse(testee.retainFeature(sf, accId));
214
215     sf = new SequenceFeature("aberrant_processed_transcript", "", 20000,
216             20500, 0f, null);
217     assertFalse(testee.retainFeature(sf, accId));
218
219     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
220             0f, null);
221     assertFalse(testee.retainFeature(sf, accId));
222
223     // other feature with no parent is retained
224     sf = new SequenceFeature("sequence_variant", "", 20000, 20500, 0f, null);
225     assertTrue(testee.retainFeature(sf, accId));
226
227     // other feature with desired parent is retained
228     sf.setValue("Parent", "transcript:" + accId);
229     assertTrue(testee.retainFeature(sf, accId));
230
231     // test is not case-sensitive
232     assertTrue(testee.retainFeature(sf, accId.toLowerCase()));
233
234     // feature with wrong parent is not retained
235     sf.setValue("Parent", "transcript:XYZ");
236     assertFalse(testee.retainFeature(sf, accId));
237   }
238
239   /**
240    * Test the method that picks out 'exon' (or subtype) features with the
241    * accession id as parent
242    */
243   @Test(groups = "Functional")
244   public void testIdentifiesSequence()
245   {
246     String accId = "ABC123";
247     EnsemblCdna testee = new EnsemblCdna();
248
249     // exon with no parent not valid
250     SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
251     assertFalse(testee.identifiesSequence(sf, accId));
252
253     // exon with wrong parent not valid
254     sf.setValue("Parent", "transcript:XYZ");
255     assertFalse(testee.identifiesSequence(sf, accId));
256
257     // exon with right parent is valid
258     sf.setValue("Parent", "transcript:" + accId);
259     assertTrue(testee.identifiesSequence(sf, accId));
260
261     // exon sub-type with right parent is valid
262     sf = new SequenceFeature("coding_exon", "", 1, 2, 0f, null);
263     sf.setValue("Parent", "transcript:" + accId);
264     assertTrue(testee.identifiesSequence(sf, accId));
265
266     // transcript not valid:
267     sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
268     sf.setValue("Parent", "transcript:" + accId);
269     assertFalse(testee.identifiesSequence(sf, accId));
270
271     // CDS not valid:
272     sf = new SequenceFeature("CDS", "", 1, 2, 0f, null);
273     sf.setValue("Parent", "transcript:" + accId);
274     assertFalse(testee.identifiesSequence(sf, accId));
275   }
276
277   @Test(groups = "Functional")
278   public void testIsValidReference() throws Exception
279   {
280     EnsemblSequenceFetcher esq = new EnsemblCdna();
281     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
282     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
283     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
284     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
285     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
286     // non-human species having a 3 character identifier included:
287     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
288   }
289 }