JAL-3438 spotless for 2.11.2.0
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import java.util.Locale;
24
25 import static org.testng.AssertJUnit.assertEquals;
26 import static org.testng.AssertJUnit.assertFalse;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertTrue;
29
30 import jalview.datamodel.Sequence;
31 import jalview.datamodel.SequenceDummy;
32 import jalview.datamodel.SequenceFeature;
33 import jalview.datamodel.SequenceI;
34 import jalview.gui.JvOptionPane;
35 import jalview.io.gff.SequenceOntologyFactory;
36 import jalview.io.gff.SequenceOntologyLite;
37 import jalview.util.MapList;
38
39 import java.util.List;
40
41 import org.testng.Assert;
42 import org.testng.annotations.AfterClass;
43 import org.testng.annotations.BeforeClass;
44 import org.testng.annotations.Test;
45
46 public class EnsemblCdnaTest
47 {
48
49   @BeforeClass(alwaysRun = true)
50   public void setUpJvOptionPane()
51   {
52     JvOptionPane.setInteractiveMode(false);
53     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
54   }
55
56   @BeforeClass(alwaysRun = true)
57   public void setUp()
58   {
59     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
60   }
61
62   @AfterClass(alwaysRun = true)
63   public void tearDown()
64   {
65     SequenceOntologyFactory.setInstance(null);
66   }
67
68   /**
69    * Test that the cdna part of genomic sequence is correctly identified by
70    * 'exon' features (or subtypes) - reverse strand case.
71    */
72   @Test(groups = "Functional")
73   public void testGetGenomicRangesFromFeatures_reverseStrand()
74   {
75     EnsemblCdna testee = new EnsemblCdna();
76     SequenceI genomic = new SequenceDummy("chr7");
77     genomic.setStart(10000);
78     genomic.setEnd(50000);
79     String transcriptId = "ABC123";
80
81     // exon at (start+10000) length 501
82     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
83             null);
84     sf.setValue("Parent", transcriptId);
85     sf.setStrand("-");
86     genomic.addSequenceFeature(sf);
87
88     // exon (sub-type) at (start + exon_variant) length 101
89     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
90     sf.setValue("Parent", transcriptId);
91     sf.setStrand("-");
92     genomic.addSequenceFeature(sf);
93
94     // exon belonging to a different transcript doesn't count
95     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
96     sf.setValue("Parent", "anotherOne");
97     genomic.addSequenceFeature(sf);
98
99     // transcript feature doesn't count
100     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
101     genomic.addSequenceFeature(sf);
102
103     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
104             transcriptId, 23);
105     List<int[]> fromRanges = ranges.getFromRanges();
106     assertEquals(2, fromRanges.size());
107
108     /*
109      * from ranges should be sorted by start order descending
110      * and hold mappings from reverse strand sense
111      */
112     assertEquals(20500, fromRanges.get(0)[0]);
113     assertEquals(20000, fromRanges.get(0)[1]);
114     assertEquals(10600, fromRanges.get(1)[0]);
115     assertEquals(10500, fromRanges.get(1)[1]);
116     // to range should start from given start numbering
117     List<int[]> toRanges = ranges.getToRanges();
118     assertEquals(1, toRanges.size());
119     assertEquals(23, toRanges.get(0)[0]);
120     assertEquals(624, toRanges.get(0)[1]);
121   }
122
123   /**
124    * Test that the cdna part of genomic sequence is correctly identified by
125    * 'exon' features (or subtypes) with the desired transcript as parent
126    */
127   @Test(groups = "Functional")
128   public void testGetGenomicRangesFromFeatures()
129   {
130     EnsemblCdna testee = new EnsemblCdna();
131     SequenceI genomic = new SequenceDummy("chr7");
132     genomic.setStart(10000);
133     genomic.setEnd(50000);
134     String transcriptId = "ABC123";
135
136     // exon at (start+10000) length 501
137     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
138             null);
139     sf.setValue("Parent", transcriptId);
140     sf.setStrand("+");
141     genomic.addSequenceFeature(sf);
142
143     // exon (sub-type) at (start + exon_variant) length 101
144     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
145     sf.setValue("Parent", transcriptId);
146     sf.setStrand("+");
147     genomic.addSequenceFeature(sf);
148
149     // exon belonging to a different transcript doesn't count
150     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
151     sf.setValue("Parent", "anotherOne");
152     genomic.addSequenceFeature(sf);
153
154     // transcript feature doesn't count
155     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
156     sf.setStrand("-"); // weird but ignored
157     genomic.addSequenceFeature(sf);
158
159     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
160             transcriptId, 23);
161     List<int[]> fromRanges = ranges.getFromRanges();
162     assertEquals(2, fromRanges.size());
163     // from ranges should be sorted by start order
164     assertEquals(10500, fromRanges.get(0)[0]);
165     assertEquals(10600, fromRanges.get(0)[1]);
166     assertEquals(20000, fromRanges.get(1)[0]);
167     assertEquals(20500, fromRanges.get(1)[1]);
168     // to range should start from given start numbering
169     List<int[]> toRanges = ranges.getToRanges();
170     assertEquals(1, toRanges.size());
171     assertEquals(23, toRanges.get(0)[0]);
172     assertEquals(624, toRanges.get(0)[1]);
173   }
174
175   /**
176    * The method under test should give up and return null if both forward and
177    * reverse strands are present in the features of interest
178    */
179   @Test(groups = "Functional")
180   public void testGetGenomicRangesFromFeatures_mixedStrand()
181   {
182     EnsemblCdna testee = new EnsemblCdna();
183     SequenceI genomic = new SequenceDummy("chr7");
184     genomic.setStart(10000);
185     genomic.setEnd(50000);
186     String transcriptId = "ABC123";
187
188     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
189             null);
190     sf.setValue("Parent", "transcript:" + transcriptId);
191     sf.setStrand("-");
192     genomic.addSequenceFeature(sf);
193
194     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
195     sf.setValue("Parent", "transcript:" + transcriptId);
196     sf.setStrand("+");
197     genomic.addSequenceFeature(sf);
198
199     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
200             transcriptId, 23);
201     assertNull(ranges);
202   }
203
204   /**
205    * Test the method that retains features except for 'transcript' (or
206    * subtypes), or features with parent other than the given id
207    */
208   @Test(groups = "Functional")
209   public void testRetainFeature()
210   {
211     String accId = "ABC123";
212     EnsemblCdna testee = new EnsemblCdna();
213
214     SequenceFeature sf = new SequenceFeature("transcript", "", 20000, 20500,
215             0f, null);
216     assertFalse(testee.retainFeature(sf, accId));
217
218     sf = new SequenceFeature("aberrant_processed_transcript", "", 20000,
219             20500, 0f, null);
220     assertFalse(testee.retainFeature(sf, accId));
221
222     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500, 0f,
223             null);
224     assertFalse(testee.retainFeature(sf, accId));
225
226     // other feature with no parent is retained
227     sf = new SequenceFeature("sequence_variant", "", 20000, 20500, 0f,
228             null);
229     assertTrue(testee.retainFeature(sf, accId));
230
231     // other feature with desired parent is retained
232     sf.setValue("Parent", accId);
233     assertTrue(testee.retainFeature(sf, accId));
234
235     // test is not case-sensitive
236     assertTrue(testee.retainFeature(sf, accId.toLowerCase(Locale.ROOT)));
237
238     // feature with wrong parent is not retained
239     sf.setValue("Parent", "XYZ");
240     assertFalse(testee.retainFeature(sf, accId));
241   }
242
243   /**
244    * Test the method that picks out 'exon' (or subtype) features with the
245    * accession id as parent
246    */
247   @Test(groups = "Functional")
248   public void testGetIdentifyingFeatures()
249   {
250     String accId = "ABC123";
251     SequenceI seq = new Sequence(accId, "MKLNFRQIE");
252
253     // exon with no parent: not valid
254     SequenceFeature sf1 = new SequenceFeature("exon", "", 1, 2, 0f, null);
255     seq.addSequenceFeature(sf1);
256
257     // exon with wrong parent: not valid
258     SequenceFeature sf2 = new SequenceFeature("exon", "", 1, 2, 0f, null);
259     sf2.setValue("Parent", "XYZ");
260     seq.addSequenceFeature(sf2);
261
262     // exon with right parent is valid
263     SequenceFeature sf3 = new SequenceFeature("exon", "", 1, 2, 0f, null);
264     sf3.setValue("Parent", accId);
265     seq.addSequenceFeature(sf3);
266
267     // exon sub-type with right parent is valid
268     SequenceFeature sf4 = new SequenceFeature("coding_exon", "", 1, 2, 0f,
269             null);
270     sf4.setValue("Parent", accId);
271     seq.addSequenceFeature(sf4);
272
273     // transcript not valid:
274     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
275             null);
276     sf5.setValue("Parent", accId);
277     seq.addSequenceFeature(sf5);
278
279     // CDS not valid:
280     SequenceFeature sf6 = new SequenceFeature("transcript", "", 1, 2, 0f,
281             null);
282     sf6.setValue("Parent", accId);
283     seq.addSequenceFeature(sf6);
284
285     List<SequenceFeature> sfs = new EnsemblCdna()
286             .getIdentifyingFeatures(seq, accId);
287     assertFalse(sfs.contains(sf1));
288     assertFalse(sfs.contains(sf2));
289     assertTrue(sfs.contains(sf3));
290     assertTrue(sfs.contains(sf4));
291     assertFalse(sfs.contains(sf5));
292     assertFalse(sfs.contains(sf6));
293   }
294
295   @Test(groups = "Functional")
296   public void testIsValidReference() throws Exception
297   {
298     EnsemblSequenceFetcher esq = new EnsemblCdna();
299     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
300     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
301     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
302     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
303     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
304     // non-human species having a 3 character identifier included:
305     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
306   }
307 }