JAL-2189 apply license
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertTrue;
27
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.io.gff.SequenceOntologyFactory;
32 import jalview.io.gff.SequenceOntologyLite;
33 import jalview.util.MapList;
34
35 import java.util.List;
36
37 import org.testng.Assert;
38 import org.testng.annotations.AfterClass;
39 import org.testng.annotations.BeforeClass;
40 import org.testng.annotations.Test;
41
42 public class EnsemblCdnaTest
43 {
44   @BeforeClass(alwaysRun = true)
45   public void setUp()
46   {
47     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
48   }
49
50   @AfterClass(alwaysRun = true)
51   public void tearDown()
52   {
53     SequenceOntologyFactory.setInstance(null);
54   }
55
56   /**
57    * Test that the cdna part of genomic sequence is correctly identified by
58    * 'exon' features (or subtypes) - reverse strand case.
59    */
60   @Test(groups = "Functional")
61   public void testGetGenomicRangesFromFeatures_reverseStrand()
62   {
63     EnsemblCdna testee = new EnsemblCdna();
64     SequenceI genomic = new SequenceDummy("chr7");
65     genomic.setStart(10000);
66     genomic.setEnd(50000);
67     String transcriptId = "ABC123";
68
69     // exon at (start+10000) length 501
70     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
71             null);
72     sf.setValue("Parent", "transcript:" + transcriptId);
73     sf.setStrand("-");
74     genomic.addSequenceFeature(sf);
75
76     // exon (sub-type) at (start + exon_variant) length 101
77     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
78     sf.setValue("Parent", "transcript:" + transcriptId);
79     sf.setStrand("-");
80     genomic.addSequenceFeature(sf);
81
82     // exon belonging to a different transcript doesn't count
83     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
84     sf.setValue("Parent", "transcript:anotherOne");
85     genomic.addSequenceFeature(sf);
86
87     // transcript feature doesn't count
88     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
89     genomic.addSequenceFeature(sf);
90
91     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
92             transcriptId, 23);
93     List<int[]> fromRanges = ranges.getFromRanges();
94     assertEquals(2, fromRanges.size());
95
96     /*
97      * from ranges should be sorted by start order descending
98      * and hold mappings from reverse strand sense
99      */
100     assertEquals(20500, fromRanges.get(0)[0]);
101     assertEquals(20000, fromRanges.get(0)[1]);
102     assertEquals(10600, fromRanges.get(1)[0]);
103     assertEquals(10500, fromRanges.get(1)[1]);
104     // to range should start from given start numbering
105     List<int[]> toRanges = ranges.getToRanges();
106     assertEquals(1, toRanges.size());
107     assertEquals(23, toRanges.get(0)[0]);
108     assertEquals(624, toRanges.get(0)[1]);
109   }
110
111   /**
112    * Test that the cdna part of genomic sequence is correctly identified by
113    * 'exon' features (or subtypes) with the desired transcript as parent
114    */
115   @Test(groups = "Functional")
116   public void testGetGenomicRangesFromFeatures()
117   {
118     EnsemblCdna testee = new EnsemblCdna();
119     SequenceI genomic = new SequenceDummy("chr7");
120     genomic.setStart(10000);
121     genomic.setEnd(50000);
122     String transcriptId = "ABC123";
123
124     // exon at (start+10000) length 501
125     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
126             null);
127     sf.setValue("Parent", "transcript:" + transcriptId);
128     sf.setStrand("+");
129     genomic.addSequenceFeature(sf);
130
131     // exon (sub-type) at (start + exon_variant) length 101
132     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
133     sf.setValue("Parent", "transcript:" + transcriptId);
134     sf.setStrand("+");
135     genomic.addSequenceFeature(sf);
136
137     // exon belonging to a different transcript doesn't count
138     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
139     sf.setValue("Parent", "transcript:anotherOne");
140     genomic.addSequenceFeature(sf);
141
142     // transcript feature doesn't count
143     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
144     sf.setStrand("-"); // weird but ignored
145     genomic.addSequenceFeature(sf);
146
147     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
148             transcriptId, 23);
149     List<int[]> fromRanges = ranges.getFromRanges();
150     assertEquals(2, fromRanges.size());
151     // from ranges should be sorted by start order
152     assertEquals(10500, fromRanges.get(0)[0]);
153     assertEquals(10600, fromRanges.get(0)[1]);
154     assertEquals(20000, fromRanges.get(1)[0]);
155     assertEquals(20500, fromRanges.get(1)[1]);
156     // to range should start from given start numbering
157     List<int[]> toRanges = ranges.getToRanges();
158     assertEquals(1, toRanges.size());
159     assertEquals(23, toRanges.get(0)[0]);
160     assertEquals(624, toRanges.get(0)[1]);
161   }
162
163   /**
164    * The method under test should give up and return null if both forward and
165    * reverse strands are present in the features of interest
166    */
167   @Test(groups = "Functional")
168   public void testGetGenomicRangesFromFeatures_mixedStrand()
169   {
170     EnsemblCdna testee = new EnsemblCdna();
171     SequenceI genomic = new SequenceDummy("chr7");
172     genomic.setStart(10000);
173     genomic.setEnd(50000);
174     String transcriptId = "ABC123";
175
176     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
177             null);
178     sf.setValue("Parent", "transcript:" + transcriptId);
179     sf.setStrand("-");
180     genomic.addSequenceFeature(sf);
181
182     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
183     sf.setValue("Parent", "transcript:" + transcriptId);
184     sf.setStrand("+");
185     genomic.addSequenceFeature(sf);
186
187     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
188             transcriptId, 23);
189     assertNull(ranges);
190   }
191
192   /**
193    * Test the method that retains features except for 'transcript' (or
194    * subtypes), or features with parent other than the given id
195    */
196   @Test(groups = "Functional")
197   public void testRetainFeature()
198   {
199     String accId = "ABC123";
200     EnsemblCdna testee = new EnsemblCdna();
201
202     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
203             20500, 0f, null);
204     assertFalse(testee.retainFeature(sf, accId));
205
206     sf.setType("aberrant_processed_transcript");
207     assertFalse(testee.retainFeature(sf, accId));
208
209     sf.setType("NMD_transcript_variant");
210     assertFalse(testee.retainFeature(sf, accId));
211
212     // other feature with no parent is retained
213     sf.setType("sequence_variant");
214     assertTrue(testee.retainFeature(sf, accId));
215
216     // other feature with desired parent is retained
217     sf.setValue("Parent", "transcript:" + accId);
218     assertTrue(testee.retainFeature(sf, accId));
219
220     // feature with wrong parent is not retained
221     sf.setValue("Parent", "transcript:XYZ");
222     assertFalse(testee.retainFeature(sf, accId));
223   }
224
225   /**
226    * Test the method that picks out 'exon' (or subtype) features with the
227    * accession id as parent
228    */
229   @Test(groups = "Functional")
230   public void testIdentifiesSequence()
231   {
232     String accId = "ABC123";
233     EnsemblCdna testee = new EnsemblCdna();
234
235     // exon with no parent not valid
236     SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
237     assertFalse(testee.identifiesSequence(sf, accId));
238
239     // exon with wrong parent not valid
240     sf.setValue("Parent", "transcript:XYZ");
241     assertFalse(testee.identifiesSequence(sf, accId));
242
243     // exon with right parent is valid
244     sf.setValue("Parent", "transcript:" + accId);
245     assertTrue(testee.identifiesSequence(sf, accId));
246
247     // exon sub-type with right parent is valid
248     sf.setType("coding_exon");
249     assertTrue(testee.identifiesSequence(sf, accId));
250
251     // transcript not valid:
252     sf.setType("transcript");
253     assertFalse(testee.identifiesSequence(sf, accId));
254
255     // CDS not valid:
256     sf.setType("CDS");
257     assertFalse(testee.identifiesSequence(sf, accId));
258   }
259
260   @Test(groups = "Functional")
261   public void testIsValidReference() throws Exception
262   {
263     EnsemblSequenceFetcher esq = new EnsemblCdna();
264     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
265     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
266     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
267     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
268     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
269     // non-human species having a 3 character identifier included:
270     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
271   }
272 }