fb7e1438bd1b6c0c9c0ea531c563f52dc526c0ca
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
14
15 import java.util.List;
16
17 import org.testng.Assert;
18 import org.testng.annotations.AfterClass;
19 import org.testng.annotations.BeforeClass;
20 import org.testng.annotations.Test;
21
22 public class EnsemblCdnaTest
23 {
24   @BeforeClass(alwaysRun = true)
25   public void setUp()
26   {
27     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
28   }
29
30   @AfterClass(alwaysRun = true)
31   public void tearDown()
32   {
33     SequenceOntologyFactory.setInstance(null);
34   }
35   /**
36    * Test that the cdna part of genomic sequence is correctly identified by
37    * 'exon' features (or subtypes) - reverse strand case.
38    */
39   @Test(groups = "Functional")
40   public void testGetGenomicRangesFromFeatures_reverseStrand()
41   {
42     EnsemblCdna testee = new EnsemblCdna();
43     SequenceI genomic = new SequenceDummy("chr7");
44     genomic.setStart(10000);
45     genomic.setEnd(50000);
46     String transcriptId = "ABC123";
47
48     // exon at (start+10000) length 501
49     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
50             null);
51     sf.setValue("Parent", "transcript:" + transcriptId);
52     sf.setStrand("-");
53     genomic.addSequenceFeature(sf);
54
55     // exon (sub-type) at (start + exon_variant) length 101
56     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
57     sf.setValue("Parent", "transcript:" + transcriptId);
58     sf.setStrand("-");
59     genomic.addSequenceFeature(sf);
60
61     // exon belonging to a different transcript doesn't count
62     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
63     sf.setValue("Parent", "transcript:anotherOne");
64     genomic.addSequenceFeature(sf);
65
66     // transcript feature doesn't count
67     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
68     genomic.addSequenceFeature(sf);
69
70     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
71             transcriptId, 23);
72     List<int[]> fromRanges = ranges.getFromRanges();
73     assertEquals(2, fromRanges.size());
74
75     /*
76      * from ranges should be sorted by start order descending
77      * and hold mappings from reverse strand sense
78      */
79     assertEquals(20500, fromRanges.get(0)[0]);
80     assertEquals(20000, fromRanges.get(0)[1]);
81     assertEquals(10600, fromRanges.get(1)[0]);
82     assertEquals(10500, fromRanges.get(1)[1]);
83     // to range should start from given start numbering
84     List<int[]> toRanges = ranges.getToRanges();
85     assertEquals(1, toRanges.size());
86     assertEquals(23, toRanges.get(0)[0]);
87     assertEquals(624, toRanges.get(0)[1]);
88   }
89
90   /**
91    * Test that the cdna part of genomic sequence is correctly identified by
92    * 'exon' features (or subtypes) with the desired transcript as parent
93    */
94   @Test(groups = "Functional")
95   public void testGetGenomicRangesFromFeatures()
96   {
97     EnsemblCdna testee = new EnsemblCdna();
98     SequenceI genomic = new SequenceDummy("chr7");
99     genomic.setStart(10000);
100     genomic.setEnd(50000);
101     String transcriptId = "ABC123";
102   
103     // exon at (start+10000) length 501
104     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
105             null);
106     sf.setValue("Parent", "transcript:" + transcriptId);
107     sf.setStrand("+");
108     genomic.addSequenceFeature(sf);
109   
110     // exon (sub-type) at (start + exon_variant) length 101
111     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
112     sf.setValue("Parent", "transcript:" + transcriptId);
113     sf.setStrand("+");
114     genomic.addSequenceFeature(sf);
115   
116     // exon belonging to a different transcript doesn't count
117     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
118     sf.setValue("Parent", "transcript:anotherOne");
119     genomic.addSequenceFeature(sf);
120   
121     // transcript feature doesn't count
122     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
123     sf.setStrand("-"); // weird but ignored
124     genomic.addSequenceFeature(sf);
125   
126     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
127             transcriptId, 23);
128     List<int[]> fromRanges = ranges.getFromRanges();
129     assertEquals(2, fromRanges.size());
130     // from ranges should be sorted by start order
131     assertEquals(10500, fromRanges.get(0)[0]);
132     assertEquals(10600, fromRanges.get(0)[1]);
133     assertEquals(20000, fromRanges.get(1)[0]);
134     assertEquals(20500, fromRanges.get(1)[1]);
135     // to range should start from given start numbering
136     List<int[]> toRanges = ranges.getToRanges();
137     assertEquals(1, toRanges.size());
138     assertEquals(23, toRanges.get(0)[0]);
139     assertEquals(624, toRanges.get(0)[1]);
140   }
141
142   /**
143    * The method under test should give up and return null if both forward and
144    * reverse strands are present in the features of interest
145    */
146   @Test(groups = "Functional")
147   public void testGetGenomicRangesFromFeatures_mixedStrand()
148   {
149     EnsemblCdna testee = new EnsemblCdna();
150     SequenceI genomic = new SequenceDummy("chr7");
151     genomic.setStart(10000);
152     genomic.setEnd(50000);
153     String transcriptId = "ABC123";
154   
155     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
156             null);
157     sf.setValue("Parent", "transcript:" + transcriptId);
158     sf.setStrand("-");
159     genomic.addSequenceFeature(sf);
160   
161     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
162     sf.setValue("Parent", "transcript:" + transcriptId);
163     sf.setStrand("+");
164     genomic.addSequenceFeature(sf);
165   
166     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
167             transcriptId, 23);
168     assertNull(ranges);
169   }
170
171   /**
172    * Test the method that retains features except for 'transcript' (or
173    * subtypes), or features with parent other than the given id
174    */
175   @Test(groups = "Functional")
176   public void testRetainFeature()
177   {
178     String accId = "ABC123";
179     EnsemblCdna testee = new EnsemblCdna();
180
181     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
182             20500, 0f, null);
183     assertFalse(testee.retainFeature(sf, accId));
184
185     sf.setType("aberrant_processed_transcript");
186     assertFalse(testee.retainFeature(sf, accId));
187
188     sf.setType("NMD_transcript_variant");
189     assertFalse(testee.retainFeature(sf, accId));
190
191     // other feature with no parent is retained
192     sf.setType("sequence_variant");
193     assertTrue(testee.retainFeature(sf, accId));
194
195     // other feature with desired parent is retained
196     sf.setValue("Parent", "transcript:" + accId);
197     assertTrue(testee.retainFeature(sf, accId));
198
199     // feature with wrong parent is not retained
200     sf.setValue("Parent", "transcript:XYZ");
201     assertFalse(testee.retainFeature(sf, accId));
202   }
203
204   /**
205    * Test the method that picks out 'exon' (or subtype) features with the
206    * accession id as parent
207    */
208   @Test(groups = "Functional")
209   public void testIdentifiesSequence()
210   {
211     String accId = "ABC123";
212     EnsemblCdna testee = new EnsemblCdna();
213
214     // exon with no parent not valid
215     SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
216     assertFalse(testee.identifiesSequence(sf, accId));
217
218     // exon with wrong parent not valid
219     sf.setValue("Parent", "transcript:XYZ");
220     assertFalse(testee.identifiesSequence(sf, accId));
221
222     // exon with right parent is valid
223     sf.setValue("Parent", "transcript:" + accId);
224     assertTrue(testee.identifiesSequence(sf, accId));
225
226     // exon sub-type with right parent is valid
227     sf.setType("coding_exon");
228     assertTrue(testee.identifiesSequence(sf, accId));
229
230     // transcript not valid:
231     sf.setType("transcript");
232     assertFalse(testee.identifiesSequence(sf, accId));
233
234     // CDS not valid:
235     sf.setType("CDS");
236     assertFalse(testee.identifiesSequence(sf, accId));
237   }
238
239   @Test(groups = "Functional")
240   public void testIsValidReference() throws Exception
241   {
242     EnsemblSequenceFetcher esq = new EnsemblCdna();
243     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
244     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
245     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
246     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
247     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
248     // non-human species having a 3 character identifier included:
249     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
250   }
251 }