// JAL-2189 format tests
// [jalview.git] / test / jalview / ext / ensembl / EnsemblCdnaTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertNull;
6 import static org.testng.AssertJUnit.assertTrue;
7
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
14
15 import java.util.List;
16
17 import org.testng.Assert;
18 import org.testng.annotations.AfterClass;
19 import org.testng.annotations.BeforeClass;
20 import org.testng.annotations.Test;
21
22 public class EnsemblCdnaTest
23 {
24   @BeforeClass(alwaysRun = true)
25   public void setUp()
26   {
27     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
28   }
29
30   @AfterClass(alwaysRun = true)
31   public void tearDown()
32   {
33     SequenceOntologyFactory.setInstance(null);
34   }
35
36   /**
37    * Test that the cdna part of genomic sequence is correctly identified by
38    * 'exon' features (or subtypes) - reverse strand case.
39    */
40   @Test(groups = "Functional")
41   public void testGetGenomicRangesFromFeatures_reverseStrand()
42   {
43     EnsemblCdna testee = new EnsemblCdna();
44     SequenceI genomic = new SequenceDummy("chr7");
45     genomic.setStart(10000);
46     genomic.setEnd(50000);
47     String transcriptId = "ABC123";
48
49     // exon at (start+10000) length 501
50     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
51             null);
52     sf.setValue("Parent", "transcript:" + transcriptId);
53     sf.setStrand("-");
54     genomic.addSequenceFeature(sf);
55
56     // exon (sub-type) at (start + exon_variant) length 101
57     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
58     sf.setValue("Parent", "transcript:" + transcriptId);
59     sf.setStrand("-");
60     genomic.addSequenceFeature(sf);
61
62     // exon belonging to a different transcript doesn't count
63     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
64     sf.setValue("Parent", "transcript:anotherOne");
65     genomic.addSequenceFeature(sf);
66
67     // transcript feature doesn't count
68     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
69     genomic.addSequenceFeature(sf);
70
71     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
72             transcriptId, 23);
73     List<int[]> fromRanges = ranges.getFromRanges();
74     assertEquals(2, fromRanges.size());
75
76     /*
77      * from ranges should be sorted by start order descending
78      * and hold mappings from reverse strand sense
79      */
80     assertEquals(20500, fromRanges.get(0)[0]);
81     assertEquals(20000, fromRanges.get(0)[1]);
82     assertEquals(10600, fromRanges.get(1)[0]);
83     assertEquals(10500, fromRanges.get(1)[1]);
84     // to range should start from given start numbering
85     List<int[]> toRanges = ranges.getToRanges();
86     assertEquals(1, toRanges.size());
87     assertEquals(23, toRanges.get(0)[0]);
88     assertEquals(624, toRanges.get(0)[1]);
89   }
90
91   /**
92    * Test that the cdna part of genomic sequence is correctly identified by
93    * 'exon' features (or subtypes) with the desired transcript as parent
94    */
95   @Test(groups = "Functional")
96   public void testGetGenomicRangesFromFeatures()
97   {
98     EnsemblCdna testee = new EnsemblCdna();
99     SequenceI genomic = new SequenceDummy("chr7");
100     genomic.setStart(10000);
101     genomic.setEnd(50000);
102     String transcriptId = "ABC123";
103
104     // exon at (start+10000) length 501
105     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
106             null);
107     sf.setValue("Parent", "transcript:" + transcriptId);
108     sf.setStrand("+");
109     genomic.addSequenceFeature(sf);
110
111     // exon (sub-type) at (start + exon_variant) length 101
112     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
113     sf.setValue("Parent", "transcript:" + transcriptId);
114     sf.setStrand("+");
115     genomic.addSequenceFeature(sf);
116
117     // exon belonging to a different transcript doesn't count
118     sf = new SequenceFeature("exon", "", 11500, 12600, 0f, null);
119     sf.setValue("Parent", "transcript:anotherOne");
120     genomic.addSequenceFeature(sf);
121
122     // transcript feature doesn't count
123     sf = new SequenceFeature("transcript", "", 10000, 50000, 0f, null);
124     sf.setStrand("-"); // weird but ignored
125     genomic.addSequenceFeature(sf);
126
127     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
128             transcriptId, 23);
129     List<int[]> fromRanges = ranges.getFromRanges();
130     assertEquals(2, fromRanges.size());
131     // from ranges should be sorted by start order
132     assertEquals(10500, fromRanges.get(0)[0]);
133     assertEquals(10600, fromRanges.get(0)[1]);
134     assertEquals(20000, fromRanges.get(1)[0]);
135     assertEquals(20500, fromRanges.get(1)[1]);
136     // to range should start from given start numbering
137     List<int[]> toRanges = ranges.getToRanges();
138     assertEquals(1, toRanges.size());
139     assertEquals(23, toRanges.get(0)[0]);
140     assertEquals(624, toRanges.get(0)[1]);
141   }
142
143   /**
144    * The method under test should give up and return null if both forward and
145    * reverse strands are present in the features of interest
146    */
147   @Test(groups = "Functional")
148   public void testGetGenomicRangesFromFeatures_mixedStrand()
149   {
150     EnsemblCdna testee = new EnsemblCdna();
151     SequenceI genomic = new SequenceDummy("chr7");
152     genomic.setStart(10000);
153     genomic.setEnd(50000);
154     String transcriptId = "ABC123";
155
156     SequenceFeature sf = new SequenceFeature("exon", "", 20000, 20500, 0f,
157             null);
158     sf.setValue("Parent", "transcript:" + transcriptId);
159     sf.setStrand("-");
160     genomic.addSequenceFeature(sf);
161
162     sf = new SequenceFeature("coding_exon", "", 10500, 10600, 0f, null);
163     sf.setValue("Parent", "transcript:" + transcriptId);
164     sf.setStrand("+");
165     genomic.addSequenceFeature(sf);
166
167     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
168             transcriptId, 23);
169     assertNull(ranges);
170   }
171
172   /**
173    * Test the method that retains features except for 'transcript' (or
174    * subtypes), or features with parent other than the given id
175    */
176   @Test(groups = "Functional")
177   public void testRetainFeature()
178   {
179     String accId = "ABC123";
180     EnsemblCdna testee = new EnsemblCdna();
181
182     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
183             20500, 0f, null);
184     assertFalse(testee.retainFeature(sf, accId));
185
186     sf.setType("aberrant_processed_transcript");
187     assertFalse(testee.retainFeature(sf, accId));
188
189     sf.setType("NMD_transcript_variant");
190     assertFalse(testee.retainFeature(sf, accId));
191
192     // other feature with no parent is retained
193     sf.setType("sequence_variant");
194     assertTrue(testee.retainFeature(sf, accId));
195
196     // other feature with desired parent is retained
197     sf.setValue("Parent", "transcript:" + accId);
198     assertTrue(testee.retainFeature(sf, accId));
199
200     // feature with wrong parent is not retained
201     sf.setValue("Parent", "transcript:XYZ");
202     assertFalse(testee.retainFeature(sf, accId));
203   }
204
205   /**
206    * Test the method that picks out 'exon' (or subtype) features with the
207    * accession id as parent
208    */
209   @Test(groups = "Functional")
210   public void testIdentifiesSequence()
211   {
212     String accId = "ABC123";
213     EnsemblCdna testee = new EnsemblCdna();
214
215     // exon with no parent not valid
216     SequenceFeature sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
217     assertFalse(testee.identifiesSequence(sf, accId));
218
219     // exon with wrong parent not valid
220     sf.setValue("Parent", "transcript:XYZ");
221     assertFalse(testee.identifiesSequence(sf, accId));
222
223     // exon with right parent is valid
224     sf.setValue("Parent", "transcript:" + accId);
225     assertTrue(testee.identifiesSequence(sf, accId));
226
227     // exon sub-type with right parent is valid
228     sf.setType("coding_exon");
229     assertTrue(testee.identifiesSequence(sf, accId));
230
231     // transcript not valid:
232     sf.setType("transcript");
233     assertFalse(testee.identifiesSequence(sf, accId));
234
235     // CDS not valid:
236     sf.setType("CDS");
237     assertFalse(testee.identifiesSequence(sf, accId));
238   }
239
240   @Test(groups = "Functional")
241   public void testIsValidReference() throws Exception
242   {
243     EnsemblSequenceFetcher esq = new EnsemblCdna();
244     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
245     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
246     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
247     Assert.assertFalse(esq.isValidReference("ENSP00000288602"));
248     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
249     // non-human species having a 3 character identifier included:
250     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
251   }
252 }