JAL-3076 refactor for more efficient scan of 'gene' features
[jalview.git] / test / jalview / ext / ensembl / EnsemblCdsTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.datamodel.Sequence;
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.io.gff.SequenceOntologyFactory;
33 import jalview.io.gff.SequenceOntologyLite;
34 import jalview.util.MapList;
35
36 import java.util.List;
37
38 import org.testng.Assert;
39 import org.testng.annotations.AfterClass;
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class EnsemblCdsTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   @BeforeClass(alwaysRun = true)
54   public void setUp()
55   {
56     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
57   }
58
59   @AfterClass(alwaysRun = true)
60   public void tearDown()
61   {
62     SequenceOntologyFactory.setInstance(null);
63   }
64
65   /**
66    * Test that the cdna part of genomic sequence is correctly identified by
67    * 'CDS' features (or subtypes) with the desired transcript as parent
68    */
69   @Test(groups = "Functional")
70   public void testGetGenomicRangesFromFeatures()
71   {
72     EnsemblCds testee = new EnsemblCds();
73     SequenceI genomic = new SequenceDummy("chr7");
74     genomic.setStart(10000);
75     genomic.setEnd(50000);
76     String transcriptId = "ABC123";
77
78     // CDS at (start+10000) length 501
79     SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
80             null);
81     sf.setValue("Parent", "transcript:" + transcriptId);
82     sf.setStrand("+");
83     genomic.addSequenceFeature(sf);
84
85     // CDS (sub-type) at (start + 10500) length 101
86     sf = new SequenceFeature("CDS_predicted", "", 10500, 10600, 0f, null);
87     sf.setValue("Parent", "transcript:" + transcriptId);
88     sf.setStrand("+");
89     genomic.addSequenceFeature(sf);
90
91     // CDS belonging to a different transcript doesn't count
92     sf = new SequenceFeature("CDS", "", 11500, 12600, 0f, null);
93     sf.setValue("Parent", "transcript:anotherOne");
94     genomic.addSequenceFeature(sf);
95
96     // exon feature doesn't count
97     sf = new SequenceFeature("exon", "", 10000, 50000, 0f, null);
98     genomic.addSequenceFeature(sf);
99
100     // mRNA_region feature doesn't count (parent of CDS)
101     sf = new SequenceFeature("mRNA_region", "", 10000, 50000, 0f, null);
102     genomic.addSequenceFeature(sf);
103
104     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
105             transcriptId, 23);
106     List<int[]> fromRanges = ranges.getFromRanges();
107     assertEquals(2, fromRanges.size());
108     // from ranges should be sorted by start order
109     assertEquals(10500, fromRanges.get(0)[0]);
110     assertEquals(10600, fromRanges.get(0)[1]);
111     assertEquals(20000, fromRanges.get(1)[0]);
112     assertEquals(20500, fromRanges.get(1)[1]);
113     // to range should start from given start numbering
114     List<int[]> toRanges = ranges.getToRanges();
115     assertEquals(1, toRanges.size());
116     assertEquals(23, toRanges.get(0)[0]);
117     assertEquals(624, toRanges.get(0)[1]);
118   }
119
120   /**
121    * Test the method that retains features except for 'CDS' (or subtypes), or
122    * features with parent other than the given id
123    */
124   @Test(groups = "Functional")
125   public void testRetainFeature()
126   {
127     String accId = "ABC123";
128     EnsemblCds testee = new EnsemblCds();
129
130     SequenceFeature sf = new SequenceFeature("CDS", "", 20000, 20500, 0f,
131             null);
132     assertFalse(testee.retainFeature(sf, accId));
133
134     sf = new SequenceFeature("CDS_predicted", "", 20000, 20500, 0f, null);
135     assertFalse(testee.retainFeature(sf, accId));
136
137     // other feature with no parent is retained
138     sf = new SequenceFeature("CDS_psequence_variantredicted", "", 20000,
139             20500, 0f, null);
140     assertTrue(testee.retainFeature(sf, accId));
141
142     // other feature with desired parent is retained
143     sf.setValue("Parent", "transcript:" + accId);
144     assertTrue(testee.retainFeature(sf, accId));
145
146     // feature with wrong parent is not retained
147     sf.setValue("Parent", "transcript:XYZ");
148     assertFalse(testee.retainFeature(sf, accId));
149   }
150
151   /**
152    * Test the method that picks out 'CDS' (or subtype) features with the
153    * accession id as parent
154    */
155   @Test(groups = "Functional")
156   public void testGetIdentifyingFeatures()
157   {
158     String accId = "ABC123";
159     SequenceI seq = new Sequence(accId, "MKDONS");
160
161     // cds with no parent not valid
162     SequenceFeature sf1 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
163     seq.addSequenceFeature(sf1);
164
165     // cds with wrong parent not valid
166     SequenceFeature sf2 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
167     sf2.setValue("Parent", "transcript:XYZ");
168     seq.addSequenceFeature(sf2);
169
170     // cds with right parent is valid
171     SequenceFeature sf3 = new SequenceFeature("CDS", "", 1, 2, 0f, null);
172     sf3.setValue("Parent", "transcript:" + accId);
173     seq.addSequenceFeature(sf3);
174
175     // cds sub-type with right parent is valid
176     SequenceFeature sf4 = new SequenceFeature("CDS_predicted", "", 1, 2, 0f,
177             null);
178     sf4.setValue("Parent", "transcript:" + accId);
179     seq.addSequenceFeature(sf4);
180
181     // transcript not valid:
182     SequenceFeature sf5 = new SequenceFeature("transcript", "", 1, 2, 0f,
183             null);
184     sf5.setValue("Parent", "transcript:" + accId);
185     seq.addSequenceFeature(sf5);
186
187     // exon not valid:
188     SequenceFeature sf6 = new SequenceFeature("exon", "", 1, 2, 0f, null);
189     sf6.setValue("Parent", "transcript:" + accId);
190     seq.addSequenceFeature(sf6);
191
192     List<SequenceFeature> sfs = new EnsemblCds().getIdentifyingFeatures(seq,
193             accId);
194     assertFalse(sfs.contains(sf1));
195     assertFalse(sfs.contains(sf2));
196     assertTrue(sfs.contains(sf3));
197     assertTrue(sfs.contains(sf4));
198     assertFalse(sfs.contains(sf5));
199     assertFalse(sfs.contains(sf6));
200   }
201
202   @Test(groups = "Functional")
203   public void testIsValidReference() throws Exception
204   {
205     EnsemblSequenceFetcher esq = new EnsemblCds();
206     Assert.assertTrue(esq.isValidReference("CCDS5863.1"));
207     Assert.assertTrue(esq.isValidReference("ENST00000288602"));
208     Assert.assertTrue(esq.isValidReference("ENSG00000288602"));
209     Assert.assertTrue(esq.isValidReference("ENSP00000288602"));
210     Assert.assertFalse(esq.isValidReference("ENST0000288602"));
211     // non-human species have a 3 character identifier included:
212     Assert.assertTrue(esq.isValidReference("ENSMUSG00000099398"));
213   }
214
215 }