217742da679926ac1283313c1b0836a51b0721a2
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.api.FeatureSettingsModelI;
28 import jalview.bin.Cache;
29 import jalview.datamodel.SequenceDummy;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceI;
32 import jalview.gui.JvOptionPane;
33 import jalview.io.gff.SequenceOntologyFactory;
34 import jalview.io.gff.SequenceOntologyLite;
35 import jalview.util.MapList;
36
37 import java.awt.Color;
38 import java.util.List;
39
40 import org.testng.annotations.AfterClass;
41 import org.testng.annotations.BeforeClass;
42 import org.testng.annotations.Test;
43
44 public class EnsemblGeneTest
45 {
46
47   @BeforeClass(alwaysRun = true)
48   public void setUpJvOptionPane()
49   {
50     JvOptionPane.setInteractiveMode(false);
51     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
52   }
53
54   @BeforeClass(alwaysRun = true)
55   public void setUp()
56   {
57     Cache.loadProperties("test/jalview/io/testProps.jvprops");
58     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
59   }
60
61   @AfterClass(alwaysRun = true)
62   public void tearDown()
63   {
64     SequenceOntologyFactory.setInstance(null);
65   }
66
67   /**
68    * Test that the gene part of genomic sequence is uniquely identified by a
69    * 'gene' features (or subtype) with the correct gene ID
70    */
71   @Test(groups = "Functional")
72   public void testGetGenomicRangesFromFeatures()
73   {
74     EnsemblGene testee = new EnsemblGene();
75     SequenceI genomic = new SequenceDummy("chr7");
76     genomic.setStart(10000);
77     genomic.setEnd(50000);
78     String geneId = "ABC123";
79
80     // gene at (start+20000) length 501
81     // should be ignored - the first 'gene' found defines the whole range
82     // (note features are found in position order, not addition order)
83     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
84             null);
85     sf.setValue("ID", "gene:" + geneId);
86     sf.setStrand("+");
87     genomic.addSequenceFeature(sf);
88
89     // gene at (start + 10500) length 101
90     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
91     sf.setValue("ID", "gene:" + geneId);
92     sf.setStrand("+");
93     genomic.addSequenceFeature(sf);
94
95     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
96             23);
97     List<int[]> fromRanges = ranges.getFromRanges();
98     assertEquals(1, fromRanges.size());
99     assertEquals(10500, fromRanges.get(0)[0]);
100     assertEquals(10600, fromRanges.get(0)[1]);
101     // to range should start from given start numbering
102     List<int[]> toRanges = ranges.getToRanges();
103     assertEquals(1, toRanges.size());
104     assertEquals(23, toRanges.get(0)[0]);
105     assertEquals(123, toRanges.get(0)[1]);
106   }
107
108   /**
109    * Test variant using a sub-type of gene from the Sequence Ontology
110    */
111   @Test(groups = "Functional")
112   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
113   {
114     EnsemblGene testee = new EnsemblGene();
115     SequenceI genomic = new SequenceDummy("chr7");
116     genomic.setStart(10000);
117     genomic.setEnd(50000);
118     String geneId = "ABC123";
119
120     // gene at (start+20000) length 501
121     // should be ignored - the first 'gene' found defines the whole range
122     // (real data would only have one such feature)
123     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
124             20500, 0f, null);
125     sf.setValue("ID", "gene:" + geneId);
126     sf.setStrand("-");
127     genomic.addSequenceFeature(sf);
128
129     // gene at (start + 10500) length 101
130     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
131     sf.setValue("ID", "gene:" + geneId);
132     sf.setStrand("+");
133     genomic.addSequenceFeature(sf);
134
135     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
136             23);
137     List<int[]> fromRanges = ranges.getFromRanges();
138     assertEquals(1, fromRanges.size());
139     // from range on reverse strand:
140     assertEquals(10500, fromRanges.get(0)[0]);
141     assertEquals(10600, fromRanges.get(0)[1]);
142     // to range should start from given start numbering
143     List<int[]> toRanges = ranges.getToRanges();
144     assertEquals(1, toRanges.size());
145     assertEquals(23, toRanges.get(0)[0]);
146     assertEquals(123, toRanges.get(0)[1]);
147   }
148
149   /**
150    * Test the method that extracts transcript (or subtype) features with a
151    * specified gene as parent
152    */
153   @Test(groups = "Functional")
154   public void testGetTranscriptFeatures()
155   {
156     SequenceI genomic = new SequenceDummy("chr7");
157     genomic.setStart(10000);
158     genomic.setEnd(50000);
159     String geneId = "ABC123";
160
161     // transcript feature
162     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
163             20500, 0f, null);
164     sf1.setValue("Parent", "gene:" + geneId);
165     sf1.setValue("transcript_id", "transcript1");
166     genomic.addSequenceFeature(sf1);
167
168     // transcript sub-type feature
169     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
170             0f, null);
171     sf2.setValue("Parent", "gene:" + geneId);
172     sf2.setValue("transcript_id", "transcript2");
173     genomic.addSequenceFeature(sf2);
174
175     // NMD_transcript_variant treated like transcript in Ensembl
176     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
177             22000, 22500, 0f, null);
178     // id matching should not be case-sensitive
179     sf3.setValue("Parent", "gene:" + geneId.toLowerCase());
180     sf3.setValue("transcript_id", "transcript3");
181     genomic.addSequenceFeature(sf3);
182
183     // transcript for a different gene - ignored
184     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
185             0f, null);
186     sf4.setValue("Parent", "gene:XYZ");
187     sf4.setValue("transcript_id", "transcript4");
188     genomic.addSequenceFeature(sf4);
189
190     EnsemblGene testee = new EnsemblGene();
191
192     /*
193      * with no filter
194      */
195     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
196             genomic);
197     assertEquals(3, features.size());
198     assertTrue(features.contains(sf1));
199     assertTrue(features.contains(sf2));
200     assertTrue(features.contains(sf3));
201   }
202
203   /**
204    * Test the method that retains features except for 'gene', or 'transcript'
205    * with parent other than the given id
206    */
207   @Test(groups = "Functional")
208   public void testRetainFeature()
209   {
210     String geneId = "ABC123";
211     EnsemblGene testee = new EnsemblGene();
212     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
213             null);
214     sf.setValue("ID", "gene:" + geneId);
215     assertFalse(testee.retainFeature(sf, geneId));
216
217     sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
218     sf.setValue("Parent", "gene:" + geneId);
219     assertTrue(testee.retainFeature(sf, geneId));
220
221     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
222             null);
223     sf.setValue("Parent", "gene:" + geneId);
224     assertTrue(testee.retainFeature(sf, geneId));
225
226     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
227             0f, null);
228     sf.setValue("Parent", "gene:" + geneId);
229     assertTrue(testee.retainFeature(sf, geneId));
230
231     sf.setValue("Parent", "gene:XYZ");
232     assertFalse(testee.retainFeature(sf, geneId));
233
234     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
235     assertTrue(testee.retainFeature(sf, geneId));
236   }
237
238   /**
239    * Test the method that picks out 'gene' (or subtype) features with the
240    * accession id as ID
241    */
242   @Test(groups = "Functional")
243   public void testIdentifiesSequence()
244   {
245     String accId = "ABC123";
246     EnsemblGene testee = new EnsemblGene();
247
248     // gene with no ID not valid
249     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
250     assertFalse(testee.identifiesSequence(sf, accId));
251
252     // gene with wrong ID not valid
253     sf.setValue("ID", "gene:XYZ");
254     assertFalse(testee.identifiesSequence(sf, accId));
255
256     // gene with right ID is valid
257     sf.setValue("ID", "gene:" + accId);
258     assertTrue(testee.identifiesSequence(sf, accId));
259
260     // gene sub-type with right ID is valid
261     sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
262     sf.setValue("ID", "gene:" + accId);
263     assertTrue(testee.identifiesSequence(sf, accId));
264
265     // test is not case-sensitive
266     assertTrue(testee.identifiesSequence(sf, accId.toLowerCase()));
267
268     // transcript not valid:
269     sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
270     sf.setValue("ID", "gene:" + accId);
271     assertFalse(testee.identifiesSequence(sf, accId));
272
273     // exon not valid:
274     sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
275     sf.setValue("ID", "gene:" + accId);
276     assertFalse(testee.identifiesSequence(sf, accId));
277   }
278
279   /**
280    * Check behaviour of feature colour scheme for EnsemblGene sequences.
281    * Currently coded to display exon and sequence_variant (or sub-types) only,
282    * with sequence_variant in red above exon coloured by label.
283    */
284   @Test(groups = "Functional")
285   public void testGetFeatureColourScheme()
286   {
287     FeatureSettingsModelI fc = new EnsemblGene().getFeatureColourScheme();
288     assertTrue(fc.isFeatureDisplayed("exon"));
289     assertTrue(fc.isFeatureDisplayed("coding_exon")); // subtype of exon
290     assertTrue(fc.isFeatureDisplayed("sequence_variant"));
291     assertTrue(fc.isFeatureDisplayed("feature_variant")); // subtype
292     assertFalse(fc.isFeatureDisplayed("transcript"));
293     assertEquals(Color.RED, fc.getFeatureColour("sequence_variant")
294             .getColour());
295     assertEquals(Color.RED, fc.getFeatureColour("feature_variant")
296             .getColour());
297     assertTrue(fc.getFeatureColour("exon").isColourByLabel());
298     assertTrue(fc.getFeatureColour("coding_exon").isColourByLabel());
299     assertEquals(1, fc.compare("sequence_variant", "exon"));
300     assertEquals(-1, fc.compare("exon", "sequence_variant"));
301     assertEquals(1, fc.compare("feature_variant", "coding_exon"));
302     assertEquals(-1, fc.compare("coding_exon", "feature_variant"));
303     assertEquals(1f, fc.getTransparency());
304   }
305
306   @Test(groups = "Network")
307   public void testGetGeneIds()
308   {
309     /*
310      * ENSG00000158828 gene id PINK1 human
311      * ENST00000321556 transcript for the same gene - should not be duplicated
312      * P30419 Uniprot identifier for ENSG00000136448
313      * ENST00000592782 transcript for Uniprot gene - should not be duplicated
314      * BRAF - gene name resolvabe (at time of writing) for 6 model species
315      */
316     String ids = "ENSG00000158828 ENST00000321556 P30419 ENST00000592782 BRAF";
317     EnsemblGene testee = new EnsemblGene();
318     List<String> geneIds = testee.getGeneIds(ids);
319     assertEquals(8, geneIds.size());
320     assertTrue(geneIds.contains("ENSG00000158828"));
321     assertTrue(geneIds.contains("ENSG00000136448"));
322     assertTrue(geneIds.contains("ENSG00000157764")); // BRAF human
323     assertTrue(geneIds.contains("ENSMUSG00000002413")); // mouse
324     assertTrue(geneIds.contains("ENSRNOG00000010957")); // rat
325     assertTrue(geneIds.contains("ENSXETG00000004845")); // xenopus
326     assertTrue(geneIds.contains("ENSDARG00000017661")); // zebrafish
327     assertTrue(geneIds.contains("ENSGALG00000012865")); // chicken
328   }
329 }