5920b89e85af17c1d94fc0257f5873f2c2c7c664
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.api.FeatureSettingsModelI;
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.io.gff.SequenceOntologyFactory;
33 import jalview.io.gff.SequenceOntologyLite;
34 import jalview.util.MapList;
35
36 import java.awt.Color;
37 import java.util.List;
38
39 import org.testng.annotations.AfterClass;
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class EnsemblGeneTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   @BeforeClass(alwaysRun = true)
54   public void setUp()
55   {
56     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
57   }
58
59   @AfterClass(alwaysRun = true)
60   public void tearDown()
61   {
62     SequenceOntologyFactory.setInstance(null);
63   }
64
65   /**
66    * Test that the gene part of genomic sequence is uniquely identified by a
67    * 'gene' features (or subtype) with the correct gene ID
68    */
69   @Test(groups = "Functional")
70   public void testGetGenomicRangesFromFeatures()
71   {
72     EnsemblGene testee = new EnsemblGene();
73     SequenceI genomic = new SequenceDummy("chr7");
74     genomic.setStart(10000);
75     genomic.setEnd(50000);
76     String geneId = "ABC123";
77
78     // gene at (start+20000) length 501
79     // should be ignored - the first 'gene' found defines the whole range
80     // (note features are found in position order, not addition order)
81     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
82             null);
83     sf.setValue("ID", "gene:" + geneId);
84     sf.setStrand("+");
85     genomic.addSequenceFeature(sf);
86
87     // gene at (start + 10500) length 101
88     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
89     sf.setValue("ID", "gene:" + geneId);
90     sf.setStrand("+");
91     genomic.addSequenceFeature(sf);
92
93     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
94             23);
95     List<int[]> fromRanges = ranges.getFromRanges();
96     assertEquals(1, fromRanges.size());
97     assertEquals(10500, fromRanges.get(0)[0]);
98     assertEquals(10600, fromRanges.get(0)[1]);
99     // to range should start from given start numbering
100     List<int[]> toRanges = ranges.getToRanges();
101     assertEquals(1, toRanges.size());
102     assertEquals(23, toRanges.get(0)[0]);
103     assertEquals(123, toRanges.get(0)[1]);
104   }
105
106   /**
107    * Test variant using a sub-type of gene from the Sequence Ontology
108    */
109   @Test(groups = "Functional")
110   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
111   {
112     EnsemblGene testee = new EnsemblGene();
113     SequenceI genomic = new SequenceDummy("chr7");
114     genomic.setStart(10000);
115     genomic.setEnd(50000);
116     String geneId = "ABC123";
117
118     // gene at (start+20000) length 501
119     // should be ignored - the first 'gene' found defines the whole range
120     // (real data would only have one such feature)
121     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
122             20500, 0f, null);
123     sf.setValue("ID", "gene:" + geneId);
124     sf.setStrand("-");
125     genomic.addSequenceFeature(sf);
126
127     // gene at (start + 10500) length 101
128     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
129     sf.setValue("ID", "gene:" + geneId);
130     sf.setStrand("+");
131     genomic.addSequenceFeature(sf);
132
133     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
134             23);
135     List<int[]> fromRanges = ranges.getFromRanges();
136     assertEquals(1, fromRanges.size());
137     // from range on reverse strand:
138     assertEquals(10500, fromRanges.get(0)[0]);
139     assertEquals(10600, fromRanges.get(0)[1]);
140     // to range should start from given start numbering
141     List<int[]> toRanges = ranges.getToRanges();
142     assertEquals(1, toRanges.size());
143     assertEquals(23, toRanges.get(0)[0]);
144     assertEquals(123, toRanges.get(0)[1]);
145   }
146
147   /**
148    * Test the method that extracts transcript (or subtype) features with a
149    * specified gene as parent
150    */
151   @Test(groups = "Functional")
152   public void testGetTranscriptFeatures()
153   {
154     SequenceI genomic = new SequenceDummy("chr7");
155     genomic.setStart(10000);
156     genomic.setEnd(50000);
157     String geneId = "ABC123";
158
159     // transcript feature
160     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
161             20500, 0f, null);
162     sf1.setValue("Parent", "gene:" + geneId);
163     sf1.setValue("transcript_id", "transcript1");
164     genomic.addSequenceFeature(sf1);
165
166     // transcript sub-type feature
167     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
168             0f, null);
169     sf2.setValue("Parent", "gene:" + geneId);
170     sf2.setValue("transcript_id", "transcript2");
171     genomic.addSequenceFeature(sf2);
172
173     // NMD_transcript_variant treated like transcript in Ensembl
174     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
175             22000, 22500, 0f, null);
176     sf3.setValue("Parent", "gene:" + geneId);
177     sf3.setValue("transcript_id", "transcript3");
178     genomic.addSequenceFeature(sf3);
179
180     // transcript for a different gene - ignored
181     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
182             0f, null);
183     sf4.setValue("Parent", "gene:XYZ");
184     sf4.setValue("transcript_id", "transcript4");
185     genomic.addSequenceFeature(sf4);
186
187     EnsemblGene testee = new EnsemblGene();
188
189     /*
190      * with no filter
191      */
192     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
193             genomic);
194     assertEquals(3, features.size());
195     assertTrue(features.contains(sf1));
196     assertTrue(features.contains(sf2));
197     assertTrue(features.contains(sf3));
198   }
199
200   /**
201    * Test the method that retains features except for 'gene', or 'transcript'
202    * with parent other than the given id
203    */
204   @Test(groups = "Functional")
205   public void testRetainFeature()
206   {
207     String geneId = "ABC123";
208     EnsemblGene testee = new EnsemblGene();
209     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
210             null);
211     sf.setValue("ID", "gene:" + geneId);
212     assertFalse(testee.retainFeature(sf, geneId));
213
214     sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
215     sf.setValue("Parent", "gene:" + geneId);
216     assertTrue(testee.retainFeature(sf, geneId));
217
218     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
219             null);
220     sf.setValue("Parent", "gene:" + geneId);
221     assertTrue(testee.retainFeature(sf, geneId));
222
223     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
224             0f, null);
225     sf.setValue("Parent", "gene:" + geneId);
226     assertTrue(testee.retainFeature(sf, geneId));
227
228     sf.setValue("Parent", "gene:XYZ");
229     assertFalse(testee.retainFeature(sf, geneId));
230
231     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
232     assertTrue(testee.retainFeature(sf, geneId));
233   }
234
235   /**
236    * Test the method that picks out 'gene' (or subtype) features with the
237    * accession id as ID
238    */
239   @Test(groups = "Functional")
240   public void testIdentifiesSequence()
241   {
242     String accId = "ABC123";
243     EnsemblGene testee = new EnsemblGene();
244
245     // gene with no ID not valid
246     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
247     assertFalse(testee.identifiesSequence(sf, accId));
248
249     // gene with wrong ID not valid
250     sf.setValue("ID", "gene:XYZ");
251     assertFalse(testee.identifiesSequence(sf, accId));
252
253     // gene with right ID is valid
254     sf.setValue("ID", "gene:" + accId);
255     assertTrue(testee.identifiesSequence(sf, accId));
256
257     // gene sub-type with right ID is valid
258     sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
259     sf.setValue("ID", "gene:" + accId);
260     assertTrue(testee.identifiesSequence(sf, accId));
261
262     // transcript not valid:
263     sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
264     sf.setValue("ID", "gene:" + accId);
265     assertFalse(testee.identifiesSequence(sf, accId));
266
267     // exon not valid:
268     sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
269     sf.setValue("ID", "gene:" + accId);
270     assertFalse(testee.identifiesSequence(sf, accId));
271   }
272
273   /**
274    * Check behaviour of feature colour scheme for EnsemblGene sequences.
275    * Currently coded to display exon and sequence_variant (or sub-types) only,
276    * with sequence_variant in red above exon coloured by label.
277    */
278   @Test(groups = "Functional")
279   public void testGetFeatureColourScheme()
280   {
281     FeatureSettingsModelI fc = new EnsemblGene().getFeatureColourScheme();
282     assertTrue(fc.isFeatureDisplayed("exon"));
283     assertTrue(fc.isFeatureDisplayed("coding_exon")); // subtype of exon
284     assertTrue(fc.isFeatureDisplayed("sequence_variant"));
285     assertTrue(fc.isFeatureDisplayed("feature_variant")); // subtype
286     assertFalse(fc.isFeatureDisplayed("transcript"));
287     assertEquals(Color.RED, fc.getFeatureColour("sequence_variant")
288             .getColour());
289     assertEquals(Color.RED, fc.getFeatureColour("feature_variant")
290             .getColour());
291     assertTrue(fc.getFeatureColour("exon").isColourByLabel());
292     assertTrue(fc.getFeatureColour("coding_exon").isColourByLabel());
293     assertEquals(1, fc.compare("sequence_variant", "exon"));
294     assertEquals(-1, fc.compare("exon", "sequence_variant"));
295     assertEquals(1, fc.compare("feature_variant", "coding_exon"));
296     assertEquals(-1, fc.compare("coding_exon", "feature_variant"));
297     assertEquals(1f, fc.getTransparency());
298   }
299
300   @Test(groups = "Network")
301   public void testGetGeneIds()
302   {
303     /*
304      * ENSG00000158828 gene id PINK1 human
305      * ENST00000321556 transcript for the same gene - should not be duplicated
306      * P30419 Uniprot identifier for ENSG00000136448
307      * ENST00000592782 transcript for Uniprot gene - should not be duplicated
308      * BRAF - gene name resolvabe (at time of writing) for 6 model species
309      */
310     String ids = "ENSG00000158828 ENST00000321556 P30419 ENST00000592782 BRAF";
311     EnsemblGene testee = new EnsemblGene();
312     List<String> geneIds = testee.getGeneIds(ids);
313     assertEquals(8, geneIds.size());
314     assertTrue(geneIds.contains("ENSG00000158828"));
315     assertTrue(geneIds.contains("ENSG00000136448"));
316     assertTrue(geneIds.contains("ENSG00000157764")); // BRAF human
317     assertTrue(geneIds.contains("ENSMUSG00000002413")); // mouse
318     assertTrue(geneIds.contains("ENSRNOG00000010957")); // rat
319     assertTrue(geneIds.contains("ENSXETG00000004845")); // xenopus
320     assertTrue(geneIds.contains("ENSDARG00000017661")); // zebrafish
321     assertTrue(geneIds.contains("ENSGALG00000012865")); // chicken
322   }
323 }