JAL-2885 uniprot now https, uniprot/ensembl/pfam/xfam configurable
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.api.FeatureSettingsModelI;
28 import jalview.bin.Cache;
29 import jalview.datamodel.SequenceDummy;
30 import jalview.datamodel.SequenceFeature;
31 import jalview.datamodel.SequenceI;
32 import jalview.gui.JvOptionPane;
33 import jalview.io.gff.SequenceOntologyFactory;
34 import jalview.io.gff.SequenceOntologyLite;
35 import jalview.util.MapList;
36
37 import java.awt.Color;
38 import java.util.List;
39
40 import org.testng.annotations.AfterClass;
41 import org.testng.annotations.BeforeClass;
42 import org.testng.annotations.Test;
43
44 public class EnsemblGeneTest
45 {
46
47   @BeforeClass(alwaysRun = true)
48   public void setUpJvOptionPane()
49   {
50     JvOptionPane.setInteractiveMode(false);
51     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
52   }
53
54   @BeforeClass(alwaysRun = true)
55   public void setUp()
56   {
57     Cache.loadProperties("test/jalview/io/testProps.jvprops");
58     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
59   }
60
61   @AfterClass(alwaysRun = true)
62   public void tearDown()
63   {
64     SequenceOntologyFactory.setInstance(null);
65   }
66
67   /**
68    * Test that the gene part of genomic sequence is uniquely identified by a
69    * 'gene' features (or subtype) with the correct gene ID
70    */
71   @Test(groups = "Functional")
72   public void testGetGenomicRangesFromFeatures()
73   {
74     EnsemblGene testee = new EnsemblGene();
75     SequenceI genomic = new SequenceDummy("chr7");
76     genomic.setStart(10000);
77     genomic.setEnd(50000);
78     String geneId = "ABC123";
79
80     // gene at (start+20000) length 501
81     // should be ignored - the first 'gene' found defines the whole range
82     // (note features are found in position order, not addition order)
83     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
84             null);
85     sf.setValue("ID", "gene:" + geneId);
86     sf.setStrand("+");
87     genomic.addSequenceFeature(sf);
88
89     // gene at (start + 10500) length 101
90     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
91     sf.setValue("ID", "gene:" + geneId);
92     sf.setStrand("+");
93     genomic.addSequenceFeature(sf);
94
95     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
96             23);
97     List<int[]> fromRanges = ranges.getFromRanges();
98     assertEquals(1, fromRanges.size());
99     assertEquals(10500, fromRanges.get(0)[0]);
100     assertEquals(10600, fromRanges.get(0)[1]);
101     // to range should start from given start numbering
102     List<int[]> toRanges = ranges.getToRanges();
103     assertEquals(1, toRanges.size());
104     assertEquals(23, toRanges.get(0)[0]);
105     assertEquals(123, toRanges.get(0)[1]);
106   }
107
108   /**
109    * Test variant using a sub-type of gene from the Sequence Ontology
110    */
111   @Test(groups = "Functional")
112   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
113   {
114     EnsemblGene testee = new EnsemblGene();
115     SequenceI genomic = new SequenceDummy("chr7");
116     genomic.setStart(10000);
117     genomic.setEnd(50000);
118     String geneId = "ABC123";
119
120     // gene at (start+20000) length 501
121     // should be ignored - the first 'gene' found defines the whole range
122     // (real data would only have one such feature)
123     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
124             20500, 0f, null);
125     sf.setValue("ID", "gene:" + geneId);
126     sf.setStrand("-");
127     genomic.addSequenceFeature(sf);
128
129     // gene at (start + 10500) length 101
130     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
131     sf.setValue("ID", "gene:" + geneId);
132     sf.setStrand("+");
133     genomic.addSequenceFeature(sf);
134
135     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
136             23);
137     List<int[]> fromRanges = ranges.getFromRanges();
138     assertEquals(1, fromRanges.size());
139     // from range on reverse strand:
140     assertEquals(10500, fromRanges.get(0)[0]);
141     assertEquals(10600, fromRanges.get(0)[1]);
142     // to range should start from given start numbering
143     List<int[]> toRanges = ranges.getToRanges();
144     assertEquals(1, toRanges.size());
145     assertEquals(23, toRanges.get(0)[0]);
146     assertEquals(123, toRanges.get(0)[1]);
147   }
148
149   /**
150    * Test the method that extracts transcript (or subtype) features with a
151    * specified gene as parent
152    */
153   @Test(groups = "Functional")
154   public void testGetTranscriptFeatures()
155   {
156     SequenceI genomic = new SequenceDummy("chr7");
157     genomic.setStart(10000);
158     genomic.setEnd(50000);
159     String geneId = "ABC123";
160
161     // transcript feature
162     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
163             20500, 0f, null);
164     sf1.setValue("Parent", "gene:" + geneId);
165     sf1.setValue("transcript_id", "transcript1");
166     genomic.addSequenceFeature(sf1);
167
168     // transcript sub-type feature
169     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
170             0f, null);
171     sf2.setValue("Parent", "gene:" + geneId);
172     sf2.setValue("transcript_id", "transcript2");
173     genomic.addSequenceFeature(sf2);
174
175     // NMD_transcript_variant treated like transcript in Ensembl
176     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
177             22000, 22500, 0f, null);
178     sf3.setValue("Parent", "gene:" + geneId);
179     sf3.setValue("transcript_id", "transcript3");
180     genomic.addSequenceFeature(sf3);
181
182     // transcript for a different gene - ignored
183     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
184             0f, null);
185     sf4.setValue("Parent", "gene:XYZ");
186     sf4.setValue("transcript_id", "transcript4");
187     genomic.addSequenceFeature(sf4);
188
189     EnsemblGene testee = new EnsemblGene();
190
191     /*
192      * with no filter
193      */
194     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
195             genomic);
196     assertEquals(3, features.size());
197     assertTrue(features.contains(sf1));
198     assertTrue(features.contains(sf2));
199     assertTrue(features.contains(sf3));
200   }
201
202   /**
203    * Test the method that retains features except for 'gene', or 'transcript'
204    * with parent other than the given id
205    */
206   @Test(groups = "Functional")
207   public void testRetainFeature()
208   {
209     String geneId = "ABC123";
210     EnsemblGene testee = new EnsemblGene();
211     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
212             null);
213     sf.setValue("ID", "gene:" + geneId);
214     assertFalse(testee.retainFeature(sf, geneId));
215
216     sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
217     sf.setValue("Parent", "gene:" + geneId);
218     assertTrue(testee.retainFeature(sf, geneId));
219
220     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
221             null);
222     sf.setValue("Parent", "gene:" + geneId);
223     assertTrue(testee.retainFeature(sf, geneId));
224
225     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
226             0f, null);
227     sf.setValue("Parent", "gene:" + geneId);
228     assertTrue(testee.retainFeature(sf, geneId));
229
230     sf.setValue("Parent", "gene:XYZ");
231     assertFalse(testee.retainFeature(sf, geneId));
232
233     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
234     assertTrue(testee.retainFeature(sf, geneId));
235   }
236
237   /**
238    * Test the method that picks out 'gene' (or subtype) features with the
239    * accession id as ID
240    */
241   @Test(groups = "Functional")
242   public void testIdentifiesSequence()
243   {
244     String accId = "ABC123";
245     EnsemblGene testee = new EnsemblGene();
246
247     // gene with no ID not valid
248     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
249     assertFalse(testee.identifiesSequence(sf, accId));
250
251     // gene with wrong ID not valid
252     sf.setValue("ID", "gene:XYZ");
253     assertFalse(testee.identifiesSequence(sf, accId));
254
255     // gene with right ID is valid
256     sf.setValue("ID", "gene:" + accId);
257     assertTrue(testee.identifiesSequence(sf, accId));
258
259     // gene sub-type with right ID is valid
260     sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
261     sf.setValue("ID", "gene:" + accId);
262     assertTrue(testee.identifiesSequence(sf, accId));
263
264     // transcript not valid:
265     sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
266     sf.setValue("ID", "gene:" + accId);
267     assertFalse(testee.identifiesSequence(sf, accId));
268
269     // exon not valid:
270     sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
271     sf.setValue("ID", "gene:" + accId);
272     assertFalse(testee.identifiesSequence(sf, accId));
273   }
274
275   /**
276    * Check behaviour of feature colour scheme for EnsemblGene sequences.
277    * Currently coded to display exon and sequence_variant (or sub-types) only,
278    * with sequence_variant in red above exon coloured by label.
279    */
280   @Test(groups = "Functional")
281   public void testGetFeatureColourScheme()
282   {
283     FeatureSettingsModelI fc = new EnsemblGene().getFeatureColourScheme();
284     assertTrue(fc.isFeatureDisplayed("exon"));
285     assertTrue(fc.isFeatureDisplayed("coding_exon")); // subtype of exon
286     assertTrue(fc.isFeatureDisplayed("sequence_variant"));
287     assertTrue(fc.isFeatureDisplayed("feature_variant")); // subtype
288     assertFalse(fc.isFeatureDisplayed("transcript"));
289     assertEquals(Color.RED, fc.getFeatureColour("sequence_variant")
290             .getColour());
291     assertEquals(Color.RED, fc.getFeatureColour("feature_variant")
292             .getColour());
293     assertTrue(fc.getFeatureColour("exon").isColourByLabel());
294     assertTrue(fc.getFeatureColour("coding_exon").isColourByLabel());
295     assertEquals(1, fc.compare("sequence_variant", "exon"));
296     assertEquals(-1, fc.compare("exon", "sequence_variant"));
297     assertEquals(1, fc.compare("feature_variant", "coding_exon"));
298     assertEquals(-1, fc.compare("coding_exon", "feature_variant"));
299     assertEquals(1f, fc.getTransparency());
300   }
301
302   @Test(groups = "Network")
303   public void testGetGeneIds()
304   {
305     /*
306      * ENSG00000158828 gene id PINK1 human
307      * ENST00000321556 transcript for the same gene - should not be duplicated
308      * P30419 Uniprot identifier for ENSG00000136448
309      * ENST00000592782 transcript for Uniprot gene - should not be duplicated
310      * BRAF - gene name resolvabe (at time of writing) for 6 model species
311      */
312     String ids = "ENSG00000158828 ENST00000321556 P30419 ENST00000592782 BRAF";
313     EnsemblGene testee = new EnsemblGene();
314     List<String> geneIds = testee.getGeneIds(ids);
315     assertEquals(8, geneIds.size());
316     assertTrue(geneIds.contains("ENSG00000158828"));
317     assertTrue(geneIds.contains("ENSG00000136448"));
318     assertTrue(geneIds.contains("ENSG00000157764")); // BRAF human
319     assertTrue(geneIds.contains("ENSMUSG00000002413")); // mouse
320     assertTrue(geneIds.contains("ENSRNOG00000010957")); // rat
321     assertTrue(geneIds.contains("ENSXETG00000004845")); // xenopus
322     assertTrue(geneIds.contains("ENSDARG00000017661")); // zebrafish
323     assertTrue(geneIds.contains("ENSGALG00000012865")); // chicken
324   }
325 }