JAL-2679 case-insensitive comparison of query and retrieved accession
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.api.FeatureSettingsModelI;
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.io.gff.SequenceOntologyFactory;
33 import jalview.io.gff.SequenceOntologyLite;
34 import jalview.util.MapList;
35
36 import java.awt.Color;
37 import java.util.List;
38
39 import org.testng.annotations.AfterClass;
40 import org.testng.annotations.BeforeClass;
41 import org.testng.annotations.Test;
42
43 public class EnsemblGeneTest
44 {
45
46   @BeforeClass(alwaysRun = true)
47   public void setUpJvOptionPane()
48   {
49     JvOptionPane.setInteractiveMode(false);
50     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
51   }
52
53   @BeforeClass(alwaysRun = true)
54   public void setUp()
55   {
56     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
57   }
58
59   @AfterClass(alwaysRun = true)
60   public void tearDown()
61   {
62     SequenceOntologyFactory.setInstance(null);
63   }
64
65   /**
66    * Test that the gene part of genomic sequence is uniquely identified by a
67    * 'gene' features (or subtype) with the correct gene ID
68    */
69   @Test(groups = "Functional")
70   public void testGetGenomicRangesFromFeatures()
71   {
72     EnsemblGene testee = new EnsemblGene();
73     SequenceI genomic = new SequenceDummy("chr7");
74     genomic.setStart(10000);
75     genomic.setEnd(50000);
76     String geneId = "ABC123";
77
78     // gene at (start+20000) length 501
79     // should be ignored - the first 'gene' found defines the whole range
80     // (note features are found in position order, not addition order)
81     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
82             null);
83     sf.setValue("ID", "gene:" + geneId);
84     sf.setStrand("+");
85     genomic.addSequenceFeature(sf);
86
87     // gene at (start + 10500) length 101
88     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
89     sf.setValue("ID", "gene:" + geneId);
90     sf.setStrand("+");
91     genomic.addSequenceFeature(sf);
92
93     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
94             23);
95     List<int[]> fromRanges = ranges.getFromRanges();
96     assertEquals(1, fromRanges.size());
97     assertEquals(10500, fromRanges.get(0)[0]);
98     assertEquals(10600, fromRanges.get(0)[1]);
99     // to range should start from given start numbering
100     List<int[]> toRanges = ranges.getToRanges();
101     assertEquals(1, toRanges.size());
102     assertEquals(23, toRanges.get(0)[0]);
103     assertEquals(123, toRanges.get(0)[1]);
104   }
105
106   /**
107    * Test variant using a sub-type of gene from the Sequence Ontology
108    */
109   @Test(groups = "Functional")
110   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
111   {
112     EnsemblGene testee = new EnsemblGene();
113     SequenceI genomic = new SequenceDummy("chr7");
114     genomic.setStart(10000);
115     genomic.setEnd(50000);
116     String geneId = "ABC123";
117
118     // gene at (start+20000) length 501
119     // should be ignored - the first 'gene' found defines the whole range
120     // (real data would only have one such feature)
121     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
122             20500, 0f, null);
123     sf.setValue("ID", "gene:" + geneId);
124     sf.setStrand("-");
125     genomic.addSequenceFeature(sf);
126
127     // gene at (start + 10500) length 101
128     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
129     sf.setValue("ID", "gene:" + geneId);
130     sf.setStrand("+");
131     genomic.addSequenceFeature(sf);
132
133     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
134             23);
135     List<int[]> fromRanges = ranges.getFromRanges();
136     assertEquals(1, fromRanges.size());
137     // from range on reverse strand:
138     assertEquals(10500, fromRanges.get(0)[0]);
139     assertEquals(10600, fromRanges.get(0)[1]);
140     // to range should start from given start numbering
141     List<int[]> toRanges = ranges.getToRanges();
142     assertEquals(1, toRanges.size());
143     assertEquals(23, toRanges.get(0)[0]);
144     assertEquals(123, toRanges.get(0)[1]);
145   }
146
147   /**
148    * Test the method that extracts transcript (or subtype) features with a
149    * specified gene as parent
150    */
151   @Test(groups = "Functional")
152   public void testGetTranscriptFeatures()
153   {
154     SequenceI genomic = new SequenceDummy("chr7");
155     genomic.setStart(10000);
156     genomic.setEnd(50000);
157     String geneId = "ABC123";
158
159     // transcript feature
160     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
161             20500, 0f, null);
162     sf1.setValue("Parent", "gene:" + geneId);
163     sf1.setValue("transcript_id", "transcript1");
164     genomic.addSequenceFeature(sf1);
165
166     // transcript sub-type feature
167     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500,
168             0f, null);
169     sf2.setValue("Parent", "gene:" + geneId);
170     sf2.setValue("transcript_id", "transcript2");
171     genomic.addSequenceFeature(sf2);
172
173     // NMD_transcript_variant treated like transcript in Ensembl
174     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
175             22000, 22500, 0f, null);
176     // id matching should not be case-sensitive
177     sf3.setValue("Parent", "gene:" + geneId.toLowerCase());
178     sf3.setValue("transcript_id", "transcript3");
179     genomic.addSequenceFeature(sf3);
180
181     // transcript for a different gene - ignored
182     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500,
183             0f, null);
184     sf4.setValue("Parent", "gene:XYZ");
185     sf4.setValue("transcript_id", "transcript4");
186     genomic.addSequenceFeature(sf4);
187
188     EnsemblGene testee = new EnsemblGene();
189
190     /*
191      * with no filter
192      */
193     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
194             genomic);
195     assertEquals(3, features.size());
196     assertTrue(features.contains(sf1));
197     assertTrue(features.contains(sf2));
198     assertTrue(features.contains(sf3));
199   }
200
201   /**
202    * Test the method that retains features except for 'gene', or 'transcript'
203    * with parent other than the given id
204    */
205   @Test(groups = "Functional")
206   public void testRetainFeature()
207   {
208     String geneId = "ABC123";
209     EnsemblGene testee = new EnsemblGene();
210     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
211             null);
212     sf.setValue("ID", "gene:" + geneId);
213     assertFalse(testee.retainFeature(sf, geneId));
214
215     sf = new SequenceFeature("transcript", "", 20000, 20500, 0f, null);
216     sf.setValue("Parent", "gene:" + geneId);
217     assertTrue(testee.retainFeature(sf, geneId));
218
219     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
220             null);
221     sf.setValue("Parent", "gene:" + geneId);
222     assertTrue(testee.retainFeature(sf, geneId));
223
224     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
225             0f, null);
226     sf.setValue("Parent", "gene:" + geneId);
227     assertTrue(testee.retainFeature(sf, geneId));
228
229     sf.setValue("Parent", "gene:XYZ");
230     assertFalse(testee.retainFeature(sf, geneId));
231
232     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
233     assertTrue(testee.retainFeature(sf, geneId));
234   }
235
236   /**
237    * Test the method that picks out 'gene' (or subtype) features with the
238    * accession id as ID
239    */
240   @Test(groups = "Functional")
241   public void testIdentifiesSequence()
242   {
243     String accId = "ABC123";
244     EnsemblGene testee = new EnsemblGene();
245
246     // gene with no ID not valid
247     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
248     assertFalse(testee.identifiesSequence(sf, accId));
249
250     // gene with wrong ID not valid
251     sf.setValue("ID", "gene:XYZ");
252     assertFalse(testee.identifiesSequence(sf, accId));
253
254     // gene with right ID is valid
255     sf.setValue("ID", "gene:" + accId);
256     assertTrue(testee.identifiesSequence(sf, accId));
257
258     // gene sub-type with right ID is valid
259     sf = new SequenceFeature("snRNA_gene", "", 1, 2, 0f, null);
260     sf.setValue("ID", "gene:" + accId);
261     assertTrue(testee.identifiesSequence(sf, accId));
262
263     // test is not case-sensitive
264     assertTrue(testee.identifiesSequence(sf, accId.toLowerCase()));
265
266     // transcript not valid:
267     sf = new SequenceFeature("transcript", "", 1, 2, 0f, null);
268     sf.setValue("ID", "gene:" + accId);
269     assertFalse(testee.identifiesSequence(sf, accId));
270
271     // exon not valid:
272     sf = new SequenceFeature("exon", "", 1, 2, 0f, null);
273     sf.setValue("ID", "gene:" + accId);
274     assertFalse(testee.identifiesSequence(sf, accId));
275   }
276
277   /**
278    * Check behaviour of feature colour scheme for EnsemblGene sequences.
279    * Currently coded to display exon and sequence_variant (or sub-types) only,
280    * with sequence_variant in red above exon coloured by label.
281    */
282   @Test(groups = "Functional")
283   public void testGetFeatureColourScheme()
284   {
285     FeatureSettingsModelI fc = new EnsemblGene().getFeatureColourScheme();
286     assertTrue(fc.isFeatureDisplayed("exon"));
287     assertTrue(fc.isFeatureDisplayed("coding_exon")); // subtype of exon
288     assertTrue(fc.isFeatureDisplayed("sequence_variant"));
289     assertTrue(fc.isFeatureDisplayed("feature_variant")); // subtype
290     assertFalse(fc.isFeatureDisplayed("transcript"));
291     assertEquals(Color.RED, fc.getFeatureColour("sequence_variant")
292             .getColour());
293     assertEquals(Color.RED, fc.getFeatureColour("feature_variant")
294             .getColour());
295     assertTrue(fc.getFeatureColour("exon").isColourByLabel());
296     assertTrue(fc.getFeatureColour("coding_exon").isColourByLabel());
297     assertEquals(1, fc.compare("sequence_variant", "exon"));
298     assertEquals(-1, fc.compare("exon", "sequence_variant"));
299     assertEquals(1, fc.compare("feature_variant", "coding_exon"));
300     assertEquals(-1, fc.compare("coding_exon", "feature_variant"));
301     assertEquals(1f, fc.getTransparency());
302   }
303
304   @Test(groups = "Network")
305   public void testGetGeneIds()
306   {
307     /*
308      * ENSG00000158828 gene id PINK1 human
309      * ENST00000321556 transcript for the same gene - should not be duplicated
310      * P30419 Uniprot identifier for ENSG00000136448
311      * ENST00000592782 transcript for Uniprot gene - should not be duplicated
312      * BRAF - gene name resolvabe (at time of writing) for 6 model species
313      */
314     String ids = "ENSG00000158828 ENST00000321556 P30419 ENST00000592782 BRAF";
315     EnsemblGene testee = new EnsemblGene();
316     List<String> geneIds = testee.getGeneIds(ids);
317     assertEquals(8, geneIds.size());
318     assertTrue(geneIds.contains("ENSG00000158828"));
319     assertTrue(geneIds.contains("ENSG00000136448"));
320     assertTrue(geneIds.contains("ENSG00000157764")); // BRAF human
321     assertTrue(geneIds.contains("ENSMUSG00000002413")); // mouse
322     assertTrue(geneIds.contains("ENSRNOG00000010957")); // rat
323     assertTrue(geneIds.contains("ENSXETG00000004845")); // xenopus
324     assertTrue(geneIds.contains("ENSDARG00000017661")); // zebrafish
325     assertTrue(geneIds.contains("ENSGALG00000012865")); // chicken
326   }
327 }