JAL-1705 return Ensembl genes for model species for a gene name
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertSame;
6 import static org.testng.AssertJUnit.assertTrue;
7
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
14
15 import java.util.List;
16
17 import org.testng.annotations.AfterClass;
18 import org.testng.annotations.BeforeClass;
19 import org.testng.annotations.Test;
20
21 public class EnsemblGeneTest
22 {
23   @BeforeClass
24   public void setUp()
25   {
26     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
27   }
28
29   @AfterClass
30   public void tearDown()
31   {
32     SequenceOntologyFactory.setInstance(null);
33   }
34
35   /**
36    * Test that the gene part of genomic sequence is uniquely identified by a
37    * 'gene' features (or subtype) with the correct gene ID
38    */
39   @Test(groups = "Functional")
40   public void testGetGenomicRangesFromFeatures()
41   {
42     EnsemblGene testee = new EnsemblGene();
43     SequenceI genomic = new SequenceDummy("chr7");
44     genomic.setStart(10000);
45     genomic.setEnd(50000);
46     String geneId = "ABC123";
47
48     // gene at (start+10000) length 501
49     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
50             null);
51     sf.setValue("ID", "gene:" + geneId);
52     sf.setStrand("+");
53     genomic.addSequenceFeature(sf);
54
55     // gene at (start + 10500) length 101
56     // should be ignored - the first 'gene' found defines the whole range
57     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
58     sf.setValue("ID", "gene:" + geneId);
59     sf.setStrand("+");
60     genomic.addSequenceFeature(sf);
61
62     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
63             23);
64     List<int[]> fromRanges = ranges.getFromRanges();
65     assertEquals(1, fromRanges.size());
66     assertEquals(20000, fromRanges.get(0)[0]);
67     assertEquals(20500, fromRanges.get(0)[1]);
68     // to range should start from given start numbering
69     List<int[]> toRanges = ranges.getToRanges();
70     assertEquals(1, toRanges.size());
71     assertEquals(23, toRanges.get(0)[0]);
72     assertEquals(523, toRanges.get(0)[1]);
73   }
74
75   /**
76    * Test variant using a sub-type of gene from the Sequence Ontology
77    */
78   @Test(groups = "Functional")
79   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
80   {
81     EnsemblGene testee = new EnsemblGene();
82     SequenceI genomic = new SequenceDummy("chr7");
83     genomic.setStart(10000);
84     genomic.setEnd(50000);
85     String geneId = "ABC123";
86
87     // gene at (start+10000) length 501
88     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
89             20500, 0f, null);
90     sf.setValue("ID", "gene:" + geneId);
91     sf.setStrand("-");
92     genomic.addSequenceFeature(sf);
93
94     // gene at (start + 10500) length 101
95     // should be ignored - the first 'gene' found defines the whole range
96     // (real data would only have one such feature)
97     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
98     sf.setValue("ID", "gene:" + geneId);
99     sf.setStrand("+");
100     genomic.addSequenceFeature(sf);
101
102     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
103             23);
104     List<int[]> fromRanges = ranges.getFromRanges();
105     assertEquals(1, fromRanges.size());
106     // from range on reverse strand:
107     assertEquals(20500, fromRanges.get(0)[0]);
108     assertEquals(20000, fromRanges.get(0)[1]);
109     // to range should start from given start numbering
110     List<int[]> toRanges = ranges.getToRanges();
111     assertEquals(1, toRanges.size());
112     assertEquals(23, toRanges.get(0)[0]);
113     assertEquals(523, toRanges.get(0)[1]);
114   }
115
116   /**
117    * Test the method that extracts transcript (or subtype) features with a
118    * specified gene as parent
119    */
120   @Test(groups = "Functional")
121   public void testGetTranscriptFeatures()
122   {
123     SequenceI genomic = new SequenceDummy("chr7");
124     genomic.setStart(10000);
125     genomic.setEnd(50000);
126     String geneId = "ABC123";
127
128     // transcript feature
129     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
130             20500, 0f, null);
131     sf1.setValue("Parent", "gene:" + geneId);
132     sf1.setValue("transcript_id", "transcript1");
133     genomic.addSequenceFeature(sf1);
134
135     // transcript sub-type feature
136     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
137             20500, 0f, null);
138     sf2.setValue("Parent", "gene:" + geneId);
139     sf2.setValue("transcript_id", "transcript2");
140     genomic.addSequenceFeature(sf2);
141
142     // NMD_transcript_variant treated like transcript in Ensembl
143     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
144             20000, 20500, 0f, null);
145     sf3.setValue("Parent", "gene:" + geneId);
146     sf3.setValue("transcript_id", "transcript3");
147     genomic.addSequenceFeature(sf3);
148
149     // transcript for a different gene - ignored
150     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
151             0f, null);
152     sf4.setValue("Parent", "gene:XYZ");
153     sf4.setValue("transcript_id", "transcript4");
154     genomic.addSequenceFeature(sf4);
155
156     EnsemblGene testee = new EnsemblGene();
157
158     /*
159      * with no filter
160      */
161     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
162             genomic);
163     assertEquals(3, features.size());
164     assertSame(sf1, features.get(0));
165     assertSame(sf2, features.get(1));
166     assertSame(sf3, features.get(2));
167   }
168
169   /**
170    * Test the method that retains features except for 'gene', or 'transcript'
171    * with parent other than the given id
172    */
173   @Test(groups = "Functional")
174   public void testRetainFeature()
175   {
176     String geneId = "ABC123";
177     EnsemblGene testee = new EnsemblGene();
178     SequenceFeature sf = new SequenceFeature("gene", "", 20000,
179             20500, 0f, null);
180     sf.setValue("ID", "gene:" + geneId);
181     assertFalse(testee.retainFeature(sf, geneId));
182
183     sf.setType("transcript");
184     sf.setValue("Parent", "gene:" + geneId);
185     assertTrue(testee.retainFeature(sf, geneId));
186
187     sf.setType("mature_transcript");
188     sf.setValue("Parent", "gene:" + geneId);
189     assertTrue(testee.retainFeature(sf, geneId));
190
191     sf.setType("NMD_transcript_variant");
192     sf.setValue("Parent", "gene:" + geneId);
193     assertTrue(testee.retainFeature(sf, geneId));
194
195     sf.setValue("Parent", "gene:XYZ");
196     assertFalse(testee.retainFeature(sf, geneId));
197
198     sf.setType("anything");
199     assertTrue(testee.retainFeature(sf, geneId));
200   }
201
202   /**
203    * Test the method that picks out 'gene' (or subtype) features with the
204    * accession id as ID
205    */
206   @Test(groups = "Functional")
207   public void testIdentifiesSequence()
208   {
209     String accId = "ABC123";
210     EnsemblGene testee = new EnsemblGene();
211   
212     // gene with no ID not valid
213     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
214     assertFalse(testee.identifiesSequence(sf, accId));
215   
216     // gene with wrong ID not valid
217     sf.setValue("ID", "gene:XYZ");
218     assertFalse(testee.identifiesSequence(sf, accId));
219   
220     // gene with right ID is valid
221     sf.setValue("ID", "gene:" + accId);
222     assertTrue(testee.identifiesSequence(sf, accId));
223   
224     // gene sub-type with right ID is valid
225     sf.setType("snRNA_gene");
226     assertTrue(testee.identifiesSequence(sf, accId));
227   
228     // transcript not valid:
229     sf.setType("transcript");
230     assertFalse(testee.identifiesSequence(sf, accId));
231   
232     // exon not valid:
233     sf.setType("exon");
234     assertFalse(testee.identifiesSequence(sf, accId));
235   }
236 }