a262c1eb6ce9c82140a876cb5cfe09decc9c0a6c
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertSame;
6 import static org.testng.AssertJUnit.assertTrue;
7
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
14
15 import java.util.Arrays;
16 import java.util.List;
17
18 import org.testng.annotations.AfterClass;
19 import org.testng.annotations.BeforeClass;
20 import org.testng.annotations.Test;
21
22 public class EnsemblGeneTest
23 {
24   @BeforeClass
25   public void setUp()
26   {
27     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
28   }
29
30   @AfterClass
31   public void tearDown()
32   {
33     SequenceOntologyFactory.setInstance(null);
34   }
35
36   /**
37    * Test that the gene part of genomic sequence is uniquely identified by a
38    * 'gene' features (or subtype) with the correct gene ID
39    */
40   @Test(groups = "Functional")
41   public void testGetGenomicRangesFromFeatures()
42   {
43     EnsemblGene testee = new EnsemblGene();
44     SequenceI genomic = new SequenceDummy("chr7");
45     genomic.setStart(10000);
46     genomic.setEnd(50000);
47     String geneId = "ABC123";
48
49     // gene at (start+10000) length 501
50     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
51             null);
52     sf.setValue("ID", "gene:" + geneId);
53     sf.setStrand("+");
54     genomic.addSequenceFeature(sf);
55
56     // gene at (start + 10500) length 101
57     // should be ignored - the first 'gene' found defines the whole range
58     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
59     sf.setValue("ID", "gene:" + geneId);
60     sf.setStrand("+");
61     genomic.addSequenceFeature(sf);
62
63     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
64             23);
65     List<int[]> fromRanges = ranges.getFromRanges();
66     assertEquals(1, fromRanges.size());
67     assertEquals(20000, fromRanges.get(0)[0]);
68     assertEquals(20500, fromRanges.get(0)[1]);
69     // to range should start from given start numbering
70     List<int[]> toRanges = ranges.getToRanges();
71     assertEquals(1, toRanges.size());
72     assertEquals(23, toRanges.get(0)[0]);
73     assertEquals(523, toRanges.get(0)[1]);
74   }
75
76   /**
77    * Test variant using a sub-type of gene from the Sequence Ontology
78    */
79   @Test(groups = "Functional")
80   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
81   {
82     EnsemblGene testee = new EnsemblGene();
83     SequenceI genomic = new SequenceDummy("chr7");
84     genomic.setStart(10000);
85     genomic.setEnd(50000);
86     String geneId = "ABC123";
87
88     // gene at (start+10000) length 501
89     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
90             20500, 0f, null);
91     sf.setValue("ID", "gene:" + geneId);
92     sf.setStrand("-");
93     genomic.addSequenceFeature(sf);
94
95     // gene at (start + 10500) length 101
96     // should be ignored - the first 'gene' found defines the whole range
97     // (real data would only have one such feature)
98     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
99     sf.setValue("ID", "gene:" + geneId);
100     sf.setStrand("+");
101     genomic.addSequenceFeature(sf);
102
103     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
104             23);
105     List<int[]> fromRanges = ranges.getFromRanges();
106     assertEquals(1, fromRanges.size());
107     // from range on reverse strand:
108     assertEquals(20500, fromRanges.get(0)[0]);
109     assertEquals(20000, fromRanges.get(0)[1]);
110     // to range should start from given start numbering
111     List<int[]> toRanges = ranges.getToRanges();
112     assertEquals(1, toRanges.size());
113     assertEquals(23, toRanges.get(0)[0]);
114     assertEquals(523, toRanges.get(0)[1]);
115   }
116
117   /**
118    * Test the method that extracts transcript (or subtype) features with a
119    * specified gene as parent
120    */
121   @Test(groups = "Functional")
122   public void testGetTranscriptFeatures()
123   {
124     SequenceI genomic = new SequenceDummy("chr7");
125     genomic.setStart(10000);
126     genomic.setEnd(50000);
127     String geneId = "ABC123";
128
129     // transcript feature
130     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
131             20500, 0f, null);
132     sf1.setValue("Parent", "gene:" + geneId);
133     sf1.setValue("transcript_id", "transcript1");
134     genomic.addSequenceFeature(sf1);
135
136     // transcript sub-type feature
137     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
138             20500, 0f, null);
139     sf2.setValue("Parent", "gene:" + geneId);
140     sf2.setValue("transcript_id", "transcript2");
141     genomic.addSequenceFeature(sf2);
142
143     // NMD_transcript_variant treated like transcript in Ensembl
144     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
145             20000, 20500, 0f, null);
146     sf3.setValue("Parent", "gene:" + geneId);
147     sf3.setValue("transcript_id", "transcript3");
148     genomic.addSequenceFeature(sf3);
149
150     // transcript for a different gene - ignored
151     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
152             0f, null);
153     sf4.setValue("Parent", "gene:XYZ");
154     sf4.setValue("transcript_id", "transcript4");
155     genomic.addSequenceFeature(sf4);
156
157     EnsemblGene testee = new EnsemblGene();
158
159     /*
160      * with no filter
161      */
162     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
163             genomic, null);
164     assertEquals(3, features.size());
165     assertSame(sf1, features.get(0));
166     assertSame(sf2, features.get(1));
167     assertSame(sf3, features.get(2));
168
169     /*
170      * with filter
171      */
172     List<String> ids = Arrays.asList(new String[] { "transcript2",
173         "transcript3" });
174     features = testee.getTranscriptFeatures(geneId, genomic, ids);
175     assertEquals(2, features.size());
176     assertSame(sf2, features.get(0));
177     assertSame(sf3, features.get(1));
178   }
179
180   /**
181    * Test the method that retains features except for 'gene', or 'transcript'
182    * with parent other than the given id
183    */
184   @Test(groups = "Functional")
185   public void testRetainFeature()
186   {
187     String geneId = "ABC123";
188     EnsemblGene testee = new EnsemblGene();
189     SequenceFeature sf = new SequenceFeature("gene", "", 20000,
190             20500, 0f, null);
191     sf.setValue("ID", "gene:" + geneId);
192     assertFalse(testee.retainFeature(sf, geneId));
193
194     sf.setType("transcript");
195     sf.setValue("Parent", "gene:" + geneId);
196     assertTrue(testee.retainFeature(sf, geneId));
197
198     sf.setType("mature_transcript");
199     sf.setValue("Parent", "gene:" + geneId);
200     assertTrue(testee.retainFeature(sf, geneId));
201
202     sf.setType("NMD_transcript_variant");
203     sf.setValue("Parent", "gene:" + geneId);
204     assertTrue(testee.retainFeature(sf, geneId));
205
206     sf.setValue("Parent", "gene:XYZ");
207     assertFalse(testee.retainFeature(sf, geneId));
208
209     sf.setType("anything");
210     assertTrue(testee.retainFeature(sf, geneId));
211   }
212
213   /**
214    * Test the method that picks out 'gene' (or subtype) features with the
215    * accession id as ID
216    */
217   @Test(groups = "Functional")
218   public void testIdentifiesSequence()
219   {
220     String accId = "ABC123";
221     EnsemblGene testee = new EnsemblGene();
222   
223     // gene with no ID not valid
224     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
225     assertFalse(testee.identifiesSequence(sf, accId));
226   
227     // gene with wrong ID not valid
228     sf.setValue("ID", "gene:XYZ");
229     assertFalse(testee.identifiesSequence(sf, accId));
230   
231     // gene with right ID is valid
232     sf.setValue("ID", "gene:" + accId);
233     assertTrue(testee.identifiesSequence(sf, accId));
234   
235     // gene sub-type with right ID is valid
236     sf.setType("snRNA_gene");
237     assertTrue(testee.identifiesSequence(sf, accId));
238   
239     // transcript not valid:
240     sf.setType("transcript");
241     assertFalse(testee.identifiesSequence(sf, accId));
242   
243     // exon not valid:
244     sf.setType("exon");
245     assertFalse(testee.identifiesSequence(sf, accId));
246   }
247 }