JAL-1705 further unit tests
[jalview.git] / test / jalview / ext / ensembl / EnsemblGeneTest.java
1 package jalview.ext.ensembl;
2
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertFalse;
5 import static org.testng.AssertJUnit.assertSame;
6 import static org.testng.AssertJUnit.assertTrue;
7
8 import jalview.datamodel.SequenceDummy;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.gff.SequenceOntologyFactory;
12 import jalview.io.gff.SequenceOntologyLite;
13 import jalview.util.MapList;
14
15 import java.util.List;
16
17 import org.testng.annotations.AfterClass;
18 import org.testng.annotations.BeforeClass;
19 import org.testng.annotations.Test;
20
21 public class EnsemblGeneTest
22 {
23   @BeforeClass
24   public void setUp()
25   {
26     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
27   }
28
29   @AfterClass
30   public void tearDown()
31   {
32     SequenceOntologyFactory.setInstance(null);
33   }
34
35   /**
36    * Test that the gene part of genomic sequence is uniquely identified by a
37    * 'gene' features (or subtype) with the correct gene ID
38    */
39   @Test(groups = "Functional")
40   public void testGetGenomicRangesFromFeatures()
41   {
42     EnsemblGene testee = new EnsemblGene();
43     SequenceI genomic = new SequenceDummy("chr7");
44     genomic.setStart(10000);
45     genomic.setEnd(50000);
46     String geneId = "ABC123";
47
48     // gene at (start+10000) length 501
49     SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f,
50             null);
51     sf.setValue("ID", "gene:" + geneId);
52     sf.setStrand("+");
53     genomic.addSequenceFeature(sf);
54
55     // gene at (start + 10500) length 101
56     // should be ignored - the first 'gene' found defines the whole range
57     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
58     sf.setValue("ID", "gene:" + geneId);
59     sf.setStrand("+");
60     genomic.addSequenceFeature(sf);
61
62     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
63             23);
64     List<int[]> fromRanges = ranges.getFromRanges();
65     assertEquals(1, fromRanges.size());
66     assertEquals(20000, fromRanges.get(0)[0]);
67     assertEquals(20500, fromRanges.get(0)[1]);
68     // to range should start from given start numbering
69     List<int[]> toRanges = ranges.getToRanges();
70     assertEquals(1, toRanges.size());
71     assertEquals(23, toRanges.get(0)[0]);
72     assertEquals(523, toRanges.get(0)[1]);
73   }
74
75   /**
76    * Test variant using a sub-type of gene from the Sequence Ontology
77    */
78   @Test(groups = "Functional")
79   public void testGetGenomicRangesFromFeatures_ncRNA_gene_reverseStrand()
80   {
81     EnsemblGene testee = new EnsemblGene();
82     SequenceI genomic = new SequenceDummy("chr7");
83     genomic.setStart(10000);
84     genomic.setEnd(50000);
85     String geneId = "ABC123";
86
87     // gene at (start+10000) length 501
88     SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000,
89             20500, 0f, null);
90     sf.setValue("ID", "gene:" + geneId);
91     sf.setStrand("-");
92     genomic.addSequenceFeature(sf);
93
94     // gene at (start + 10500) length 101
95     // should be ignored - the first 'gene' found defines the whole range
96     // (real data would only have one such feature)
97     sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null);
98     sf.setValue("ID", "gene:" + geneId);
99     sf.setStrand("+");
100     genomic.addSequenceFeature(sf);
101
102     MapList ranges = testee.getGenomicRangesFromFeatures(genomic, geneId,
103             23);
104     List<int[]> fromRanges = ranges.getFromRanges();
105     assertEquals(1, fromRanges.size());
106     // from range on reverse strand:
107     assertEquals(20500, fromRanges.get(0)[0]);
108     assertEquals(20000, fromRanges.get(0)[1]);
109     // to range should start from given start numbering
110     List<int[]> toRanges = ranges.getToRanges();
111     assertEquals(1, toRanges.size());
112     assertEquals(23, toRanges.get(0)[0]);
113     assertEquals(523, toRanges.get(0)[1]);
114   }
115
116   /**
117    * Test the method that extracts transcript (or subtype) features with a
118    * specified gene as parent
119    */
120   @Test(groups = "Functional")
121   public void testGetTranscriptFeatures()
122   {
123     SequenceI genomic = new SequenceDummy("chr7");
124     genomic.setStart(10000);
125     genomic.setEnd(50000);
126     String geneId = "ABC123";
127
128     // transcript feature
129     SequenceFeature sf1 = new SequenceFeature("transcript", "", 20000,
130             20500, 0f, null);
131     sf1.setValue("Parent", "gene:" + geneId);
132     genomic.addSequenceFeature(sf1);
133
134     // transcript sub-type feature
135     SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000,
136             20500, 0f, null);
137     sf2.setValue("Parent", "gene:" + geneId);
138     genomic.addSequenceFeature(sf2);
139
140     // NMD_transcript_variant treated like transcript in Ensembl
141     SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "",
142             20000, 20500, 0f, null);
143     sf3.setValue("Parent", "gene:" + geneId);
144     genomic.addSequenceFeature(sf3);
145
146     // transcript for a different gene - ignored
147     SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500,
148             0f, null);
149     sf4.setValue("Parent", "gene:XYZ");
150     genomic.addSequenceFeature(sf4);
151
152     EnsemblGene testee = new EnsemblGene();
153     List<SequenceFeature> features = testee.getTranscriptFeatures(geneId,
154             genomic);
155     assertEquals(3, features.size());
156     assertSame(sf1, features.get(0));
157     assertSame(sf2, features.get(1));
158     assertSame(sf3, features.get(2));
159   }
160
161   /**
162    * Test the method that retains features except for 'gene', or 'transcript'
163    * with parent other than the given id
164    */
165   @Test(groups = "Functional")
166   public void testRetainFeature()
167   {
168     String geneId = "ABC123";
169     EnsemblGene testee = new EnsemblGene();
170     SequenceFeature sf = new SequenceFeature("gene", "", 20000,
171             20500, 0f, null);
172     sf.setValue("ID", "gene:" + geneId);
173     assertFalse(testee.retainFeature(sf, geneId));
174
175     sf.setType("transcript");
176     sf.setValue("Parent", "gene:" + geneId);
177     assertTrue(testee.retainFeature(sf, geneId));
178
179     sf.setType("mature_transcript");
180     sf.setValue("Parent", "gene:" + geneId);
181     assertTrue(testee.retainFeature(sf, geneId));
182
183     sf.setType("NMD_transcript_variant");
184     sf.setValue("Parent", "gene:" + geneId);
185     assertTrue(testee.retainFeature(sf, geneId));
186
187     sf.setValue("Parent", "gene:XYZ");
188     assertFalse(testee.retainFeature(sf, geneId));
189
190     sf.setType("anything");
191     assertTrue(testee.retainFeature(sf, geneId));
192   }
193
194   /**
195    * Test the method that picks out 'gene' (or subtype) features with the
196    * accession id as ID
197    */
198   @Test(groups = "Functional")
199   public void testIdentifiesSequence()
200   {
201     String accId = "ABC123";
202     EnsemblGene testee = new EnsemblGene();
203   
204     // gene with no ID not valid
205     SequenceFeature sf = new SequenceFeature("gene", "", 1, 2, 0f, null);
206     assertFalse(testee.identifiesSequence(sf, accId));
207   
208     // gene with wrong ID not valid
209     sf.setValue("ID", "gene:XYZ");
210     assertFalse(testee.identifiesSequence(sf, accId));
211   
212     // gene with right ID is valid
213     sf.setValue("ID", "gene:" + accId);
214     assertTrue(testee.identifiesSequence(sf, accId));
215   
216     // gene sub-type with right ID is valid
217     sf.setType("snRNA_gene");
218     assertTrue(testee.identifiesSequence(sf, accId));
219   
220     // transcript not valid:
221     sf.setType("transcript");
222     assertFalse(testee.identifiesSequence(sf, accId));
223   
224     // exon not valid:
225     sf.setType("exon");
226     assertFalse(testee.identifiesSequence(sf, accId));
227   }
228 }