JAL-3076 refactor for more efficient scan of 'gene' features
[jalview.git] / test / jalview / ext / ensembl / EnsemblGenomeTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ext.ensembl;
22
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertTrue;
26
27 import jalview.datamodel.Sequence;
28 import jalview.datamodel.SequenceDummy;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.gui.JvOptionPane;
32 import jalview.io.gff.SequenceOntologyFactory;
33 import jalview.io.gff.SequenceOntologyLite;
34 import jalview.util.MapList;
35
36 import java.util.List;
37
38 import org.testng.annotations.AfterClass;
39 import org.testng.annotations.BeforeClass;
40 import org.testng.annotations.Test;
41
42 public class EnsemblGenomeTest
43 {
44
45   @BeforeClass(alwaysRun = true)
46   public void setUpJvOptionPane()
47   {
48     JvOptionPane.setInteractiveMode(false);
49     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
50   }
51
52   @BeforeClass(alwaysRun = true)
53   public void setUp()
54   {
55     SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
56   }
57
58   @AfterClass(alwaysRun = true)
59   public void tearDown()
60   {
61     SequenceOntologyFactory.setInstance(null);
62   }
63
64   /**
65    * Test that the genomic sequence part of genomic sequence is correctly
66    * identified by 'transcript' features (or subtypes) with the correct gene ID
67    */
68   @Test(groups = "Functional")
69   public void testGetGenomicRangesFromFeatures()
70   {
71     EnsemblGenome testee = new EnsemblGenome();
72     SequenceI genomic = new SequenceDummy("chr7");
73     genomic.setStart(10000);
74     genomic.setEnd(50000);
75     String transcriptId = "ABC123";
76
77     // transcript at (start+10000) length 501
78     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
79             20500, 0f, null);
80     sf.setValue("ID", "transcript:" + transcriptId);
81     sf.setStrand("+");
82     genomic.addSequenceFeature(sf);
83
84     // transcript (sub-type) at (start + 10500) length 101
85     sf = new SequenceFeature("ncRNA", "", 10500, 10600, 0f, null);
86     sf.setValue("ID", "transcript:" + transcriptId);
87     sf.setStrand("+");
88     genomic.addSequenceFeature(sf);
89
90     // Ensembl treats NMD_transcript_variant as if transcript
91     // although strictly it is a sequence_variant in SO
92     sf = new SequenceFeature("NMD_transcript_variant", "", 11000, 12000,
93             0f, null);
94     sf.setValue("ID", "transcript:" + transcriptId);
95     sf.setStrand("+");
96     genomic.addSequenceFeature(sf);
97
98     // transcript with a different ID doesn't count
99     sf = new SequenceFeature("transcript", "", 11500, 12600, 0f, null);
100     sf.setValue("ID", "transcript:anotherOne");
101     genomic.addSequenceFeature(sf);
102
103     // parent of transcript feature doesn't count
104     sf = new SequenceFeature("gene_member_region", "", 10000, 50000, 0f,
105             null);
106     genomic.addSequenceFeature(sf);
107
108     MapList ranges = testee.getGenomicRangesFromFeatures(genomic,
109             transcriptId, 23);
110     List<int[]> fromRanges = ranges.getFromRanges();
111     assertEquals(3, fromRanges.size());
112     // from ranges should be sorted by start order
113     assertEquals(10500, fromRanges.get(0)[0]);
114     assertEquals(10600, fromRanges.get(0)[1]);
115     assertEquals(11000, fromRanges.get(1)[0]);
116     assertEquals(12000, fromRanges.get(1)[1]);
117     assertEquals(20000, fromRanges.get(2)[0]);
118     assertEquals(20500, fromRanges.get(2)[1]);
119     // to range should start from given start numbering
120     List<int[]> toRanges = ranges.getToRanges();
121     assertEquals(1, toRanges.size());
122     assertEquals(23, toRanges.get(0)[0]);
123     assertEquals(1625, toRanges.get(0)[1]);
124   }
125
126   /**
127    * Test the method that retains features except for 'transcript' (or
128    * sub-type), or those with parent other than the given id
129    */
130   @Test(groups = "Functional")
131   public void testRetainFeature()
132   {
133     String accId = "ABC123";
134     EnsemblGenome testee = new EnsemblGenome();
135
136     SequenceFeature sf = new SequenceFeature("transcript", "", 20000,
137             20500, 0f, null);
138     assertFalse(testee.retainFeature(sf, accId));
139
140     sf = new SequenceFeature("mature_transcript", "", 20000, 20500, 0f,
141             null);
142     assertFalse(testee.retainFeature(sf, accId));
143
144     sf = new SequenceFeature("NMD_transcript_variant", "", 20000, 20500,
145             0f, null);
146     assertFalse(testee.retainFeature(sf, accId));
147
148     // other feature with no parent is kept
149     sf = new SequenceFeature("anything", "", 20000, 20500, 0f, null);
150     assertTrue(testee.retainFeature(sf, accId));
151
152     // other feature with correct parent is kept
153     sf.setValue("Parent", "transcript:" + accId);
154     assertTrue(testee.retainFeature(sf, accId));
155
156     // other feature with wrong parent is not kept
157     sf.setValue("Parent", "transcript:XYZ");
158     assertFalse(testee.retainFeature(sf, accId));
159   }
160
161   /**
162    * Test the method that picks out 'transcript' (or subtype) features with the
163    * accession id as ID
164    */
165   @Test(groups = "Functional")
166   public void testGetIdentifyingFeatures()
167   {
168     String accId = "ABC123";
169     SequenceI seq = new Sequence(accId, "HEARTS");
170
171     // transcript with no ID not valid
172     SequenceFeature sf1 = new SequenceFeature("transcript", "", 1, 2, 0f,
173             null);
174     seq.addSequenceFeature(sf1);
175
176     // transcript with wrong ID not valid
177     SequenceFeature sf2 = new SequenceFeature("transcript", "", 1, 2, 0f,
178             null);
179     sf2.setValue("ID", "transcript");
180     seq.addSequenceFeature(sf2);
181
182     // transcript with right ID is valid
183     SequenceFeature sf3 = new SequenceFeature("transcript", "", 1, 2, 0f,
184             null);
185     sf3.setValue("ID", "transcript:" + accId);
186     seq.addSequenceFeature(sf3);
187
188     // transcript sub-type with right ID is valid
189     SequenceFeature sf4 = new SequenceFeature("ncRNA", "", 1, 2, 0f, null);
190     sf4.setValue("ID", "transcript:" + accId);
191     seq.addSequenceFeature(sf4);
192
193     // Ensembl treats NMD_transcript_variant as if a transcript
194     SequenceFeature sf5 = new SequenceFeature("NMD_transcript_variant", "",
195             1, 2, 0f, null);
196     sf5.setValue("ID", "transcript:" + accId);
197     seq.addSequenceFeature(sf5);
198
199     // gene not valid:
200     SequenceFeature sf6 = new SequenceFeature("gene", "", 1, 2, 0f, null);
201     sf6.setValue("ID", "transcript:" + accId);
202     seq.addSequenceFeature(sf6);
203
204     // exon not valid:
205     SequenceFeature sf7 = new SequenceFeature("exon", "", 1, 2, 0f, null);
206     sf7.setValue("ID", "transcript:" + accId);
207     seq.addSequenceFeature(sf7);
208
209     List<SequenceFeature> sfs = new EnsemblGenome()
210             .getIdentifyingFeatures(seq, accId);
211     assertFalse(sfs.contains(sf1));
212     assertFalse(sfs.contains(sf2));
213     assertTrue(sfs.contains(sf3));
214     assertTrue(sfs.contains(sf4));
215     assertTrue(sfs.contains(sf5));
216     assertFalse(sfs.contains(sf6));
217     assertFalse(sfs.contains(sf7));
218   }
219
220 }