2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.io.gff;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
25 import static org.testng.AssertJUnit.assertSame;
26 import static org.testng.AssertJUnit.assertTrue;
27 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
29 import jalview.datamodel.AlignedCodonFrame;
30 import jalview.datamodel.Alignment;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceDummy;
34 import jalview.datamodel.SequenceFeature;
35 import jalview.datamodel.SequenceI;
36 import jalview.gui.JvOptionPane;
38 import java.io.IOException;
39 import java.util.ArrayList;
40 import java.util.HashMap;
41 import java.util.List;
44 import org.testng.annotations.BeforeClass;
45 import org.testng.annotations.Test;
47 public class Gff3HelperTest
// TestNG class-level setup (BeforeClass with alwaysRun = true): switches
// JvOptionPane out of interactive mode and sets its mock response to
// CANCEL_OPTION.
// NOTE(review): presumably this keeps option dialogs from blocking an
// unattended test run - confirm against JvOptionPane.
50 @BeforeClass(alwaysRun = true)
51 public void setUpJvOptionPane()
53 JvOptionPane.setInteractiveMode(false);
54 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
58 * Test processing one PASA GFF line giving a match from forward strand to
// Scenario: one cDNA_match line with + in the GFF strand column and a Target
// attribute that also ends in +. The assertions expect a single mapping from
// gi|68711 12923-13060 to a placeholder sequence gi|N37351 at 1-138.
// NOTE(review): the summary sentence above breaks off in this view; going by
// the method name it presumably ends with forward strand - confirm against
// the complete file.
63 @Test(groups = "Functional")
64 public void testProcessCdnaMatch_forwardToForward() throws IOException
66 GffHelperBase testee = new Gff3Helper();
// collects any sequences that processGff adds while handling the line
67 List<SequenceI> newseqs = new ArrayList<SequenceI>();
// NOTE(review): only the string literal is visible on the next line; the
// expression that turns it into the declared String[] is not shown in this
// view - confirm against the complete file.
68 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
70 SequenceI seq = new Sequence("gi|68711",
71 "GAATTCGTTCATGTAGGTTGATTTTTATT");
72 seq.createDatasetSequence();
// the alignment is created from an empty sequence array
73 AlignmentI align = new Alignment(new SequenceI[] {});
76 * this should create a mapping from gi|68711/12923-13060
77 * to virtual sequence gi|N37351 (added to newseqs) positions 1-138
// NOTE(review): the meaning of the trailing false argument is not visible
// here - check GffHelperBase.processGff.
79 testee.processGff(seq, gff, align, newseqs, false);
// exactly one new sequence: a SequenceDummy named after the Target id
80 assertEquals(1, newseqs.size());
81 assertTrue(newseqs.get(0) instanceof SequenceDummy);
82 assertEquals("gi|N37351", newseqs.get(0).getName());
// exactly one AlignedCodonFrame was added to the alignment
83 assertEquals(1, align.getCodonFrames().size());
84 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
87 * 'dnaseqs' (map from) is here [gi|68711]
88 * 'aaseqs' (map to) is here [gi|N37351]
90 // TODO use more suitable naming in AlignedCodonFrame
// from side is the dataset sequence of seq; to side is the new placeholder
91 assertEquals(1, mapping.getAaSeqs().length);
92 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
93 assertEquals(1, mapping.getdnaSeqs().length);
94 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
// a single getdnaToProt entry holding one from-range and one to-range
95 assertEquals(1, mapping.getdnaToProt().length);
96 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
97 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
98 .getFromRanges().get(0));
99 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
// forward Target strand: the to-range is expected in ascending order
100 assertArrayEquals(new int[] { 1, 138 }, mapping.getdnaToProt()[0]
101 .getToRanges().get(0));
105 * Test processing one PASA GFF line giving a match from forward strand to
108 * @throws IOException
// Scenario: one cDNA_match line with + in the GFF strand column and a Target
// attribute ending in -. The assertions expect the same structure as the
// forward-to-forward case except that the to-range is reversed (138 to 1).
// NOTE(review): the summary sentence above breaks off in this view; going by
// the method name it presumably ends with reverse strand - confirm against
// the complete file.
110 @Test(groups = "Functional")
111 public void testProcessCdnaMatch_forwardToReverse() throws IOException
113 GffHelperBase testee = new Gff3Helper();
// collects any sequences that processGff adds while handling the line
114 List<SequenceI> newseqs = new ArrayList<SequenceI>();
// NOTE(review): only the string literal is visible on the next line; the
// expression that turns it into the declared String[] is not shown in this
// view - confirm against the complete file.
115 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 -"
117 SequenceI seq = new Sequence("gi|68711",
118 "GAATTCGTTCATGTAGGTTGATTTTTATT");
119 seq.createDatasetSequence();
// the alignment is created from an empty sequence array
120 AlignmentI align = new Alignment(new SequenceI[] {});
123 * this should create a mapping from gi|68711/12923-13060
124 * to virtual sequence gi|N37351 (added to newseqs) positions 138-1
// NOTE(review): the meaning of the trailing false argument is not visible
// here - check GffHelperBase.processGff.
126 testee.processGff(seq, gff, align, newseqs, false);
// exactly one new sequence: a SequenceDummy named after the Target id
127 assertEquals(1, newseqs.size());
128 assertTrue(newseqs.get(0) instanceof SequenceDummy);
129 assertEquals("gi|N37351", newseqs.get(0).getName());
// exactly one AlignedCodonFrame was added to the alignment
130 assertEquals(1, align.getCodonFrames().size());
131 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
134 * 'dnaseqs' (map from) is here [gi|68711]
135 * 'aaseqs' (map to) is here [gi|N37351]
137 // TODO use more suitable naming in AlignedCodonFrame
// from side is the dataset sequence of seq; to side is the new placeholder
138 assertEquals(1, mapping.getAaSeqs().length);
139 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
140 assertEquals(1, mapping.getdnaSeqs().length);
141 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
// a single getdnaToProt entry holding one from-range and one to-range
142 assertEquals(1, mapping.getdnaToProt().length);
143 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
144 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
145 .getFromRanges().get(0));
146 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
// reverse Target strand: the to-range is expected in descending order
147 assertArrayEquals(new int[] { 138, 1 }, mapping.getdnaToProt()[0]
148 .getToRanges().get(0));
152 * Test processing one PASA GFF line giving a match from reverse complement
153 * strand to forward strand
155 * @throws IOException
// Scenario: the GFF strand column holds - while the Target attribute ends
// in +. The only assertion visible in this view is that processGff leaves
// newseqs empty, i.e. no placeholder sequence is created.
157 @Test(groups = "Functional")
158 public void testProcessCdnaMatch_reverseToForward() throws IOException
160 GffHelperBase testee = new Gff3Helper();
// collects any sequences that processGff adds while handling the line
161 List<SequenceI> newseqs = new ArrayList<SequenceI>();
// NOTE(review): only the string literal is visible on the next line; the
// expression that turns it into the declared String[] is not shown in this
// view - confirm against the complete file.
162 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t-\t.\tID=align_68;Target=gi|N37351 1 138 +"
164 SequenceI seq = new Sequence("gi|68711",
165 "GAATTCGTTCATGTAGGTTGATTTTTATT");
166 seq.createDatasetSequence();
// the alignment is created from an empty sequence array
167 AlignmentI align = new Alignment(new SequenceI[] {});
170 * (For now) we don't process reverse complement mappings; to do this
171 * would require (a) creating a virtual sequence placeholder for the
172 * reverse complement (b) resolving the sequence by its id from some
173 * source (GFF ##FASTA or other) (c) creating the reverse complement
174 * sequence (d) updating the mapping to be to the reverse complement
// NOTE(review): sf is assigned here but no use of it is visible in this
// view; a statement may be elided between this call and the assertion
// below - confirm against the complete file.
176 SequenceFeature sf = testee.processGff(seq, gff, align, newseqs, false);
// no placeholder sequence is expected for a reverse-strand source match
178 assertTrue(newseqs.isEmpty());
182 * Test processing two PASA GFF lines representing a spliced mapping
184 * @throws IOException
// Scenario: two cDNA_match lines sharing ID align_68 and Target gi|N37351
// are processed one after the other against the same seq, align and
// newseqs. The assertions expect one placeholder sequence and one
// AlignedCodonFrame whose mapping has two from-ranges and one to-range.
186 @Test(groups = "Functional")
187 public void testProcessCdnaMatch_spliced() throws IOException
189 GffHelperBase testee = new Gff3Helper();
// shared across both processGff calls so the second call sees the first result
190 List<SequenceI> newseqs = new ArrayList<SequenceI>();
191 SequenceI seq = new Sequence("gi|68711",
192 "GAATTCGTTCATGTAGGTTGATTTTTATT");
193 seq.createDatasetSequence();
// the alignment is created from an empty sequence array
194 AlignmentI align = new Alignment(new SequenceI[] {});
196 // mapping from gi|68711 12923-13060 to gi|N37351 1-138
// NOTE(review): only the string literal is visible on the next line; the
// expression that turns it into the declared String[] is not shown in this
// view - confirm against the complete file.
197 String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
199 testee.processGff(seq, gff, align, newseqs, false);
200 // mapping from gi|68711 13411-13550 to gi|N37351 139-278
// NOTE(review): as above, the rest of this assignment is not visible here.
201 gff = "gi|68711\tblat-pasa\tcDNA_match\t13411\t13550\t98.55\t+\t.\tID=align_68;Target=gi|N37351 139 278 +"
203 testee.processGff(seq, gff, align, newseqs, false);
// the second line must reuse the placeholder created for the first one
205 assertEquals(1, newseqs.size());
206 assertTrue(newseqs.get(0) instanceof SequenceDummy);
207 assertEquals("gi|N37351", newseqs.get(0).getName());
209 // only 1 AlignedCodonFrame added to the alignment with both mappings!
210 // (this is important for 'align cdna to genome' to work correctly)
211 assertEquals(1, align.getCodonFrames().size());
// NOTE(review): the sibling tests fetch the frame via iterator().next();
// this one uses get(0) - harmless if getCodonFrames returns a List.
212 AlignedCodonFrame mapping = align.getCodonFrames().get(0);
215 * 'dnaseqs' (map from) is here [gi|68711]
216 * 'aaseqs' (map to) is here [gi|N37351]
218 // TODO use more suitable naming in AlignedCodonFrame
// from side is the dataset sequence of seq; to side is the placeholder
219 assertEquals(1, mapping.getAaSeqs().length);
220 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
221 assertEquals(1, mapping.getdnaSeqs().length);
222 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
// still a single getdnaToProt entry, now carrying both source ranges
223 assertEquals(1, mapping.getdnaToProt().length);
224 assertEquals(2, mapping.getdnaToProt()[0].getFromRanges().size());
225 // the two spliced dna ranges are combined in one MapList
226 assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
227 .getFromRanges().get(0));
228 assertArrayEquals(new int[] { 13411, 13550 }, mapping.getdnaToProt()[0]
229 .getFromRanges().get(1));
230 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
231 // the two cdna ranges are merged into one contiguous region
// 1-138 and 139-278 are adjacent, hence the single expected range 1-278
232 assertArrayEquals(new int[] { 1, 278 }, mapping.getdnaToProt()[0]
233 .getToRanges().get(0));
// Exercises Gff3Helper.getDescription(sf, attributes) step by step: each
// stage changes either the values set on the feature, the feature type, or
// the attribute map, and then asserts the expected description.
236 @Test(groups = "Functional")
237 public void testGetDescription()
239 Gff3Helper testee = new Gff3Helper();
// NOTE(review): the constructor call on the next line is cut off after the
// 3f argument in this view; the later calls in this method pass a group
// string as the final argument - confirm against the complete file.
240 SequenceFeature sf = new SequenceFeature("type", "desc", 10, 20, 3f,
// empty attribute map and neither ID nor Target set on sf: null expected
242 Map<String, List<String>> attributes = new HashMap<String, List<String>>();
243 assertNull(testee.getDescription(sf, attributes));
245 // ID if any is a fall-back for description
246 sf.setValue("ID", "Patrick");
247 assertEquals("Patrick", testee.getDescription(sf, attributes));
249 // Target is set by Exonerate
// with both ID and Target set, the expected result comes from Target and is
// only the text before the space in this example value
250 sf.setValue("Target", "Destination Moon");
251 assertEquals("Destination", testee.getDescription(sf, attributes));
253 // Ensembl variant feature - extract "alleles" value
254 // may be sequence_variant or a sub-type in the sequence ontology
255 sf = new SequenceFeature("feature_variant", "desc", 10, 20, 3f, "group");
// NOTE(review): the statements that populate atts are not visible in this
// view; the expected value below implies three entries A, C and T added in
// that order - confirm against the complete file.
256 List<String> atts = new ArrayList<String>();
260 attributes.put("alleles", atts);
261 assertEquals("A,C,T", testee.getDescription(sf, attributes));
263 // Ensembl transcript or exon feature - extract Name
// the alleles entry is still in the map from here on; only Name is added
264 List<String> atts2 = new ArrayList<String>();
265 atts2.add("ENSE00001871077");
266 attributes.put("Name", atts2);
267 sf = new SequenceFeature("transcript", "desc", 10, 20, 3f, "group");
268 assertEquals("ENSE00001871077", testee.getDescription(sf, attributes));
269 // transcript sub-type in SO
270 sf = new SequenceFeature("mRNA", "desc", 10, 20, 3f, "group");
271 assertEquals("ENSE00001871077", testee.getDescription(sf, attributes));
272 // special usage of feature by Ensembl
// NOTE(review): this constructor call is also cut off after 3f, in this view.
273 sf = new SequenceFeature("NMD_transcript_variant", "desc", 10, 20, 3f,
275 assertEquals("ENSE00001871077", testee.getDescription(sf, attributes));
// exon features are expected to take their description from Name as well
277 sf = new SequenceFeature("exon", "desc", 10, 20, 3f, "group");
278 assertEquals("ENSE00001871077", testee.getDescription(sf, attributes));