2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.io.gff;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertNull;
25 import static org.testng.AssertJUnit.assertSame;
26 import static org.testng.AssertJUnit.assertTrue;
27 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
29 import jalview.datamodel.AlignedCodonFrame;
30 import jalview.datamodel.Alignment;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.Mapping;
33 import jalview.datamodel.MappingType;
34 import jalview.datamodel.Sequence;
35 import jalview.datamodel.SequenceDummy;
36 import jalview.datamodel.SequenceI;
37 import jalview.gui.AlignFrame;
38 import jalview.io.FileLoader;
39 import jalview.io.FormatAdapter;
41 import java.io.IOException;
42 import java.util.ArrayList;
43 import java.util.Iterator;
44 import java.util.List;
47 import org.testng.annotations.Test;
49 public class ExonerateHelperTest
51 @Test(groups = "Functional")
52 public void testGetMappingType()
55 assertSame(MappingType.PeptideToNucleotide,
57 .getMappingType("exonerate:protein2genome:local"));
58 assertSame(MappingType.PeptideToNucleotide,
59 ExonerateHelper.getMappingType("exonerate:protein2dna:local"));
62 assertSame(MappingType.NucleotideToNucleotide,
63 ExonerateHelper.getMappingType("coding2coding"));
64 assertSame(MappingType.NucleotideToNucleotide,
65 ExonerateHelper.getMappingType("coding2genome"));
66 assertSame(MappingType.NucleotideToNucleotide,
67 ExonerateHelper.getMappingType("cdna2genome"));
68 assertSame(MappingType.NucleotideToNucleotide,
69 ExonerateHelper.getMappingType("genome2genome"));
70 assertNull(ExonerateHelper.getMappingType("affine:local"));
74 * Test processing one exonerate GFF line for the case where the mapping is
75 * protein2dna, similarity feature is on the query (the protein), match to the
76 * forward strand, target sequence is in neither the alignment nor the 'new
81 @Test(groups = "Functional")
82 public void testProcessGffSimilarity_protein2dna_forward_querygff()
85 ExonerateHelper testee = new ExonerateHelper();
86 List<SequenceI> newseqs = new ArrayList<SequenceI>();
87 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t.\t+\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
89 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
90 seq.createDatasetSequence();
91 AlignmentI align = new Alignment(new SequenceI[] {});
92 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
95 * this should create a mapping from Seq2/3-10 to virtual sequence
96 * dna1 (added to newseqs) positions 400-423
98 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
99 assertEquals(1, newseqs.size());
100 assertTrue(newseqs.get(0) instanceof SequenceDummy);
101 assertEquals("dna1", newseqs.get(0).getName());
102 assertEquals(1, align.getCodonFrames().size());
103 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
104 assertEquals(1, mapping.getAaSeqs().length);
105 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
106 assertEquals(1, mapping.getdnaSeqs().length);
107 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
108 assertEquals(1, mapping.getdnaToProt().length);
109 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
110 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
111 .getFromRanges().get(0));
112 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
113 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
114 .getToRanges().get(0));
118 * Test processing one exonerate GFF line for the case where the mapping is
119 * protein2dna, similarity feature is on the query (the protein), match to the
122 * @throws IOException
124 @Test(groups = "Functional")
125 public void testProcessGffSimilarity_protein2dna_reverse_querygff()
128 ExonerateHelper testee = new ExonerateHelper();
129 List<SequenceI> newseqs = new ArrayList<SequenceI>();
130 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t0\t-\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
132 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
133 seq.createDatasetSequence();
134 AlignmentI align = new Alignment(new SequenceI[] {});
135 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
138 * this should create a mapping from Seq2/3-10 to virtual sequence
139 * dna1 (added to newseqs) positions 400-377 (reverse)
141 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
142 assertEquals(1, newseqs.size());
143 assertTrue(newseqs.get(0) instanceof SequenceDummy);
144 assertEquals("dna1", newseqs.get(0).getName());
145 assertEquals(1, align.getCodonFrames().size());
146 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
147 assertEquals(1, mapping.getAaSeqs().length);
148 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
149 assertEquals(1, mapping.getdnaSeqs().length);
150 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
151 assertEquals(1, mapping.getdnaToProt().length);
152 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
153 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
154 .getFromRanges().get(0));
155 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
156 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
157 .getToRanges().get(0));
161 * Test processing one exonerate GFF line for the case where the mapping is
162 * protein2dna, similarity feature is on the target (the dna), match to the
165 * @throws IOException
167 @Test(groups = "Functional")
168 public void testProcessGffSimilarity_protein2dna_forward_targetgff()
171 ExonerateHelper testee = new ExonerateHelper();
172 List<SequenceI> newseqs = new ArrayList<SequenceI>();
173 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t400\t423\t0\t+\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
175 SequenceI seq = new Sequence("dna1/391-430",
176 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
177 seq.createDatasetSequence();
178 AlignmentI align = new Alignment(new SequenceI[] { seq });
179 // GFF feature on the target describes mapping from base 400 for
180 // count 24 to position 3
181 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
184 * this should create a mapping from virtual sequence dna1 (added to
185 * newseqs) positions 400-423 to Prot1/3-10
187 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
188 assertEquals(1, newseqs.size());
189 assertTrue(newseqs.get(0) instanceof SequenceDummy);
190 assertEquals("Prot1", newseqs.get(0).getName());
191 assertEquals(1, align.getCodonFrames().size());
192 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
193 assertEquals(1, mapping.getAaSeqs().length);
194 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
195 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
196 assertEquals(1, mapping.getdnaSeqs().length);
197 assertEquals(1, mapping.getdnaToProt().length);
198 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
199 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
200 .getFromRanges().get(0));
201 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
202 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
203 .getToRanges().get(0));
207 * Test processing one exonerate GFF line for the case where the mapping is
208 * protein2dna, similarity feature is on the target (the dna), match to the
211 * @throws IOException
213 @Test(groups = "Functional")
214 public void testProcessGffSimilarity_protein2dna_reverse_targetgff()
217 ExonerateHelper testee = new ExonerateHelper();
218 List<SequenceI> newseqs = new ArrayList<SequenceI>();
219 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t377\t400\t0\t-\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
221 SequenceI seq = new Sequence("dna1/371-410",
222 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
223 seq.createDatasetSequence();
224 AlignmentI align = new Alignment(new SequenceI[] { seq });
225 // GFF feature on the target describes mapping from base 400 for
226 // count 24 to position 3
227 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
230 * this should create a mapping from virtual sequence dna1 (added to
231 * newseqs) positions 400-377 (reverse) to Prot1/3-10
233 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
234 assertEquals(1, newseqs.size());
235 assertTrue(newseqs.get(0) instanceof SequenceDummy);
236 assertEquals("Prot1", newseqs.get(0).getName());
237 assertEquals(1, align.getCodonFrames().size());
238 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
239 assertEquals(1, mapping.getAaSeqs().length);
240 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
241 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
242 assertEquals(1, mapping.getdnaSeqs().length);
243 assertEquals(1, mapping.getdnaToProt().length);
244 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
245 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
246 .getFromRanges().get(0));
247 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
248 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
249 .getToRanges().get(0));
253 * Tests loading exonerate GFF2 output, including 'similarity' alignment
254 * feature, on to sequences
256 @Test(groups = { "Functional" })
257 public void testAddExonerateGffToAlignment()
259 FileLoader loader = new FileLoader(false);
260 AlignFrame af = loader.LoadFileWaitTillLoaded(
261 "examples/testdata/exonerateseqs.fa", FormatAdapter.FILE);
263 af.loadJalviewDataFile("examples/testdata/exonerateoutput.gff",
264 FormatAdapter.FILE, null, null);
267 * verify one mapping to a dummy sequence, one to a real one
269 List<AlignedCodonFrame> mappings = af.getViewport().getAlignment()
270 .getDataset().getCodonFrames();
271 assertEquals(2, mappings.size());
272 Iterator<AlignedCodonFrame> iter = mappings.iterator();
274 // first mapping is to dummy sequence
275 AlignedCodonFrame mapping = iter.next();
276 Mapping[] mapList = mapping.getProtMappings();
277 assertEquals(1, mapList.length);
278 assertTrue(mapList[0].getTo() instanceof SequenceDummy);
279 assertEquals("DDB_G0269124", mapList[0].getTo().getName());
281 // 143 in protein should map to codon [11270, 11269, 11268] in dna
282 int[] mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
283 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
285 // second mapping is to a sequence in the alignment
286 mapping = iter.next();
287 mapList = mapping.getProtMappings();
288 assertEquals(1, mapList.length);
289 SequenceI proteinSeq = af.getViewport().getAlignment()
290 .findName("DDB_G0280897");
291 assertSame(proteinSeq.getDatasetSequence(), mapList[0].getTo());
292 assertEquals(1, mapping.getdnaToProt().length);
294 // 143 in protein should map to codon [11270, 11269, 11268] in dna
295 mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
296 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
298 // 182 in protein should map to codon [11153, 11152, 11151] in dna
299 mappedRegion = mapList[0].getMap().locateInFrom(182, 182);
300 assertArrayEquals(new int[] { 11153, 11151 }, mappedRegion);
302 // and the reverse mapping:
303 mappedRegion = mapList[0].getMap().locateInTo(11151, 11153);
304 assertArrayEquals(new int[] { 182, 182 }, mappedRegion);
306 // 11150 in dna should _not_ map to protein
307 mappedRegion = mapList[0].getMap().locateInTo(11150, 11150);
308 assertNull(mappedRegion);
310 // similarly 183 in protein should _not_ map to dna
311 mappedRegion = mapList[0].getMap().locateInFrom(183, 183);
312 assertNull(mappedRegion);