1 package jalview.io.gff;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertNull;
5 import static org.testng.AssertJUnit.assertSame;
6 import static org.testng.AssertJUnit.assertTrue;
7 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
9 import jalview.datamodel.AlignedCodonFrame;
10 import jalview.datamodel.Alignment;
11 import jalview.datamodel.AlignmentI;
12 import jalview.datamodel.Mapping;
13 import jalview.datamodel.MappingType;
14 import jalview.datamodel.Sequence;
15 import jalview.datamodel.SequenceDummy;
16 import jalview.datamodel.SequenceI;
17 import jalview.gui.AlignFrame;
18 import jalview.io.FileLoader;
19 import jalview.io.FormatAdapter;
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Iterator;
24 import java.util.List;
27 import org.testng.annotations.Test;
29 public class ExonerateHelperTest
31 @Test(groups = "Functional")
32 public void testGetMappingType()
35 assertSame(MappingType.PeptideToNucleotide,
37 .getMappingType("exonerate:protein2genome:local"));
38 assertSame(MappingType.PeptideToNucleotide,
39 ExonerateHelper.getMappingType("exonerate:protein2dna:local"));
42 assertSame(MappingType.NucleotideToNucleotide,
43 ExonerateHelper.getMappingType("coding2coding"));
44 assertSame(MappingType.NucleotideToNucleotide,
45 ExonerateHelper.getMappingType("coding2genome"));
46 assertSame(MappingType.NucleotideToNucleotide,
47 ExonerateHelper.getMappingType("cdna2genome"));
48 assertSame(MappingType.NucleotideToNucleotide,
49 ExonerateHelper.getMappingType("genome2genome"));
50 assertNull(ExonerateHelper.getMappingType("affine:local"));
54 * Test processing one exonerate GFF line for the case where the mapping is
55 * protein2dna, similarity feature is on the query (the protein), match to the
56 * forward strand, target sequence is in neither the alignment nor the 'new
61 @Test(groups = "Functional")
62 public void testProcessGffSimilarity_protein2dna_forward_querygff()
65 ExonerateHelper testee = new ExonerateHelper();
66 List<SequenceI> newseqs = new ArrayList<SequenceI>();
67 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t.\t+\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
69 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
70 seq.createDatasetSequence();
71 AlignmentI align = new Alignment(new SequenceI[] {});
72 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
75 * this should create a mapping from Seq2/3-10 to virtual sequence
76 * dna1 (added to newseqs) positions 400-423
78 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
79 assertEquals(1, newseqs.size());
80 assertTrue(newseqs.get(0) instanceof SequenceDummy);
81 assertEquals("dna1", newseqs.get(0).getName());
82 assertEquals(1, align.getCodonFrames().size());
83 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
84 assertEquals(1, mapping.getAaSeqs().length);
85 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
86 assertEquals(1, mapping.getdnaSeqs().length);
87 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
88 assertEquals(1, mapping.getdnaToProt().length);
89 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
90 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
91 .getFromRanges().get(0));
92 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
93 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
94 .getToRanges().get(0));
98 * Test processing one exonerate GFF line for the case where the mapping is
99 * protein2dna, similarity feature is on the query (the protein), match to the
102 * @throws IOException
104 @Test(groups = "Functional")
105 public void testProcessGffSimilarity_protein2dna_reverse_querygff()
108 ExonerateHelper testee = new ExonerateHelper();
109 List<SequenceI> newseqs = new ArrayList<SequenceI>();
110 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t0\t-\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
112 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
113 seq.createDatasetSequence();
114 AlignmentI align = new Alignment(new SequenceI[] {});
115 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
118 * this should create a mapping from Seq2/3-10 to virtual sequence
119 * dna1 (added to newseqs) positions 400-377 (reverse)
121 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
122 assertEquals(1, newseqs.size());
123 assertTrue(newseqs.get(0) instanceof SequenceDummy);
124 assertEquals("dna1", newseqs.get(0).getName());
125 assertEquals(1, align.getCodonFrames().size());
126 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
127 assertEquals(1, mapping.getAaSeqs().length);
128 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
129 assertEquals(1, mapping.getdnaSeqs().length);
130 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
131 assertEquals(1, mapping.getdnaToProt().length);
132 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
133 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
134 .getFromRanges().get(0));
135 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
136 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
137 .getToRanges().get(0));
141 * Test processing one exonerate GFF line for the case where the mapping is
142 * protein2dna, similarity feature is on the target (the dna), match to the
145 * @throws IOException
147 @Test(groups = "Functional")
148 public void testProcessGffSimilarity_protein2dna_forward_targetgff()
151 ExonerateHelper testee = new ExonerateHelper();
152 List<SequenceI> newseqs = new ArrayList<SequenceI>();
153 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t400\t423\t0\t+\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
155 SequenceI seq = new Sequence("dna1/391-430",
156 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
157 seq.createDatasetSequence();
158 AlignmentI align = new Alignment(new SequenceI[] { seq });
159 // GFF feature on the target describes mapping from base 400 for
160 // count 24 to position 3
161 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
164 * this should create a mapping from virtual sequence dna1 (added to
165 * newseqs) positions 400-423 to Prot1/3-10
167 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
168 assertEquals(1, newseqs.size());
169 assertTrue(newseqs.get(0) instanceof SequenceDummy);
170 assertEquals("Prot1", newseqs.get(0).getName());
171 assertEquals(1, align.getCodonFrames().size());
172 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
173 assertEquals(1, mapping.getAaSeqs().length);
174 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
175 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
176 assertEquals(1, mapping.getdnaSeqs().length);
177 assertEquals(1, mapping.getdnaToProt().length);
178 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
179 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
180 .getFromRanges().get(0));
181 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
182 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
183 .getToRanges().get(0));
187 * Test processing one exonerate GFF line for the case where the mapping is
188 * protein2dna, similarity feature is on the target (the dna), match to the
191 * @throws IOException
193 @Test(groups = "Functional")
194 public void testProcessGffSimilarity_protein2dna_reverse_targetgff()
197 ExonerateHelper testee = new ExonerateHelper();
198 List<SequenceI> newseqs = new ArrayList<SequenceI>();
199 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t377\t400\t0\t-\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
201 SequenceI seq = new Sequence("dna1/371-410",
202 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
203 seq.createDatasetSequence();
204 AlignmentI align = new Alignment(new SequenceI[] { seq });
205 // GFF feature on the target describes mapping from base 400 for
206 // count 24 to position 3
207 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
210 * this should create a mapping from virtual sequence dna1 (added to
211 * newseqs) positions 400-377 (reverse) to Prot1/3-10
213 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
214 assertEquals(1, newseqs.size());
215 assertTrue(newseqs.get(0) instanceof SequenceDummy);
216 assertEquals("Prot1", newseqs.get(0).getName());
217 assertEquals(1, align.getCodonFrames().size());
218 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
219 assertEquals(1, mapping.getAaSeqs().length);
220 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
221 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
222 assertEquals(1, mapping.getdnaSeqs().length);
223 assertEquals(1, mapping.getdnaToProt().length);
224 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
225 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
226 .getFromRanges().get(0));
227 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
228 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
229 .getToRanges().get(0));
233 * Tests loading exonerate GFF2 output, including 'similarity' alignment
234 * feature, on to sequences
236 @Test(groups = { "Functional" })
237 public void testAddExonerateGffToAlignment()
239 FileLoader loader = new FileLoader(false);
240 AlignFrame af = loader.LoadFileWaitTillLoaded(
241 "examples/testdata/exonerateseqs.fa",
244 af.loadJalviewDataFile("examples/testdata/exonerateoutput.gff",
245 FormatAdapter.FILE, null, null);
248 * verify one mapping to a dummy sequence, one to a real one
250 List<AlignedCodonFrame> mappings = af
251 .getViewport().getAlignment().getDataset().getCodonFrames();
252 assertEquals(2, mappings.size());
253 Iterator<AlignedCodonFrame> iter = mappings.iterator();
255 // first mapping is to dummy sequence
256 AlignedCodonFrame mapping = iter.next();
257 Mapping[] mapList = mapping.getProtMappings();
258 assertEquals(1, mapList.length);
259 assertTrue(mapList[0].getTo() instanceof SequenceDummy);
260 assertEquals("DDB_G0269124", mapList[0].getTo().getName());
262 // 143 in protein should map to codon [11270, 11269, 11268] in dna
263 int[] mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
264 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
266 // second mapping is to a sequence in the alignment
267 mapping = iter.next();
268 mapList = mapping.getProtMappings();
269 assertEquals(1, mapList.length);
270 SequenceI proteinSeq = af.getViewport().getAlignment()
271 .findName("DDB_G0280897");
272 assertSame(proteinSeq.getDatasetSequence(), mapList[0].getTo());
273 assertEquals(1, mapping.getdnaToProt().length);
275 // 143 in protein should map to codon [11270, 11269, 11268] in dna
276 mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
277 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
279 // 182 in protein should map to codon [11153, 11152, 11151] in dna
280 mappedRegion = mapList[0].getMap().locateInFrom(182, 182);
281 assertArrayEquals(new int[] { 11153, 11151 }, mappedRegion);
283 // and the reverse mapping:
284 mappedRegion = mapList[0].getMap().locateInTo(11151, 11153);
285 assertArrayEquals(new int[] { 182, 182 }, mappedRegion);
287 // 11150 in dna should _not_ map to protein
288 mappedRegion = mapList[0].getMap().locateInTo(11150, 11150);
289 assertNull(mappedRegion);
291 // similarly 183 in protein should _not_ map to dna
292 mappedRegion = mapList[0].getMap().locateInFrom(183, 183);
293 assertNull(mappedRegion);