1 package jalview.io.gff;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;

import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Mapping;
import jalview.datamodel.MappingType;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
import jalview.io.FileLoader;
import jalview.io.FormatAdapter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.testng.annotations.Test;
29 public class ExonerateHelperTest
31 @Test(groups = "Functional")
32 public void testGetMappingType()
35 assertSame(MappingType.PeptideToNucleotide,
37 .getMappingType("exonerate:protein2genome:local"));
38 assertSame(MappingType.PeptideToNucleotide,
39 ExonerateHelper.getMappingType("exonerate:protein2dna:local"));
42 assertSame(MappingType.NucleotideToNucleotide,
43 ExonerateHelper.getMappingType("coding2coding"));
44 assertSame(MappingType.NucleotideToNucleotide,
45 ExonerateHelper.getMappingType("coding2genome"));
46 assertSame(MappingType.NucleotideToNucleotide,
47 ExonerateHelper.getMappingType("cdna2genome"));
48 assertSame(MappingType.NucleotideToNucleotide,
49 ExonerateHelper.getMappingType("genome2genome"));
50 assertNull(ExonerateHelper.getMappingType("affine:local"));
54 * Test processing one exonerate GFF line for the case where the mapping is
55 * protein2dna, similarity feature is on the query (the protein), match to the
56 * forward strand, target sequence is in neither the alignment nor the 'new
61 @Test(groups = "Functional")
62 public void testProcessGffSimilarity_protein2dna_forward_querygff()
65 ExonerateHelper testee = new ExonerateHelper();
66 List<SequenceI> newseqs = new ArrayList<SequenceI>();
67 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t.\t+\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
69 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
70 seq.createDatasetSequence();
71 AlignmentI align = new Alignment(new SequenceI[] {});
72 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
75 * this should create a mapping from Seq2/3-10 to virtual sequence
76 * dna1 (added to newseqs) positions 400-423
78 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
79 assertEquals(1, newseqs.size());
80 assertTrue(newseqs.get(0) instanceof SequenceDummy);
81 assertEquals("dna1", newseqs.get(0).getName());
82 assertEquals(1, align.getCodonFrames().size());
83 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
84 assertEquals(1, mapping.getAaSeqs().length);
85 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
86 assertEquals(1, mapping.getdnaSeqs().length);
87 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
88 assertEquals(1, mapping.getdnaToProt().length);
89 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
90 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
91 .getFromRanges().get(0));
92 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
93 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
94 .getToRanges().get(0));
98 * Test processing one exonerate GFF line for the case where the mapping is
99 * protein2dna, similarity feature is on the query (the protein), match to the
102 * @throws IOException
104 @Test(groups = "Functional")
105 public void testProcessGffSimilarity_protein2dna_reverse_querygff()
108 ExonerateHelper testee = new ExonerateHelper();
109 List<SequenceI> newseqs = new ArrayList<SequenceI>();
110 String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t0\t-\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
112 SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
113 seq.createDatasetSequence();
114 AlignmentI align = new Alignment(new SequenceI[] {});
115 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
118 * this should create a mapping from Seq2/3-10 to virtual sequence
119 * dna1 (added to newseqs) positions 400-377 (reverse)
121 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
122 assertEquals(1, newseqs.size());
123 assertTrue(newseqs.get(0) instanceof SequenceDummy);
124 assertEquals("dna1", newseqs.get(0).getName());
125 assertEquals(1, align.getCodonFrames().size());
126 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
127 assertEquals(1, mapping.getAaSeqs().length);
128 assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
129 assertEquals(1, mapping.getdnaSeqs().length);
130 assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
131 assertEquals(1, mapping.getdnaToProt().length);
132 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
133 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
134 .getFromRanges().get(0));
135 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
136 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
137 .getToRanges().get(0));
141 * Test processing one exonerate GFF line for the case where the mapping is
142 * protein2dna, similarity feature is on the target (the dna), match to the
145 * @throws IOException
147 @Test(groups = "Functional")
148 public void testProcessGffSimilarity_protein2dna_forward_targetgff()
151 ExonerateHelper testee = new ExonerateHelper();
152 List<SequenceI> newseqs = new ArrayList<SequenceI>();
153 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t400\t423\t0\t+\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
155 SequenceI seq = new Sequence("dna1/391-430",
156 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
157 seq.createDatasetSequence();
158 AlignmentI align = new Alignment(new SequenceI[] { seq });
159 // GFF feature on the target describes mapping from base 400 for
160 // count 24 to position 3
161 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
164 * this should create a mapping from virtual sequence dna1 (added to
165 * newseqs) positions 400-423 to Prot1/3-10
167 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
168 assertEquals(1, newseqs.size());
169 assertTrue(newseqs.get(0) instanceof SequenceDummy);
170 assertEquals("Prot1", newseqs.get(0).getName());
171 assertEquals(1, align.getCodonFrames().size());
172 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
173 assertEquals(1, mapping.getAaSeqs().length);
174 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
175 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
176 assertEquals(1, mapping.getdnaSeqs().length);
177 assertEquals(1, mapping.getdnaToProt().length);
178 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
179 assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
180 .getFromRanges().get(0));
181 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
182 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
183 .getToRanges().get(0));
187 * Test processing one exonerate GFF line for the case where the mapping is
188 * protein2dna, similarity feature is on the target (the dna), match to the
191 * @throws IOException
193 @Test(groups = "Functional")
194 public void testProcessGffSimilarity_protein2dna_reverse_targetgff()
197 ExonerateHelper testee = new ExonerateHelper();
198 List<SequenceI> newseqs = new ArrayList<SequenceI>();
199 String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t377\t400\t0\t-\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
201 SequenceI seq = new Sequence("dna1/371-410",
202 "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
203 seq.createDatasetSequence();
204 AlignmentI align = new Alignment(new SequenceI[] { seq });
205 // GFF feature on the target describes mapping from base 400 for
206 // count 24 to position 3
207 Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
210 * this should create a mapping from virtual sequence dna1 (added to
211 * newseqs) positions 400-377 (reverse) to Prot1/3-10
213 testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
214 assertEquals(1, newseqs.size());
215 assertTrue(newseqs.get(0) instanceof SequenceDummy);
216 assertEquals("Prot1", newseqs.get(0).getName());
217 assertEquals(1, align.getCodonFrames().size());
218 AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
219 assertEquals(1, mapping.getAaSeqs().length);
220 assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
221 assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
222 assertEquals(1, mapping.getdnaSeqs().length);
223 assertEquals(1, mapping.getdnaToProt().length);
224 assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
225 assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
226 .getFromRanges().get(0));
227 assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
228 assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
229 .getToRanges().get(0));
233 * Tests loading exonerate GFF2 output, including 'similarity' alignment
234 * feature, on to sequences
236 @Test(groups = { "Functional" })
237 public void testAddExonerateGffToAlignment()
239 FileLoader loader = new FileLoader(false);
240 AlignFrame af = loader.LoadFileWaitTillLoaded(
241 "examples/testdata/exonerateseqs.fa", FormatAdapter.FILE);
243 af.loadJalviewDataFile("examples/testdata/exonerateoutput.gff",
244 FormatAdapter.FILE, null, null);
247 * verify one mapping to a dummy sequence, one to a real one
249 List<AlignedCodonFrame> mappings = af.getViewport().getAlignment()
250 .getDataset().getCodonFrames();
251 assertEquals(2, mappings.size());
252 Iterator<AlignedCodonFrame> iter = mappings.iterator();
254 // first mapping is to dummy sequence
255 AlignedCodonFrame mapping = iter.next();
256 Mapping[] mapList = mapping.getProtMappings();
257 assertEquals(1, mapList.length);
258 assertTrue(mapList[0].getTo() instanceof SequenceDummy);
259 assertEquals("DDB_G0269124", mapList[0].getTo().getName());
261 // 143 in protein should map to codon [11270, 11269, 11268] in dna
262 int[] mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
263 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
265 // second mapping is to a sequence in the alignment
266 mapping = iter.next();
267 mapList = mapping.getProtMappings();
268 assertEquals(1, mapList.length);
269 SequenceI proteinSeq = af.getViewport().getAlignment()
270 .findName("DDB_G0280897");
271 assertSame(proteinSeq.getDatasetSequence(), mapList[0].getTo());
272 assertEquals(1, mapping.getdnaToProt().length);
274 // 143 in protein should map to codon [11270, 11269, 11268] in dna
275 mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
276 assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
278 // 182 in protein should map to codon [11153, 11152, 11151] in dna
279 mappedRegion = mapList[0].getMap().locateInFrom(182, 182);
280 assertArrayEquals(new int[] { 11153, 11151 }, mappedRegion);
282 // and the reverse mapping:
283 mappedRegion = mapList[0].getMap().locateInTo(11151, 11153);
284 assertArrayEquals(new int[] { 182, 182 }, mappedRegion);
286 // 11150 in dna should _not_ map to protein
287 mappedRegion = mapList[0].getMap().locateInTo(11150, 11150);
288 assertNull(mappedRegion);
290 // similarly 183 in protein should _not_ map to dna
291 mappedRegion = mapList[0].getMap().locateInFrom(183, 183);
292 assertNull(mappedRegion);