3 import static org.junit.Assert.assertEquals;
4 import static org.junit.Assert.assertSame;
5 import static org.junit.Assert.assertTrue;
8 import java.io.IOException;
9 import java.util.Arrays;
10 import java.util.Collections;
13 import org.junit.Test;
15 import jalview.api.AlignViewportI;
16 import jalview.datamodel.AlignedCodonFrame;
17 import jalview.datamodel.Alignment;
18 import jalview.datamodel.AlignmentI;
19 import jalview.datamodel.ColumnSelection;
20 import jalview.datamodel.SearchResults;
21 import jalview.datamodel.SearchResults.Match;
22 import jalview.datamodel.Sequence;
23 import jalview.datamodel.SequenceGroup;
24 import jalview.gui.AlignViewport;
25 import jalview.io.AppletFormatAdapter;
26 import jalview.io.FormatAdapter;
28 public class MappingUtilsTest
// Viewports used by the column-selection tests; both are assigned in
// setupMappedAlignments().
30 private AlignViewportI dnaView;
31 private AlignViewportI proteinView;
34 * Simple test of mapping with no intron involved.
// Exercises MappingUtils.buildSearchResults in both directions over a single
// AlignedCodonFrame that links the dataset sequences of seq1 (dna) and aseq1
// (protein). Protein positions 1 and 2 must each yield exactly one Match on
// the dna dataset sequence (1-3 and 4-6); dna positions 1 to 6 must each
// yield exactly one Match on the protein dataset sequence.
37 public void testBuildSearchResults()
39 final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC");
40 seq1.createDatasetSequence();
42 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
43 aseq1.createDatasetSequence();
46 * Map dna bases 1-6 to protein residues 1-2
48 AlignedCodonFrame acf = new AlignedCodonFrame();
49 MapList map = new MapList(new int[]
52 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
53 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
56 * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6
58 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
59 assertEquals(1, sr.getResults().size());
60 Match m = sr.getResults().get(0);
61 assertEquals(seq1.getDatasetSequence(), m.getSequence());
62 assertEquals(1, m.getStart());
63 assertEquals(3, m.getEnd());
64 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
65 assertEquals(1, sr.getResults().size());
66 m = sr.getResults().get(0);
67 assertEquals(seq1.getDatasetSequence(), m.getSequence());
68 assertEquals(4, m.getStart());
69 assertEquals(6, m.getEnd());
72 * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2
74 for (int i = 1; i < 7; i++)
76 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
77 assertEquals(1, sr.getResults().size());
78 m = sr.getResults().get(0);
79 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
// dna positions 1-3 are expected to hit residue 1, positions 4-6 residue 2
80 int residue = i > 3 ? 2 : 1;
81 assertEquals(residue, m.getStart());
82 assertEquals(residue, m.getEnd());
87 * Simple test of mapping with introns involved.
// Same checks as testBuildSearchResults, but the dna side of the mapping is
// discontiguous. Protein residue 1 must yield two Matches on the dna dataset
// sequence (2-2 and 4-5) and residue 2 must yield three (7-7, 9-9, 11-11).
// The final loop walks dna positions 1 to 13 and asserts either an empty
// result or a single Match on the protein dataset sequence whose start and
// end both equal the expected residue.
90 public void testBuildSearchResults_withIntro()
92 final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT");
93 seq1.createDatasetSequence();
95 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
96 aseq1.createDatasetSequence();
99 * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2
101 AlignedCodonFrame acf = new AlignedCodonFrame();
102 MapList map = new MapList(new int[]
103 { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, new int[]
105 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
106 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
109 * Check protein residue 1 maps to [2, 4, 5]
111 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
112 assertEquals(2, sr.getResults().size());
113 Match m = sr.getResults().get(0);
114 assertEquals(seq1.getDatasetSequence(), m.getSequence());
115 assertEquals(2, m.getStart());
116 assertEquals(2, m.getEnd());
117 m = sr.getResults().get(1);
118 assertEquals(seq1.getDatasetSequence(), m.getSequence());
119 assertEquals(4, m.getStart());
120 assertEquals(5, m.getEnd());
123 * Check protein residue 2 maps to [7, 9, 11]
125 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
126 assertEquals(3, sr.getResults().size());
127 m = sr.getResults().get(0);
128 assertEquals(seq1.getDatasetSequence(), m.getSequence());
129 assertEquals(7, m.getStart());
130 assertEquals(7, m.getEnd());
131 m = sr.getResults().get(1);
132 assertEquals(seq1.getDatasetSequence(), m.getSequence());
133 assertEquals(9, m.getStart());
134 assertEquals(9, m.getEnd());
135 m = sr.getResults().get(2);
136 assertEquals(seq1.getDatasetSequence(), m.getSequence());
137 assertEquals(11, m.getStart());
138 assertEquals(11, m.getEnd());
141 * Check inverse mappings, from codons to protein
143 for (int i = 1; i < 14; i++)
145 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
146 int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9
150 assertEquals(0, sr.getResults().size());
153 assertEquals(1, sr.getResults().size());
154 m = sr.getResults().get(0);
155 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
156 assertEquals(residue, m.getStart());
157 assertEquals(residue, m.getEnd());
162 * Test mapping a sequence group made of entire sequences.
164 * @throws IOException
// Maps a SequenceGroup with MappingUtils.mapSequenceGroup, first from the
// protein viewport to the dna viewport and then back again. Each mapped group
// is checked for colour text, id colour, outline colour, member sequences
// (by identity and order) and start/end residue columns.
167 public void testMapSequenceGroup_sequences() throws IOException
170 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
173 AlignmentI cdna = loadAlignment(">Seq1\nACG\n>Seq2\nTGA\n>Seq3\nTAC\n",
175 cdna.setDataset(null);
176 AlignmentI protein = loadAlignment(">Seq1\nK\n>Seq2\nL\n>Seq3\nQ\n",
178 protein.setDataset(null);
179 AlignedCodonFrame acf = new AlignedCodonFrame();
180 MapList map = new MapList(new int[]
183 for (int seq = 0; seq < 3; seq++)
185 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
186 .getSequenceAt(seq).getDatasetSequence(), map);
188 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// NOTE: these locals shadow the dnaView/proteinView fields of this class;
// the fields are not touched by this test.
190 AlignViewportI dnaView = new AlignViewport(cdna);
191 AlignViewportI proteinView = new AlignViewport(protein);
192 protein.setCodonFrames(acfList);
195 * Select Seq1 and Seq3 in the protein (startRes=endRes=0)
197 SequenceGroup sg = new SequenceGroup();
198 sg.setColourText(true);
199 sg.setIdColour(Color.GREEN);
200 sg.setOutlineColour(Color.LIGHT_GRAY);
201 sg.addSequence(protein.getSequenceAt(0), false);
202 sg.addSequence(protein.getSequenceAt(2), false);
205 * Verify the mapped sequence group in dna
207 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
208 assertTrue(mappedGroup.getColourText());
209 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
210 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
211 assertEquals(2, mappedGroup.getSequences().size());
212 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
213 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1));
214 assertEquals(0, mappedGroup.getStartRes());
215 assertEquals(2, mappedGroup.getEndRes());
218 * Verify mapping sequence group from dna to protein
221 sg.addSequence(cdna.getSequenceAt(1), false);
222 sg.addSequence(cdna.getSequenceAt(0), false);
225 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
226 assertTrue(mappedGroup.getColourText());
227 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
228 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
229 assertEquals(2, mappedGroup.getSequences().size());
230 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0));
231 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1));
232 assertEquals(0, mappedGroup.getStartRes());
233 assertEquals(0, mappedGroup.getEndRes());
237 * Helper method to load an alignment and ensure dataset sequences are set up.
243 * @throws IOException
// Parses the supplied text with FormatAdapter.readFile, treating 'data' as
// pasted content (AppletFormatAdapter.PASTE) in the given 'format'.
245 protected AlignmentI loadAlignment(final String data, String format)
248 Alignment a = new FormatAdapter().readFile(data,
249 AppletFormatAdapter.PASTE, format);
255 * Test mapping a column selection in protein to its dna equivalent
257 * @throws IOException
// Maps protein column selections to dna columns through
// MappingUtils.mapColumnSelection, using the alignments and viewports built
// by setupMappedAlignments(). Each case compares the string form of the
// mapped selection with the expected list of dna columns.
260 public void testMapColumnSelection_proteinToDna() throws IOException
262 setupMappedAlignments();
264 ColumnSelection colsel = new ColumnSelection();
267 * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3
268 * in dna respectively, overall 0-4
270 colsel.addElement(0);
271 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel,
272 proteinView, dnaView);
273 assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString());
276 * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna
279 colsel.addElement(1);
280 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
281 assertEquals("[0, 1, 2, 3]", cs.getSelected().toString());
284 * Column 2 in protein picks up gaps only - no mapping
287 colsel.addElement(2);
288 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
289 assertEquals("[]", cs.getSelected().toString());
292 * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns
293 * 6-9, 6-10, 5-8 respectively, overall to 5-10
296 colsel.addElement(3);
297 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
298 assertEquals("[5, 6, 7, 8, 9, 10]", cs.getSelected().toString());
301 * Combine selection of columns 1 and 3 to get a discontiguous mapped
305 colsel.addElement(1);
306 colsel.addElement(3);
307 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
308 assertEquals("[0, 1, 2, 3, 5, 6, 7, 8, 9, 10]", cs.getSelected()
313 * @throws IOException
// Shared fixture for the column-selection tests. Loads a gapped dna alignment
// and a gapped protein alignment (Seq1/2/3 in each), adds one MapList per
// sequence pair to a single AlignedCodonFrame, stores new viewports in the
// dnaView and proteinView fields, and sets the codon frames on the protein
// alignment.
315 protected void setupMappedAlignments() throws IOException
318 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
319 * viewport). Lower case for introns.
321 AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n"
322 + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n",
324 cdna.setDataset(null);
325 AlignmentI protein = loadAlignment(
326 ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n",
328 protein.setDataset(null);
329 AlignedCodonFrame acf = new AlignedCodonFrame();
330 MapList map = new MapList(new int[]
331 { 1, 3, 6, 6, 8, 9 }, new int[]
333 acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein
334 .getSequenceAt(0).getDatasetSequence(), map);
335 map = new MapList(new int[]
336 { 1, 1, 3, 4, 5, 7 }, new int[]
338 acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein
339 .getSequenceAt(1).getDatasetSequence(), map);
340 map = new MapList(new int[]
341 { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[]
343 acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein
344 .getSequenceAt(2).getDatasetSequence(), map);
345 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// assign the fields (not locals) so the calling test can use the viewports
347 dnaView = new AlignViewport(cdna);
348 proteinView = new AlignViewport(protein);
349 protein.setCodonFrames(acfList);
353 * Test mapping a column selection in dna to its protein equivalent
355 * @throws IOException
// Maps dna column selections to protein columns through
// MappingUtils.mapColumnSelection, using the alignments and viewports built
// by setupMappedAlignments(). Results are compared via the string form of
// the mapped selection.
358 public void testMapColumnSelection_dnaToProtein() throws IOException
360 setupMappedAlignments();
362 ColumnSelection colsel = new ColumnSelection();
365 * Column 0 in dna picks up first bases which map to residue 1, columns 0-1
368 colsel.addElement(0);
369 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, dnaView,
371 assertEquals("[0, 1]", cs.getSelected().toString());
374 * Columns 3-5 in dna map to the first residues in protein Seq1, Seq2, and
375 * the first two in Seq3. Overall to columns 0, 1, 3 (col2 is all gaps).
377 colsel.addElement(3);
378 colsel.addElement(4);
379 colsel.addElement(5);
380 cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView);
381 assertEquals("[0, 1, 3]", cs.getSelected().toString());
385 * Tests for the method that converts a series of [start, end] ranges to
// Checks MappingUtils.flattenRanges by comparing Arrays.toString of its
// result with the expected list of positions: single-position ranges,
// several multi-position ranges, and an input with a trailing unpaired value.
389 public void testFlattenRanges()
391 assertEquals("[1, 2, 3, 4]",
392 Arrays.toString(MappingUtils.flattenRanges(new int[]
394 assertEquals("[1, 2, 3, 4]",
395 Arrays.toString(MappingUtils.flattenRanges(new int[]
397 assertEquals("[1, 2, 3, 4]",
398 Arrays.toString(MappingUtils.flattenRanges(new int[]
399 { 1, 1, 2, 2, 3, 3, 4, 4 })));
400 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
401 Arrays.toString(MappingUtils.flattenRanges(new int[]
402 { 1, 4, 7, 9, 12, 12 })));
403 // an unpaired trailing start position (15 below) is ignored:
404 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
405 Arrays.toString(MappingUtils.flattenRanges(new int[]
406 { 1, 4, 7, 9, 12, 12, 15 })));
410 * Test mapping a sequence group made of entire columns.
412 * @throws IOException
// Maps a SequenceGroup containing all three sequences with
// MappingUtils.mapSequenceGroup, protein to dna and then dna to protein.
// Each mapped group is checked for colour text, id colour, outline colour,
// member sequences (by identity and order) and start/end residue columns.
415 public void testMapSequenceGroup_columns() throws IOException
418 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
421 AlignmentI cdna = loadAlignment(
422 ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n",
424 cdna.setDataset(null);
425 AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n",
427 protein.setDataset(null);
428 AlignedCodonFrame acf = new AlignedCodonFrame();
429 MapList map = new MapList(new int[]
432 for (int seq = 0; seq < 3; seq++)
434 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
435 .getSequenceAt(seq).getDatasetSequence(), map);
437 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// NOTE: these locals shadow the dnaView/proteinView fields of this class;
// the fields are not touched by this test.
439 AlignViewportI dnaView = new AlignViewport(cdna);
440 AlignViewportI proteinView = new AlignViewport(protein);
441 protein.setCodonFrames(acfList);
444 * Select all sequences, column 2 in the protein
446 SequenceGroup sg = new SequenceGroup();
447 sg.setColourText(true);
448 sg.setIdColour(Color.GREEN);
449 sg.setOutlineColour(Color.LIGHT_GRAY);
450 sg.addSequence(protein.getSequenceAt(0), false);
451 sg.addSequence(protein.getSequenceAt(1), false);
452 sg.addSequence(protein.getSequenceAt(2), false);
457 * Verify the mapped sequence group in dna
459 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
460 assertTrue(mappedGroup.getColourText());
461 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
462 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
463 assertEquals(3, mappedGroup.getSequences().size());
464 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
465 assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1));
466 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2));
467 assertEquals(3, mappedGroup.getStartRes());
468 assertEquals(5, mappedGroup.getEndRes());
471 * Verify mapping sequence group from dna to protein
474 sg.addSequence(cdna.getSequenceAt(0), false);
475 sg.addSequence(cdna.getSequenceAt(1), false);
476 sg.addSequence(cdna.getSequenceAt(2), false);
477 // select columns 2 and 3 in DNA which span protein columns 0 and 1
480 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
481 assertTrue(mappedGroup.getColourText());
482 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
483 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
484 assertEquals(3, mappedGroup.getSequences().size());
485 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0));
486 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1));
487 assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2));
488 assertEquals(0, mappedGroup.getStartRes());
489 assertEquals(1, mappedGroup.getEndRes());
493 * Test mapping a sequence group made of a sequences/columns region.
495 * @throws IOException
// Maps SequenceGroups that cover a sequences-by-columns region of gapped
// alignments with MappingUtils.mapSequenceGroup, in both directions. The
// protein-to-dna cases check the mapped sequences and start/end columns; the
// dna-to-protein cases add dna sequences to the group one at a time and
// check how the mapped start/end columns change.
498 public void testMapSequenceGroup_region() throws IOException
501 * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein
504 AlignmentI cdna = loadAlignment(
505 ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n",
507 cdna.setDataset(null);
508 AlignmentI protein = loadAlignment(
509 ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA");
510 protein.setDataset(null);
511 AlignedCodonFrame acf = new AlignedCodonFrame();
512 MapList map = new MapList(new int[]
515 for (int seq = 0; seq < 3; seq++)
517 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
518 .getSequenceAt(seq).getDatasetSequence(), map);
520 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// NOTE: these locals shadow the dnaView/proteinView fields of this class;
// the fields are not touched by this test.
522 AlignViewportI dnaView = new AlignViewport(cdna);
523 AlignViewportI proteinView = new AlignViewport(protein);
524 protein.setCodonFrames(acfList);
527 * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped
528 * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection
529 * only includes a gap in Seq2 there is no mappable selection region in the
532 SequenceGroup sg = new SequenceGroup();
533 sg.setColourText(true);
534 sg.setIdColour(Color.GREEN);
535 sg.setOutlineColour(Color.LIGHT_GRAY);
536 sg.addSequence(protein.getSequenceAt(0), false);
537 sg.addSequence(protein.getSequenceAt(1), false);
542 * Verify the mapped sequence group in dna
544 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
545 assertTrue(mappedGroup.getColourText());
546 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
547 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
548 assertEquals(1, mappedGroup.getSequences().size());
549 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
550 // Seq2 in protein has a gap in column 1 - ignored
551 // Seq1 has K which should map to columns 0-3 in Seq1
552 assertEquals(0, mappedGroup.getStartRes());
553 assertEquals(3, mappedGroup.getEndRes());
556 * Now select cols 2-4 in protein. These cover Seq1:AS Seq2:LQ Seq3:VM which
557 * extend over DNA columns 3-12, 1-7, 6-13 respectively, or 1-13 overall.
561 mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
562 assertEquals(1, mappedGroup.getStartRes());
563 assertEquals(13, mappedGroup.getEndRes());
566 * Verify mapping sequence group from dna to protein
569 sg.addSequence(cdna.getSequenceAt(0), false);
571 // select columns 4,5 - includes Seq1:codon2 (A) only
574 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
575 assertEquals(2, mappedGroup.getStartRes());
576 assertEquals(2, mappedGroup.getEndRes());
578 // add Seq2 to the dna selection; cols 4-5 now include codons 1 and 2 (LQ)
579 sg.addSequence(cdna.getSequenceAt(1), false);
580 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
581 assertEquals(2, mappedGroup.getStartRes());
582 assertEquals(4, mappedGroup.getEndRes());
584 // add Seq3 to the dna selection; cols 4-5 now also include its codon 1 (Q)
585 sg.addSequence(cdna.getSequenceAt(2), false);
586 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
587 assertEquals(0, mappedGroup.getStartRes());
588 assertEquals(4, mappedGroup.getEndRes());