3 import static org.junit.Assert.assertEquals;
4 import static org.junit.Assert.assertSame;
5 import static org.junit.Assert.assertTrue;
6 import jalview.api.AlignViewportI;
7 import jalview.datamodel.AlignedCodonFrame;
8 import jalview.datamodel.Alignment;
9 import jalview.datamodel.AlignmentI;
10 import jalview.datamodel.ColumnSelection;
11 import jalview.datamodel.SearchResults;
12 import jalview.datamodel.SearchResults.Match;
13 import jalview.datamodel.Sequence;
14 import jalview.datamodel.SequenceGroup;
15 import jalview.datamodel.SequenceI;
16 import jalview.gui.AlignViewport;
17 import jalview.io.AppletFormatAdapter;
18 import jalview.io.FormatAdapter;
19 import java.awt.Color;
20 import java.io.IOException;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.List;
27 import org.junit.Test;
// Test class exercising the static methods of MappingUtils.
30 public class MappingUtilsTest
// Shared viewports; both are assigned by setupMappedAlignments() and read by the
// testMapColumnSelection_* tests.
32 private AlignViewportI dnaView;
33 private AlignViewportI proteinView;
36 * Simple test of mapping with no intron involved.
// Verifies MappingUtils.buildSearchResults in both directions (protein -> dna and
// dna -> protein) for one dna/protein pair linked through a single AlignedCodonFrame.
39 public void testBuildSearchResults()
// Aligned dna sequence; its dataset sequence is what is registered in the mapping below.
41 final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC");
42 seq1.createDatasetSequence();
// Aligned protein sequence, likewise given a dataset sequence.
44 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
45 aseq1.createDatasetSequence();
48 * Map dna bases 1-6 to protein residues 1-2
50 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the MapList constructor arguments are cut off in this listing (the
// embedded numbering skips 52-53); per the comment above they presumably describe
// dna 1-6 -> protein 1-2 -- confirm against the full file.
51 MapList map = new MapList(new int[]
54 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
55 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
58 * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6
// Protein position 1: exactly one match, on the dna dataset sequence, spanning 1-3.
60 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
61 assertEquals(1, sr.getResults().size());
62 Match m = sr.getResults().get(0);
63 assertEquals(seq1.getDatasetSequence(), m.getSequence());
64 assertEquals(1, m.getStart());
65 assertEquals(3, m.getEnd());
// Protein position 2: exactly one match, spanning 4-6.
66 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
67 assertEquals(1, sr.getResults().size());
68 m = sr.getResults().get(0);
69 assertEquals(seq1.getDatasetSequence(), m.getSequence());
70 assertEquals(4, m.getStart());
71 assertEquals(6, m.getEnd());
74 * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2
// Each dna position 1..6 must yield a single match on the protein dataset sequence.
76 for (int i = 1; i < 7; i++)
78 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
79 assertEquals(1, sr.getResults().size());
80 m = sr.getResults().get(0);
81 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
// Positions 1-3 are expected to map to residue 1, positions 4-6 to residue 2.
82 int residue = i > 3 ? 2 : 1;
83 assertEquals(residue, m.getStart());
84 assertEquals(residue, m.getEnd());
89 * Simple test of mapping with introns involved.
// Verifies MappingUtils.buildSearchResults when the dna side of the mapping is made of
// several discontiguous ranges, so one protein position yields several matches.
92 public void testBuildSearchResults_withIntron()
94 final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT");
95 seq1.createDatasetSequence();
97 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
98 aseq1.createDatasetSequence();
101 * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2
103 AlignedCodonFrame acf = new AlignedCodonFrame();
// The first array lists the dna ranges as [start, end] pairs: 2-2, 4-5, 7-7, 9-9, 11-11.
// NOTE(review): the remaining MapList arguments are cut off in this listing (the
// embedded numbering skips 106) -- confirm against the full file.
104 MapList map = new MapList(new int[]
105 { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, new int[]
107 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
108 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
111 * Check protein residue 1 maps to [2, 4, 5]
// Two matches are expected for position 1: the ranges 2-2 and 4-5, in that order.
113 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
114 assertEquals(2, sr.getResults().size());
115 Match m = sr.getResults().get(0);
116 assertEquals(seq1.getDatasetSequence(), m.getSequence());
117 assertEquals(2, m.getStart());
118 assertEquals(2, m.getEnd());
119 m = sr.getResults().get(1);
120 assertEquals(seq1.getDatasetSequence(), m.getSequence());
121 assertEquals(4, m.getStart());
122 assertEquals(5, m.getEnd());
125 * Check protein residue 2 maps to [7, 9, 11]
// Three single-position matches are expected for position 2: 7, 9 and 11, in that order.
127 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
128 assertEquals(3, sr.getResults().size());
129 m = sr.getResults().get(0);
130 assertEquals(seq1.getDatasetSequence(), m.getSequence());
131 assertEquals(7, m.getStart());
132 assertEquals(7, m.getEnd());
133 m = sr.getResults().get(1);
134 assertEquals(seq1.getDatasetSequence(), m.getSequence());
135 assertEquals(9, m.getStart());
136 assertEquals(9, m.getEnd());
137 m = sr.getResults().get(2);
138 assertEquals(seq1.getDatasetSequence(), m.getSequence());
139 assertEquals(11, m.getStart());
140 assertEquals(11, m.getEnd());
143 * Check inverse mappings, from codons to protein
// Walks dna positions 1..13 and checks what each one maps back to.
145 for (int i = 1; i < 14; i++)
147 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
// Positions 2, 4 and 5 are expected to map to residue 1.
// NOTE(review): the rest of this ternary is cut off in this listing (numbering skips
// 149-151); presumably 7, 9 and 11 give residue 2 and anything else gives 0 -- confirm.
148 int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9
// NOTE(review): the two size assertions below can only both hold if a branch separates
// them; the branching lines are not visible here (numbering skips 149-151 and 153-154).
// Presumably the zero-result check applies only to unmapped positions -- confirm.
152 assertEquals(0, sr.getResults().size());
155 assertEquals(1, sr.getResults().size());
156 m = sr.getResults().get(0);
157 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
158 assertEquals(residue, m.getStart());
159 assertEquals(residue, m.getEnd());
164 * Test mapping a sequence group made of entire sequences.
166 * @throws IOException
// Verifies MappingUtils.mapSequenceGroup for a group of whole sequences, first mapping
// a protein selection to dna and then a dna selection to protein.
169 public void testMapSequenceGroup_sequences() throws IOException
172 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
// Three dna sequences of three bases each and three single-residue protein sequences.
// NOTE(review): the format argument of both loadAlignment calls is on lines not
// visible in this listing (numbering skips 176 and 179).
175 AlignmentI cdna = loadAlignment(">Seq1\nACG\n>Seq2\nTGA\n>Seq3\nTAC\n",
177 cdna.setDataset(null);
178 AlignmentI protein = loadAlignment(">Seq1\nK\n>Seq2\nL\n>Seq3\nQ\n",
180 protein.setDataset(null);
181 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the MapList arguments are cut off in this listing (numbering skips 183-184).
182 MapList map = new MapList(new int[]
// The same map is registered for each of the three dna/protein dataset sequence pairs.
185 for (int seq = 0; seq < 3; seq++)
187 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
188 .getSequenceAt(seq).getDatasetSequence(), map);
190 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the class.
192 AlignViewportI dnaView = new AlignViewport(cdna);
193 AlignViewportI proteinView = new AlignViewport(protein);
// The codon frame mappings are attached to the protein alignment only.
194 protein.setCodonFrames(acfList);
197 * Select Seq1 and Seq3 in the protein (startRes=endRes=0)
// Group with distinctive display settings so their transfer to the mapped group can be checked.
199 SequenceGroup sg = new SequenceGroup();
200 sg.setColourText(true);
201 sg.setIdColour(Color.GREEN);
202 sg.setOutlineColour(Color.LIGHT_GRAY);
203 sg.addSequence(protein.getSequenceAt(0), false);
204 sg.addSequence(protein.getSequenceAt(2), false);
207 * Verify the mapped sequence group in dna
// Expect display settings carried over, dna sequences 0 and 2 selected, columns 0-2.
209 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
210 assertTrue(mappedGroup.getColourText());
211 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
212 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
213 assertEquals(2, mappedGroup.getSequences().size());
214 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
215 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1));
216 assertEquals(0, mappedGroup.getStartRes());
217 assertEquals(2, mappedGroup.getEndRes());
220 * Verify mapping sequence group from dna to protein
// NOTE(review): sg is reused for the dna selection; any reset of the group and its column
// bounds happens on lines not visible in this listing (numbering skips 221-222 and
// 225-226) -- confirm against the full file.
223 sg.addSequence(cdna.getSequenceAt(1), false);
224 sg.addSequence(cdna.getSequenceAt(0), false);
// Expect protein sequences 1 and 0, in the order the dna sequences were added, column 0 only.
227 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
228 assertTrue(mappedGroup.getColourText());
229 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
230 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
231 assertEquals(2, mappedGroup.getSequences().size());
232 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0));
233 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1));
234 assertEquals(0, mappedGroup.getStartRes());
235 assertEquals(0, mappedGroup.getEndRes());
239 * Helper method to load an alignment and ensure dataset sequences are set up.
245 * @throws IOException
// Builds an alignment from in-memory text: 'data' is the alignment content and 'format'
// the file format name passed through to FormatAdapter.readFile.
247 protected AlignmentI loadAlignment(final String data, String format)
// AppletFormatAdapter.PASTE is passed as the second argument of readFile, with 'data' first.
250 Alignment a = new FormatAdapter().readFile(data,
251 AppletFormatAdapter.PASTE, format);
// NOTE(review): the throws clause and the rest of the body (including the return) are
// not visible in this listing (numbering skips 248-249 and 252-254).
257 * Test mapping a column selection in protein to its dna equivalent
259 * @throws IOException
// Verifies MappingUtils.mapColumnSelection from the protein view to the dna view, using
// the shared fixtures created by setupMappedAlignments().
// NOTE(review): each step below adds columns to the same ColumnSelection; the expected
// values only make sense if the selection is reset between steps, and those resets are
// presumably on lines not visible in this listing (numbering gaps before 281, 289, 298, 307).
262 public void testMapColumnSelection_proteinToDna() throws IOException
264 setupMappedAlignments();
266 ColumnSelection colsel = new ColumnSelection();
269 * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3
270 * in dna respectively, overall 0-4
272 colsel.addElement(0);
273 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel,
274 proteinView, dnaView);
// The mapped selection is compared through the string form of its selected column list.
275 assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString());
278 * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna
281 colsel.addElement(1);
282 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
283 assertEquals("[0, 1, 2, 3]", cs.getSelected().toString());
286 * Column 2 in protein picks up gaps only - no mapping
289 colsel.addElement(2);
290 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
291 assertEquals("[]", cs.getSelected().toString());
294 * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns
295 * 6-9, 6-10, 5-8 respectively, overall to 5-10
298 colsel.addElement(3);
299 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
300 assertEquals("[5, 6, 7, 8, 9, 10]", cs.getSelected().toString());
303 * Combine selection of columns 1 and 3 to get a discontiguous mapped
// Columns 1 and 3 together: the expected result is 0-3 plus 5-10, with column 4 left out.
307 colsel.addElement(1);
308 colsel.addElement(3);
309 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
// NOTE(review): the tail of this assertion is cut off in this listing (numbering skips 311).
310 assertEquals("[0, 1, 2, 3, 5, 6, 7, 8, 9, 10]", cs.getSelected()
315 * @throws IOException
// Fixture builder: loads a dna and a protein alignment of three sequences each, registers
// one mapping per sequence pair in a single AlignedCodonFrame, and stores the resulting
// viewports in the dnaView and proteinView fields.
317 protected void setupMappedAlignments() throws IOException
320 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
321 * viewport). Lower case for introns.
// NOTE(review): the format argument of both loadAlignment calls is on lines not
// visible in this listing (numbering skips 325 and 329).
323 AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n"
324 + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n",
326 cdna.setDataset(null);
327 AlignmentI protein = loadAlignment(
328 ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n",
330 protein.setDataset(null);
331 AlignedCodonFrame acf = new AlignedCodonFrame();
// Each MapList below starts with the dna ranges for one sequence as [start, end] pairs.
// NOTE(review): the second array and trailing arguments of every MapList are cut off
// in this listing (numbering skips 334, 339 and 344) -- confirm against the full file.
// Seq1: dna ranges 1-3, 6-6, 8-9.
332 MapList map = new MapList(new int[]
333 { 1, 3, 6, 6, 8, 9 }, new int[]
335 acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein
336 .getSequenceAt(0).getDatasetSequence(), map);
// Seq2: dna ranges 1-1, 3-4, 5-7.
337 map = new MapList(new int[]
338 { 1, 1, 3, 4, 5, 7 }, new int[]
340 acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein
341 .getSequenceAt(1).getDatasetSequence(), map);
// Seq3: dna ranges 1-1, 3-4, 5-5, 7-8.
342 map = new MapList(new int[]
343 { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[]
345 acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein
346 .getSequenceAt(2).getDatasetSequence(), map);
347 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// Assigns the class fields (no local declaration here), so the tests can use them afterwards.
349 dnaView = new AlignViewport(cdna);
350 proteinView = new AlignViewport(protein);
// The codon frame mappings are attached to the protein alignment only.
351 protein.setCodonFrames(acfList);
355 * Test mapping a column selection in dna to its protein equivalent
357 * @throws IOException
// Verifies MappingUtils.mapColumnSelection from the dna view to the protein view, using
// the shared fixtures created by setupMappedAlignments().
360 public void testMapColumnSelection_dnaToProtein() throws IOException
362 setupMappedAlignments();
364 ColumnSelection colsel = new ColumnSelection();
367 * Column 0 in dna picks up first bases which map to residue 1, columns 0-1
370 colsel.addElement(0);
// NOTE(review): the final argument of this call is cut off in this listing (numbering
// skips 372); presumably proteinView, as in the later call -- confirm.
371 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, dnaView,
373 assertEquals("[0, 1]", cs.getSelected().toString());
376 * Columns 3-5 in dna map to the first residues in protein Seq1, Seq2, and
377 * the first two in Seq3. Overall to columns 0, 1, 3 (col2 is all gaps).
// NOTE(review): column 0 was added above; whether the selection is reset before these
// additions is not visible in this listing (numbering skips 378) -- confirm.
379 colsel.addElement(3);
380 colsel.addElement(4);
381 colsel.addElement(5);
382 cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView);
383 assertEquals("[0, 1, 3]", cs.getSelected().toString());
// Verifies that MappingUtils.mapColumnSelection accepts a null column selection and
// returns a selection with no selected columns.
387 public void testMapColumnSelection_null() throws IOException
389 setupMappedAlignments();
// NOTE(review): the final argument of this call is cut off in this listing (numbering
// skips 391); presumably proteinView -- confirm.
390 ColumnSelection cs = MappingUtils.mapColumnSelection(null, dnaView,
392 assertTrue("mapped selection not empty", cs.getSelected().isEmpty());
396 * Tests for the method that converts a series of [start, end] ranges to
// Verifies MappingUtils.flattenRanges, comparing each result through Arrays.toString.
// The visible inputs are arrays of [start, end] pairs and the expected outputs list
// every position covered by those pairs.
400 public void testFlattenRanges()
// NOTE(review): the input arrays of the first two assertions are cut off in this
// listing (numbering skips 404 and 407); only their expected output is visible.
402 assertEquals("[1, 2, 3, 4]",
403 Arrays.toString(MappingUtils.flattenRanges(new int[]
405 assertEquals("[1, 2, 3, 4]",
406 Arrays.toString(MappingUtils.flattenRanges(new int[]
// Four single-position ranges flatten to the same four positions.
408 assertEquals("[1, 2, 3, 4]",
409 Arrays.toString(MappingUtils.flattenRanges(new int[]
410 { 1, 1, 2, 2, 3, 3, 4, 4 })));
// Discontiguous ranges 1-4, 7-9 and 12-12: positions between the ranges are not emitted.
411 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
412 Arrays.toString(MappingUtils.flattenRanges(new int[]
413 { 1, 4, 7, 9, 12, 12 })));
414 // unpaired start position is ignored:
// Same input with a trailing 15 that has no matching end; the expected output is unchanged.
415 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
416 Arrays.toString(MappingUtils.flattenRanges(new int[]
417 { 1, 4, 7, 9, 12, 12, 15 })));
421 * Test mapping a sequence group made of entire columns.
423 * @throws IOException
// Verifies MappingUtils.mapSequenceGroup for a group covering all sequences over a column
// range, first mapping protein to dna and then dna to protein.
426 public void testMapSequenceGroup_columns() throws IOException
429 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
// Three ungapped dna sequences of six bases and three protein sequences of two residues.
// NOTE(review): the format argument of both loadAlignment calls is on lines not
// visible in this listing (numbering skips 434 and 437).
432 AlignmentI cdna = loadAlignment(
433 ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n",
435 cdna.setDataset(null);
436 AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n",
438 protein.setDataset(null);
439 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the MapList arguments are cut off in this listing (numbering skips 441-442).
440 MapList map = new MapList(new int[]
// The same map is registered for each of the three dna/protein dataset sequence pairs.
443 for (int seq = 0; seq < 3; seq++)
445 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
446 .getSequenceAt(seq).getDatasetSequence(), map);
448 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the class.
450 AlignViewportI dnaView = new AlignViewport(cdna);
451 AlignViewportI proteinView = new AlignViewport(protein);
452 protein.setCodonFrames(acfList);
455 * Select all sequences, column 2 in the protein
457 SequenceGroup sg = new SequenceGroup();
458 sg.setColourText(true);
459 sg.setIdColour(Color.GREEN);
460 sg.setOutlineColour(Color.LIGHT_GRAY);
461 sg.addSequence(protein.getSequenceAt(0), false);
462 sg.addSequence(protein.getSequenceAt(1), false);
463 sg.addSequence(protein.getSequenceAt(2), false);
// NOTE(review): the statements setting the group's column bounds are not visible in this
// listing (numbering skips 464-467) -- confirm against the full file.
468 * Verify the mapped sequence group in dna
// Expect display settings carried over, all three dna sequences in order, dna columns 3-5.
470 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
471 assertTrue(mappedGroup.getColourText());
472 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
473 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
474 assertEquals(3, mappedGroup.getSequences().size());
475 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
476 assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1));
477 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2));
478 assertEquals(3, mappedGroup.getStartRes());
479 assertEquals(5, mappedGroup.getEndRes());
482 * Verify mapping sequence group from dna to protein
// NOTE(review): sg is reused for the dna selection; any reset of the group and the
// statements setting its new column bounds are not visible in this listing (numbering
// skips 483-484 and 489-490) -- confirm against the full file.
485 sg.addSequence(cdna.getSequenceAt(0), false);
486 sg.addSequence(cdna.getSequenceAt(1), false);
487 sg.addSequence(cdna.getSequenceAt(2), false);
488 // select columns 2 and 3 in DNA which span protein columns 0 and 1
// Expect all three protein sequences in order, protein columns 0-1.
491 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
492 assertTrue(mappedGroup.getColourText());
493 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
494 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
495 assertEquals(3, mappedGroup.getSequences().size());
496 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0));
497 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1));
498 assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2));
499 assertEquals(0, mappedGroup.getStartRes());
500 assertEquals(1, mappedGroup.getEndRes());
504 * Test mapping a sequence group made of a sequences/columns region.
506 * @throws IOException
// Verifies MappingUtils.mapSequenceGroup for a group covering some sequences over a
// column range, on alignments that contain '-' characters, in both mapping directions.
// NOTE(review): every statement that sets the group's column bounds is on lines not
// visible in this listing; the selected columns are only known from the comments below.
509 public void testMapSequenceGroup_region() throws IOException
512 * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein
// NOTE(review): the format argument of the cdna loadAlignment call is on a line not
// visible in this listing (numbering skips 517); the protein call passes "FASTA".
515 AlignmentI cdna = loadAlignment(
516 ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n",
518 cdna.setDataset(null);
519 AlignmentI protein = loadAlignment(
520 ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA");
521 protein.setDataset(null);
522 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the MapList arguments are cut off in this listing (numbering skips 524-525).
523 MapList map = new MapList(new int[]
// The same map is registered for each of the three dna/protein dataset sequence pairs.
526 for (int seq = 0; seq < 3; seq++)
528 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
529 .getSequenceAt(seq).getDatasetSequence(), map);
531 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the class.
533 AlignViewportI dnaView = new AlignViewport(cdna);
534 AlignViewportI proteinView = new AlignViewport(protein);
535 protein.setCodonFrames(acfList);
538 * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped
539 * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection
540 * only includes a gap in Seq2 there is no mappable selection region in the
543 SequenceGroup sg = new SequenceGroup();
544 sg.setColourText(true);
545 sg.setIdColour(Color.GREEN);
546 sg.setOutlineColour(Color.LIGHT_GRAY);
547 sg.addSequence(protein.getSequenceAt(0), false);
548 sg.addSequence(protein.getSequenceAt(1), false);
553 * Verify the mapped sequence group in dna
// Only dna Seq1 is expected in the mapped group, over columns 0-3.
555 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
556 assertTrue(mappedGroup.getColourText());
557 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
558 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
559 assertEquals(1, mappedGroup.getSequences().size());
560 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
561 // Seq2 in protein has a gap in column 1 - ignored
562 // Seq1 has K which should map to columns 0-3 in Seq1
563 assertEquals(0, mappedGroup.getStartRes());
564 assertEquals(3, mappedGroup.getEndRes());
567 * Now select cols 2-4 in protein. These cover Seq1:AS Seq2:LQ Seq3:VM which
568 * extend over DNA columns 3-12, 1-7, 6-13 respectively, or 1-13 overall.
// NOTE(review): the statements that change the selection for this step are not visible
// in this listing (numbering skips 569-571) -- confirm against the full file.
572 mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
573 assertEquals(1, mappedGroup.getStartRes());
574 assertEquals(13, mappedGroup.getEndRes());
577 * Verify mapping sequence group from dna to protein
// NOTE(review): sg is reused for the dna selection; any reset of the group is not visible
// in this listing (numbering skips 578-579) -- confirm against the full file.
580 sg.addSequence(cdna.getSequenceAt(0), false);
582 // select columns 4,5 - includes Seq1:codon2 (A) only
// With dna Seq1 alone the mapped group is expected to cover protein column 2 only.
585 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
586 assertEquals(2, mappedGroup.getStartRes());
587 assertEquals(2, mappedGroup.getEndRes());
589 // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ)
// Adding dna Seq2 is expected to widen the mapped range to protein columns 2-4.
590 sg.addSequence(cdna.getSequenceAt(1), false);
591 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
592 assertEquals(2, mappedGroup.getStartRes());
593 assertEquals(4, mappedGroup.getEndRes());
595 // add Seq3 to dna selection cols 4-5 include codon 1 (Q)
// Adding dna Seq3 is expected to widen the mapped range to protein columns 0-4.
596 sg.addSequence(cdna.getSequenceAt(2), false);
597 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
598 assertEquals(0, mappedGroup.getStartRes());
599 assertEquals(4, mappedGroup.getEndRes());
// Verifies MappingUtils.findMappingsForSequence: it should return every AlignedCodonFrame
// in which the given sequence takes part, and an empty list for unmapped or null input.
603 public void testFindMappingsForSequence()
// Four sequences with identical content; each gets its own dataset sequence.
605 SequenceI seq1 = new Sequence("Seq1", "ABC");
606 SequenceI seq2 = new Sequence("Seq2", "ABC");
607 SequenceI seq3 = new Sequence("Seq3", "ABC");
608 SequenceI seq4 = new Sequence("Seq4", "ABC");
609 seq1.createDatasetSequence();
610 seq2.createDatasetSequence();
611 seq3.createDatasetSequence();
612 seq4.createDatasetSequence();
615 * Create mappings from seq1 to seq2, seq2 to seq1, seq3 to seq1
617 AlignedCodonFrame acf1 = new AlignedCodonFrame();
// NOTE(review): the MapList arguments are cut off in this listing (numbering skips
// 619-620). The same MapList instance is reused for all three frames below.
618 MapList map = new MapList(new int[]
621 acf1.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
622 AlignedCodonFrame acf2 = new AlignedCodonFrame();
623 acf2.addMap(seq2.getDatasetSequence(), seq1.getDatasetSequence(), map);
624 AlignedCodonFrame acf3 = new AlignedCodonFrame();
625 acf3.addMap(seq3.getDatasetSequence(), seq1.getDatasetSequence(), map);
// NOTE(review): the statements adding acf1..acf3 to this set are not visible in this
// listing (numbering skips 628-632); the assertions below presume all three are present.
627 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
633 * Seq1 has three mappings
// seq1 appears in all three frames, once as first argument of addMap and twice as second.
// NOTE(review): the arguments of this call are cut off in this listing (numbering
// skips 636); presumably (seq1, mappings) as in the later calls -- confirm.
635 List<AlignedCodonFrame> result = MappingUtils.findMappingsForSequence(
637 assertEquals(3, result.size());
638 assertTrue(result.contains(acf1));
639 assertTrue(result.contains(acf2));
640 assertTrue(result.contains(acf3));
643 * Seq2 has two mappings
645 result = MappingUtils.findMappingsForSequence(seq2, mappings);
646 assertEquals(2, result.size());
647 assertTrue(result.contains(acf1));
648 assertTrue(result.contains(acf2));
651 * Seq3 has one mapping
653 result = MappingUtils.findMappingsForSequence(seq3, mappings);
654 assertEquals(1, result.size());
655 assertTrue(result.contains(acf3));
658 * Seq4 has no mappings
660 result = MappingUtils.findMappingsForSequence(seq4, mappings);
661 assertEquals(0, result.size());
// Null handling: a null sequence, a null mapping set, or both must give an empty list.
663 result = MappingUtils.findMappingsForSequence(null, mappings);
664 assertEquals(0, result.size());
666 result = MappingUtils.findMappingsForSequence(seq1, null);
667 assertEquals(0, result.size());
669 result = MappingUtils.findMappingsForSequence(null, null);
670 assertEquals(0, result.size());