3 import static org.testng.AssertJUnit.assertSame;
4 import static org.testng.AssertJUnit.assertEquals;
5 import static org.testng.AssertJUnit.assertTrue;
6 import org.testng.annotations.Test;
8 import java.io.IOException;
9 import java.util.Arrays;
10 import java.util.Collections;
11 import java.util.HashSet;
12 import java.util.List;
15 import jalview.api.AlignViewportI;
16 import jalview.datamodel.AlignedCodonFrame;
17 import jalview.datamodel.Alignment;
18 import jalview.datamodel.AlignmentI;
19 import jalview.datamodel.ColumnSelection;
20 import jalview.datamodel.SearchResults;
21 import jalview.datamodel.SearchResults.Match;
22 import jalview.datamodel.Sequence;
23 import jalview.datamodel.SequenceGroup;
24 import jalview.datamodel.SequenceI;
25 import jalview.gui.AlignViewport;
26 import jalview.io.AppletFormatAdapter;
27 import jalview.io.FormatAdapter;
// Test class exercising MappingUtils. The two viewport fields are assigned by
// setupMappedAlignments() and read by the testMapColumnSelection_* tests.
// NOTE(review): every line of this text starts with a number that is not Java,
// and the gaps in that numbering suggest brace-only and other short lines have
// been dropped - restore the file from version control before compiling.
29 public class MappingUtilsTest
31 private AlignViewportI dnaView;
32 private AlignViewportI proteinView;
35 * Simple test of mapping with no intron involved.
38 public void testBuildSearchResults()
// Checks MappingUtils.buildSearchResults in both directions (protein -> dna and
// dna -> protein) for one gapped dna sequence mapped to one gapped protein
// sequence. The mapping is registered between the dataset sequences, and the
// matches are asserted against those dataset sequences.
40 final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC");
41 seq1.createDatasetSequence();
43 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
44 aseq1.createDatasetSequence();
47 * Map dna bases 1-6 to protein residues 1-2
49 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the MapList constructor arguments are not visible here (the
// numbering jumps from 50 to 53); going by the remark above they presumably
// describe dna 1-6 -> protein 1-2 - confirm against the full file.
50 MapList map = new MapList(new int[]
53 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
// NOTE(review): no java.util.Set import appears among the imports visible in
// this extract - confirm it exists in the full file.
54 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
57 * Check protein residue 1 maps to codon 1-3, 2 to codon 4-6
59 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
60 assertEquals(1, sr.getResults().size());
61 Match m = sr.getResults().get(0);
62 assertEquals(seq1.getDatasetSequence(), m.getSequence());
63 assertEquals(1, m.getStart());
64 assertEquals(3, m.getEnd());
65 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
66 assertEquals(1, sr.getResults().size());
67 m = sr.getResults().get(0);
68 assertEquals(seq1.getDatasetSequence(), m.getSequence());
69 assertEquals(4, m.getStart());
70 assertEquals(6, m.getEnd());
73 * Check inverse mappings, from codons 1-3, 4-6 to protein 1, 2
// Every base position 1..6 must yield exactly one single-residue match:
// residue 1 for positions 1-3, residue 2 for positions 4-6.
75 for (int i = 1; i < 7; i++)
77 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
78 assertEquals(1, sr.getResults().size());
79 m = sr.getResults().get(0);
80 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
81 int residue = i > 3 ? 2 : 1;
82 assertEquals(residue, m.getStart());
83 assertEquals(residue, m.getEnd());
88 * Simple test of mapping with introns involved.
91 public void testBuildSearchResults_withIntron()
// Same checks as the intron-free test, but the dna side of the mapping is
// discontiguous, so one protein residue yields several separate dna matches.
93 final Sequence seq1 = new Sequence("Seq1", "C-G-TAGA-GCAGCTT");
94 seq1.createDatasetSequence();
96 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
97 aseq1.createDatasetSequence();
100 * Map dna bases [2, 4, 5], [7, 9, 11] to protein residues 1 and 2
102 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): only the first (dna) range array of the MapList call is
// visible; the second array and remaining arguments are missing here (the
// numbering jumps from 104 to 106).
103 MapList map = new MapList(new int[]
104 { 2, 2, 4, 5, 7, 7, 9, 9, 11, 11 }, new int[]
106 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
107 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
110 * Check protein residue 1 maps to [2, 4, 5]
// Two matches are expected: positions 2-2 and 4-5.
112 SearchResults sr = MappingUtils.buildSearchResults(aseq1, 1, acfList);
113 assertEquals(2, sr.getResults().size());
114 Match m = sr.getResults().get(0);
115 assertEquals(seq1.getDatasetSequence(), m.getSequence());
116 assertEquals(2, m.getStart());
117 assertEquals(2, m.getEnd());
118 m = sr.getResults().get(1);
119 assertEquals(seq1.getDatasetSequence(), m.getSequence());
120 assertEquals(4, m.getStart());
121 assertEquals(5, m.getEnd());
124 * Check protein residue 2 maps to [7, 9, 11]
// Three single-base matches are expected: 7, 9 and 11.
126 sr = MappingUtils.buildSearchResults(aseq1, 2, acfList);
127 assertEquals(3, sr.getResults().size());
128 m = sr.getResults().get(0);
129 assertEquals(seq1.getDatasetSequence(), m.getSequence());
130 assertEquals(7, m.getStart());
131 assertEquals(7, m.getEnd());
132 m = sr.getResults().get(1);
133 assertEquals(seq1.getDatasetSequence(), m.getSequence());
134 assertEquals(9, m.getStart());
135 assertEquals(9, m.getEnd());
136 m = sr.getResults().get(2);
137 assertEquals(seq1.getDatasetSequence(), m.getSequence());
138 assertEquals(11, m.getStart());
139 assertEquals(11, m.getEnd());
142 * Check inverse mappings, from codons to protein
144 for (int i = 1; i < 14; i++)
146 sr = MappingUtils.buildSearchResults(seq1, i, acfList);
// NOTE(review): the tail of the expression below and the branch that chooses
// between the size-0 and size-1 assertions are not visible (the numbering
// jumps 147 -> 151 -> 154). Presumably unmapped (intron) positions expect no
// results and mapped positions expect one - confirm against the full file.
147 int residue = (i == 2 || i == 4 || i == 5) ? 1 : (i == 7 || i == 9
151 assertEquals(0, sr.getResults().size());
154 assertEquals(1, sr.getResults().size());
155 m = sr.getResults().get(0);
156 assertEquals(aseq1.getDatasetSequence(), m.getSequence());
157 assertEquals(residue, m.getStart());
158 assertEquals(residue, m.getEnd());
163 * Test mapping a sequence group made of entire sequences.
165 * @throws IOException
168 public void testMapSequenceGroup_sequences() throws IOException
// Maps a group of whole sequences protein -> dna and then dna -> protein via
// MappingUtils.mapSequenceGroup, checking that the display attributes, the
// member sequences (in order) and the column bounds carry over.
171 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
// NOTE(review): the format argument of both loadAlignment calls and the
// MapList constructor arguments are not visible here (the numbering skips
// lines 175, 178 and 182-183).
174 AlignmentI cdna = loadAlignment(">Seq1\nACG\n>Seq2\nTGA\n>Seq3\nTAC\n",
176 cdna.setDataset(null);
177 AlignmentI protein = loadAlignment(">Seq1\nK\n>Seq2\nL\n>Seq3\nQ\n",
179 protein.setDataset(null);
180 AlignedCodonFrame acf = new AlignedCodonFrame();
181 MapList map = new MapList(new int[]
// The same MapList is registered for each of the three dna/protein pairs.
184 for (int seq = 0; seq < 3; seq++)
186 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
187 .getSequenceAt(seq).getDatasetSequence(), map);
189 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the same name.
191 AlignViewportI dnaView = new AlignViewport(cdna);
192 AlignViewportI proteinView = new AlignViewport(protein);
193 protein.setCodonFrames(acfList);
196 * Select Seq1 and Seq3 in the protein (startRes=endRes=0)
198 SequenceGroup sg = new SequenceGroup();
199 sg.setColourText(true);
// NOTE(review): Color is used below but no java.awt.Color import appears among
// the imports visible in this extract - confirm it exists in the full file.
200 sg.setIdColour(Color.GREEN);
201 sg.setOutlineColour(Color.LIGHT_GRAY);
202 sg.addSequence(protein.getSequenceAt(0), false);
203 sg.addSequence(protein.getSequenceAt(2), false);
206 * Verify the mapped sequence group in dna
208 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
209 assertTrue(mappedGroup.getColourText());
210 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
211 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
212 assertEquals(2, mappedGroup.getSequences().size());
213 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
214 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1));
215 assertEquals(0, mappedGroup.getStartRes());
216 assertEquals(2, mappedGroup.getEndRes());
219 * Verify mapping sequence group from dna to protein
// NOTE(review): as shown, sg still holds the two protein sequences when the dna
// sequences are added. The size-2 assertion below suggests the group is
// emptied on a line not visible here (the numbering jumps 219 -> 222) - confirm.
222 sg.addSequence(cdna.getSequenceAt(1), false);
223 sg.addSequence(cdna.getSequenceAt(0), false);
226 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
227 assertTrue(mappedGroup.getColourText());
228 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
229 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
230 assertEquals(2, mappedGroup.getSequences().size());
// The mapped protein sequences are expected in the order the dna sequences were added (Seq2, Seq1).
231 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0));
232 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1));
233 assertEquals(0, mappedGroup.getStartRes());
234 assertEquals(0, mappedGroup.getEndRes());
238 * Helper method to load an alignment and ensure dataset sequences are set up.
244 * @throws IOException
246 protected AlignmentI loadAlignment(final String data, String format)
// Parses the alignment text in 'data' by calling FormatAdapter.readFile with
// AppletFormatAdapter.PASTE and the supplied format name.
// NOTE(review): the throws clause, any dataset set-up promised by the method
// comment and the return statement are not visible in this extract (the
// numbering skips 247-248 and 251-253) - confirm against the full file.
249 Alignment a = new FormatAdapter().readFile(data,
250 AppletFormatAdapter.PASTE, format);
256 * Test mapping a column selection in protein to its dna equivalent
258 * @throws IOException
261 public void testMapColumnSelection_proteinToDna() throws IOException
// Maps protein column selections to dna columns with
// MappingUtils.mapColumnSelection, using the alignments and field viewports
// built by setupMappedAlignments(). Results are compared as the string form of
// the selected-column list.
263 setupMappedAlignments();
265 ColumnSelection colsel = new ColumnSelection();
268 * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3
269 * in dna respectively, overall 0-4
271 colsel.addElement(0);
272 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel,
273 proteinView, dnaView);
274 assertEquals("[0, 1, 2, 3, 4]", cs.getSelected().toString());
277 * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna
// NOTE(review): the expected results from here on only make sense if colsel is
// emptied before each new addElement; presumably that happens on lines not
// visible in this extract (the numbering skips 279, 287, 296 and 305) - confirm.
280 colsel.addElement(1);
281 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
282 assertEquals("[0, 1, 2, 3]", cs.getSelected().toString());
285 * Column 2 in protein picks up gaps only - no mapping
288 colsel.addElement(2);
289 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
290 assertEquals("[]", cs.getSelected().toString());
293 * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns
294 * 6-9, 6-10, 5-8 respectively, overall to 5-10
297 colsel.addElement(3);
298 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
299 assertEquals("[5, 6, 7, 8, 9, 10]", cs.getSelected().toString());
302 * Combine selection of columns 1 and 3 to get a discontiguous mapped
306 colsel.addElement(1);
307 colsel.addElement(3);
308 cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
// NOTE(review): the end of this final assertion is cut off in this extract.
309 assertEquals("[0, 1, 2, 3, 5, 6, 7, 8, 9, 10]", cs.getSelected()
314 * @throws IOException
316 protected void setupMappedAlignments() throws IOException
// Shared fixture for the column-selection tests: loads a gapped dna alignment
// and a gapped protein alignment for Seq1-3, registers one MapList per
// dna/protein pair in a single AlignedCodonFrame, stores new viewports in the
// dnaView/proteinView fields and sets the codon frames on the protein alignment.
319 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
320 * viewport). Lower case for introns.
// NOTE(review): the format argument of both loadAlignment calls is not visible
// here (the numbering skips 324 and 328).
322 AlignmentI cdna = loadAlignment(">Seq1\nAC-GctGtC-T\n"
323 + ">Seq2\nTc-GA-G-T-Tc\n" + ">Seq3\nTtTT-AaCGg-\n",
325 cdna.setDataset(null);
326 AlignmentI protein = loadAlignment(
327 ">Seq1\n-K-P\n>Seq2\nL--Q\n>Seq3\nG--S\n",
329 protein.setDataset(null);
330 AlignedCodonFrame acf = new AlignedCodonFrame();
// In each MapList below, the visible first array, read as 1-based [start, end]
// pairs over the ungapped sequence, picks out exactly the upper-case bases of
// that dna sequence (e.g. Seq1 ACGctGtCT -> 1-3, 6, 8-9).
// NOTE(review): the second array and remaining arguments of every MapList call
// are not visible (the numbering skips 333, 338 and 343).
331 MapList map = new MapList(new int[]
332 { 1, 3, 6, 6, 8, 9 }, new int[]
334 acf.addMap(cdna.getSequenceAt(0).getDatasetSequence(), protein
335 .getSequenceAt(0).getDatasetSequence(), map);
336 map = new MapList(new int[]
337 { 1, 1, 3, 4, 5, 7 }, new int[]
339 acf.addMap(cdna.getSequenceAt(1).getDatasetSequence(), protein
340 .getSequenceAt(1).getDatasetSequence(), map);
341 map = new MapList(new int[]
342 { 1, 1, 3, 4, 5, 5, 7, 8 }, new int[]
344 acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein
345 .getSequenceAt(2).getDatasetSequence(), map);
346 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// Assigns the class fields (no local declarations here), so the calling tests see these viewports.
348 dnaView = new AlignViewport(cdna);
349 proteinView = new AlignViewport(protein);
350 protein.setCodonFrames(acfList);
354 * Test mapping a column selection in dna to its protein equivalent
356 * @throws IOException
359 public void testMapColumnSelection_dnaToProtein() throws IOException
// Maps dna column selections to protein columns with
// MappingUtils.mapColumnSelection, using the fixture from setupMappedAlignments().
361 setupMappedAlignments();
363 ColumnSelection colsel = new ColumnSelection();
366 * Column 0 in dna picks up first bases which map to residue 1, columns 0-1
369 colsel.addElement(0);
// NOTE(review): the final argument of this call is not visible (the numbering
// jumps 370 -> 372); the later call in this method passes proteinView.
370 ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, dnaView,
372 assertEquals("[0, 1]", cs.getSelected().toString());
375 * Columns 3-5 in dna map to the first residues in protein Seq1, Seq2, and
376 * the first two in Seq3. Overall to columns 0, 1, 3 (col2 is all gaps).
378 colsel.addElement(3);
379 colsel.addElement(4);
380 colsel.addElement(5);
381 cs = MappingUtils.mapColumnSelection(colsel, dnaView, proteinView);
382 assertEquals("[0, 1, 3]", cs.getSelected().toString());
386 public void testMapColumnSelection_null() throws IOException
// Passing a null column selection must still return a selection object, and
// its selected-column list must be empty.
388 setupMappedAlignments();
// NOTE(review): the final argument of this call is not visible (the numbering jumps 389 -> 391).
389 ColumnSelection cs = MappingUtils.mapColumnSelection(null, dnaView,
391 assertTrue("mapped selection not empty", cs.getSelected().isEmpty());
395 * Tests for the method that converts a series of [start, end] ranges to
399 public void testFlattenRanges()
// Checks that MappingUtils.flattenRanges expands [start, end] pairs into every
// position they cover, e.g. { 1, 4, 7, 9, 12, 12 } -> 1,2,3,4,7,8,9,12.
// Results are compared via Arrays.toString.
// NOTE(review): the input arrays of the first two calls are not visible in this
// extract (the numbering skips 403 and 406).
401 assertEquals("[1, 2, 3, 4]",
402 Arrays.toString(MappingUtils.flattenRanges(new int[]
404 assertEquals("[1, 2, 3, 4]",
405 Arrays.toString(MappingUtils.flattenRanges(new int[]
407 assertEquals("[1, 2, 3, 4]",
408 Arrays.toString(MappingUtils.flattenRanges(new int[]
409 { 1, 1, 2, 2, 3, 3, 4, 4 })));
410 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
411 Arrays.toString(MappingUtils.flattenRanges(new int[]
412 { 1, 4, 7, 9, 12, 12 })));
413 // unpaired start position is ignored:
414 assertEquals("[1, 2, 3, 4, 7, 8, 9, 12]",
415 Arrays.toString(MappingUtils.flattenRanges(new int[]
416 { 1, 4, 7, 9, 12, 12, 15 })));
420 * Test mapping a sequence group made of entire columns.
422 * @throws IOException
425 public void testMapSequenceGroup_columns() throws IOException
// Maps a group covering all three sequences over a column range, protein ->
// dna and then dna -> protein, checking display attributes, member sequences
// and the mapped column bounds.
428 * Set up dna and protein Seq1/2/3 with mappings (held on the protein
// NOTE(review): the format argument of both loadAlignment calls and the
// MapList constructor arguments are not visible here (the numbering skips
// 433, 436 and 440-441).
431 AlignmentI cdna = loadAlignment(
432 ">Seq1\nACGGCA\n>Seq2\nTGACAG\n>Seq3\nTACGTA\n",
434 cdna.setDataset(null);
435 AlignmentI protein = loadAlignment(">Seq1\nKA\n>Seq2\nLQ\n>Seq3\nQV\n",
437 protein.setDataset(null);
438 AlignedCodonFrame acf = new AlignedCodonFrame();
439 MapList map = new MapList(new int[]
// The same MapList is registered for each of the three dna/protein pairs.
442 for (int seq = 0; seq < 3; seq++)
444 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
445 .getSequenceAt(seq).getDatasetSequence(), map);
447 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the same name.
449 AlignViewportI dnaView = new AlignViewport(cdna);
450 AlignViewportI proteinView = new AlignViewport(protein);
451 protein.setCodonFrames(acfList);
454 * Select all sequences, column 2 in the protein
456 SequenceGroup sg = new SequenceGroup();
457 sg.setColourText(true);
458 sg.setIdColour(Color.GREEN);
459 sg.setOutlineColour(Color.LIGHT_GRAY);
460 sg.addSequence(protein.getSequenceAt(0), false);
461 sg.addSequence(protein.getSequenceAt(1), false);
462 sg.addSequence(protein.getSequenceAt(2), false);
// NOTE(review): the statements that set the group's column bounds are not
// visible (the numbering jumps 462 -> 467). The dna bounds 3-5 asserted below
// presumably correspond to the second protein column - confirm.
467 * Verify the mapped sequence group in dna
469 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
470 assertTrue(mappedGroup.getColourText());
471 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
472 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
473 assertEquals(3, mappedGroup.getSequences().size());
474 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
475 assertSame(cdna.getSequenceAt(1), mappedGroup.getSequences().get(1));
476 assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(2));
477 assertEquals(3, mappedGroup.getStartRes());
478 assertEquals(5, mappedGroup.getEndRes());
481 * Verify mapping sequence group from dna to protein
// NOTE(review): as shown, sg still holds the three protein sequences when the
// dna sequences are added. The size-3 assertion below suggests the group is
// emptied on a line not visible here (the numbering jumps 481 -> 484) - confirm.
484 sg.addSequence(cdna.getSequenceAt(0), false);
485 sg.addSequence(cdna.getSequenceAt(1), false);
486 sg.addSequence(cdna.getSequenceAt(2), false);
487 // select columns 2 and 3 in DNA which span protein columns 0 and 1
// NOTE(review): the statements applying that column selection are not visible (the numbering jumps 487 -> 490).
490 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
491 assertTrue(mappedGroup.getColourText());
492 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
493 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
494 assertEquals(3, mappedGroup.getSequences().size());
495 assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(0));
496 assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(1));
497 assertSame(protein.getSequenceAt(2), mappedGroup.getSequences().get(2));
498 assertEquals(0, mappedGroup.getStartRes());
499 assertEquals(1, mappedGroup.getEndRes());
503 * Test mapping a sequence group made of a sequences/columns region.
505 * @throws IOException
508 public void testMapSequenceGroup_region() throws IOException
// Maps groups that cover only some sequences and some columns of gapped
// alignments, protein -> dna and then dna -> protein, checking which sequences
// survive the mapping and the resulting column bounds.
511 * Set up gapped dna and protein Seq1/2/3 with mappings (held on the protein
// NOTE(review): the format argument of the cdna loadAlignment call and the
// MapList constructor arguments are not visible here (the numbering skips
// 516 and 523-524).
514 AlignmentI cdna = loadAlignment(
515 ">Seq1\nA-CG-GC--AT-CA\n>Seq2\n-TG-AC-AG-T-AT\n>Seq3\n-T--ACG-TAAT-G\n",
517 cdna.setDataset(null);
518 AlignmentI protein = loadAlignment(
519 ">Seq1\n-KA-S\n>Seq2\n--L-QY\n>Seq3\nQ-V-M\n", "FASTA");
520 protein.setDataset(null);
521 AlignedCodonFrame acf = new AlignedCodonFrame();
522 MapList map = new MapList(new int[]
// The same MapList is registered for each of the three dna/protein pairs.
525 for (int seq = 0; seq < 3; seq++)
527 acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
528 .getSequenceAt(seq).getDatasetSequence(), map);
530 Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
// These locals shadow the dnaView/proteinView fields of the same name.
532 AlignViewportI dnaView = new AlignViewport(cdna);
533 AlignViewportI proteinView = new AlignViewport(protein);
534 protein.setCodonFrames(acfList);
537 * Select Seq1 and Seq2 in the protein, column 1 (K/-). Expect mapped
538 * sequence group to cover Seq1, columns 0-3 (ACG). Because the selection
539 * only includes a gap in Seq2 there is no mappable selection region in the
542 SequenceGroup sg = new SequenceGroup();
543 sg.setColourText(true);
544 sg.setIdColour(Color.GREEN);
545 sg.setOutlineColour(Color.LIGHT_GRAY);
546 sg.addSequence(protein.getSequenceAt(0), false);
547 sg.addSequence(protein.getSequenceAt(1), false);
// NOTE(review): the statements restricting the group to column 1 are not
// visible (the numbering jumps 547 -> 552).
552 * Verify the mapped sequence group in dna
554 SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
555 assertTrue(mappedGroup.getColourText());
556 assertSame(sg.getIdColour(), mappedGroup.getIdColour());
557 assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
558 assertEquals(1, mappedGroup.getSequences().size());
559 assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
560 // Seq2 in protein has a gap in column 1 - ignored
561 // Seq1 has K which should map to columns 0-3 in Seq1
562 assertEquals(0, mappedGroup.getStartRes());
563 assertEquals(3, mappedGroup.getEndRes());
566 * Now select cols 2-4 in protein. These cover Seq1:AS Seq2:LQ Seq3:VM which
567 * extend over DNA columns 3-12, 1-7, 6-13 respectively, or 1-13 overall.
// NOTE(review): the statements that widen the selection (columns 2-4 and,
// going by the remark above, presumably the third protein sequence) are not
// visible (the numbering jumps 567 -> 571) - confirm.
571 mappedGroup = MappingUtils.mapSequenceGroup(sg, proteinView, dnaView);
572 assertEquals(1, mappedGroup.getStartRes());
573 assertEquals(13, mappedGroup.getEndRes());
576 * Verify mapping sequence group from dna to protein
// NOTE(review): any reset of sg before switching to dna sequences, and the
// statements selecting columns 4-5, are not visible (the numbering skips
// 577-578 and 582-583).
579 sg.addSequence(cdna.getSequenceAt(0), false);
581 // select columns 4,5 - includes Seq1:codon2 (A) only
584 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
585 assertEquals(2, mappedGroup.getStartRes());
586 assertEquals(2, mappedGroup.getEndRes());
588 // add Seq2 to dna selection cols 4-5 include codons 1 and 2 (LQ)
589 sg.addSequence(cdna.getSequenceAt(1), false);
590 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
591 assertEquals(2, mappedGroup.getStartRes());
592 assertEquals(4, mappedGroup.getEndRes());
594 // add Seq3 to dna selection cols 4-5 include codon 1 (Q)
595 sg.addSequence(cdna.getSequenceAt(2), false);
596 mappedGroup = MappingUtils.mapSequenceGroup(sg, dnaView, proteinView);
597 assertEquals(0, mappedGroup.getStartRes());
598 assertEquals(4, mappedGroup.getEndRes());
602 public void testFindMappingsForSequence()
// Checks that MappingUtils.findMappingsForSequence returns the codon frames
// that involve a given sequence on either side of a mapping, and that it
// returns an empty list when the sequence, the mapping set, or both are null.
604 SequenceI seq1 = new Sequence("Seq1", "ABC");
605 SequenceI seq2 = new Sequence("Seq2", "ABC");
606 SequenceI seq3 = new Sequence("Seq3", "ABC");
607 SequenceI seq4 = new Sequence("Seq4", "ABC");
608 seq1.createDatasetSequence();
609 seq2.createDatasetSequence();
610 seq3.createDatasetSequence();
611 seq4.createDatasetSequence();
614 * Create mappings from seq1 to seq2, seq2 to seq1, seq3 to seq1
616 AlignedCodonFrame acf1 = new AlignedCodonFrame();
// NOTE(review): the MapList constructor arguments are not visible here (the
// numbering jumps 617 -> 620). The same MapList instance is reused for all
// three codon frames.
617 MapList map = new MapList(new int[]
620 acf1.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
621 AlignedCodonFrame acf2 = new AlignedCodonFrame();
622 acf2.addMap(seq2.getDatasetSequence(), seq1.getDatasetSequence(), map);
623 AlignedCodonFrame acf3 = new AlignedCodonFrame();
624 acf3.addMap(seq3.getDatasetSequence(), seq1.getDatasetSequence(), map);
// NOTE(review): the statements that put acf1-acf3 into this set are not visible
// (the numbering jumps 626 -> 632); the assertions below presumably rely on
// all three having been added - confirm.
626 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
632 * Seq1 has three mappings
// NOTE(review): the arguments of this first call are not visible (the numbering jumps 634 -> 636).
634 List<AlignedCodonFrame> result = MappingUtils.findMappingsForSequence(
636 assertEquals(3, result.size());
637 assertTrue(result.contains(acf1));
638 assertTrue(result.contains(acf2));
639 assertTrue(result.contains(acf3));
642 * Seq2 has two mappings
644 result = MappingUtils.findMappingsForSequence(seq2, mappings);
645 assertEquals(2, result.size());
646 assertTrue(result.contains(acf1));
647 assertTrue(result.contains(acf2));
650 * Seq3 has one mapping
652 result = MappingUtils.findMappingsForSequence(seq3, mappings);
653 assertEquals(1, result.size());
654 assertTrue(result.contains(acf3));
657 * Seq4 has no mappings
659 result = MappingUtils.findMappingsForSequence(seq4, mappings);
660 assertEquals(0, result.size());
// Null sequence, null mapping set, or both: an empty list is expected in each case.
662 result = MappingUtils.findMappingsForSequence(null, mappings);
663 assertEquals(0, result.size());
665 result = MappingUtils.findMappingsForSequence(seq1, null);
666 assertEquals(0, result.size());
668 result = MappingUtils.findMappingsForSequence(null, null);
669 assertEquals(0, result.size());