2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNull;
27 import static org.testng.AssertJUnit.assertSame;
28 import static org.testng.AssertJUnit.assertTrue;
29 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
31 import java.util.Arrays;
32 import java.util.List;
34 import org.testng.annotations.BeforeClass;
35 import org.testng.annotations.Test;
37 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
38 import jalview.gui.JvOptionPane;
39 import jalview.util.MapList;
41 public class AlignedCodonFrameTest
44 @BeforeClass(alwaysRun = true)
45 public void setUpJvOptionPane()
47 JvOptionPane.setInteractiveMode(false);
48 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
52 * Test the method that locates the first aligned sequence that has a mapping.
54 @Test(groups = { "Functional" })
55 public void testFindAlignedSequence()
57 AlignmentI cdna = new Alignment(new SequenceI[] {});
58 final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC");
59 seq1.createDatasetSequence();
60 cdna.addSequence(seq1);
61 final Sequence seq2 = new Sequence("Seq2", "-TA-GG-GG");
62 seq2.createDatasetSequence();
63 cdna.addSequence(seq2);
65 AlignmentI aa = new Alignment(new SequenceI[] {});
66 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
67 aseq1.createDatasetSequence();
68 aa.addSequence(aseq1);
69 final Sequence aseq2 = new Sequence("Seq2", "-LY-");
70 aseq2.createDatasetSequence();
71 aa.addSequence(aseq2);
74 * Mapping from first DNA sequence to second AA sequence.
76 AlignedCodonFrame acf = new AlignedCodonFrame();
78 assertNull(acf.findAlignedSequence(seq1, aa));
80 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
81 acf.addMap(seq1.getDatasetSequence(), aseq2.getDatasetSequence(), map);
84 * DNA seq1 maps to AA seq2
86 assertEquals(aa.getSequenceAt(1), acf.findAlignedSequence(cdna
87 .getSequenceAt(0).getDatasetSequence(), aa));
88 // can also find this from the dna aligned sequence
89 assertEquals(aa.getSequenceAt(1),
90 acf.findAlignedSequence(cdna.getSequenceAt(0), aa));
92 assertEquals(cdna.getSequenceAt(0), acf.findAlignedSequence(aa
93 .getSequenceAt(1).getDatasetSequence(), cdna));
97 * Test the method that locates the mapped codon for a protein position.
99 @Test(groups = { "Functional" })
100 public void testGetMappedRegion()
102 // introns lower case, exons upper case
103 final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T");
104 dna1.createDatasetSequence();
105 final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a");
106 dna2.createDatasetSequence();
108 final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R");
109 pep1.createDatasetSequence();
110 final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q");
111 pep2.createDatasetSequence();
114 * First with no mappings
116 AlignedCodonFrame acf = new AlignedCodonFrame();
118 assertNull(acf.getMappedRegion(dna1, pep1, 3));
121 * Set up the mappings for the exons (upper-case bases)
122 * Note residue Q is unmapped
124 MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 }, new int[] {
126 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1);
127 MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 }, new int[] { 7, 9 },
129 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2);
132 * get codon positions for peptide position
134 assertArrayEquals(new int[] { 11, 13 },
135 acf.getMappedRegion(dna1, pep1, 3));
136 assertArrayEquals(new int[] { 15, 15, 17, 18 },
137 acf.getMappedRegion(dna1, pep1, 4));
138 assertArrayEquals(new int[] { 20, 21, 23, 23 },
139 acf.getMappedRegion(dna2, pep2, 7));
140 assertArrayEquals(new int[] { 24, 24, 26, 27 },
141 acf.getMappedRegion(dna2, pep2, 8));
144 * No mapping from dna2 to Q
146 assertNull(acf.getMappedRegion(dna2, pep2, 9));
149 * No mapping from dna1 to pep2
151 assertNull(acf.getMappedRegion(dna1, pep2, 7));
154 * get peptide position for codon position
156 assertArrayEquals(new int[] { 3, 3 },
157 acf.getMappedRegion(pep1, dna1, 11));
158 assertArrayEquals(new int[] { 3, 3 },
159 acf.getMappedRegion(pep1, dna1, 12));
160 assertArrayEquals(new int[] { 3, 3 },
161 acf.getMappedRegion(pep1, dna1, 13));
162 assertNull(acf.getMappedRegion(pep1, dna1, 14)); // intron base, not mapped
// Tests getMappedCodons(): it is expected to return null while the frame has no
// mappings, and codon data for a peptide residue once the exon bases of seq1
// have been mapped to aseq1.
// NOTE(review): several statements below are cut off in this listing (the end of
// the MapList constructor call and the expected values of the last assertions
// are not visible) — confirm them against the complete file.
166 @Test(groups = { "Functional" })
167 public void testGetMappedCodons()
// gapped DNA (introns lower case, exons upper case) and gapped peptide
169 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
170 seq1.createDatasetSequence();
171 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
172 aseq1.createDatasetSequence();
175 * First with no mappings
177 AlignedCodonFrame acf = new AlignedCodonFrame();
179 assertNull(acf.getMappedCodons(seq1.getDatasetSequence(), 0));
182 * Set up the mappings for the exons (upper-case bases)
// from-ranges 2-4, 6, 8-9 are the upper-case (exon) bases of seq1
184 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
186 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
// presumably checks the number of codons found for residue 1 — TODO confirm
188 assertEquals(1, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
// the codon returned is rendered with Arrays.toString for comparison
192 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
196 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
// Two DNA sequences with the same exon layout are both mapped (with the same
// MapList) to one peptide; getMappedCodons() is then expected to return one
// codon per DNA sequence for each residue.
201 * Test for the case where there is more than one variant of the DNA mapping
202 * to a protein sequence
204 @Test(groups = { "Functional" })
205 public void testGetMappedCodons_dnaVariants()
207 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
208 seq1.createDatasetSequence();
209 final Sequence seq2 = new Sequence("Seq2", "c-G-TT-gT-gT-A");
210 seq2.createDatasetSequence();
211 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
212 aseq1.createDatasetSequence();
214 AlignedCodonFrame acf = new AlignedCodonFrame();
217 * Set up the mappings for the exons (upper-case bases)
// NOTE(review): the remaining constructor arguments are not visible in this
// listing — confirm against the complete file
219 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
221 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
222 acf.addMap(seq2.getDatasetSequence(), aseq1.getDatasetSequence(), map);
// presumably the size of the returned list: one codon per mapped DNA sequence
224 assertEquals(2, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
// codons are expected in the order the DNA mappings were added: seq1, then seq2
226 List<char[]> codonsForV = acf.getMappedCodons(
227 aseq1.getDatasetSequence(), 1);
228 assertEquals("[G, T, A]", Arrays.toString(codonsForV.get(0)));
229 assertEquals("[G, T, T]", Arrays.toString(codonsForV.get(1)));
// the second residue's codon spans the split exon bases 6 and 8-9
230 List<char[]> codonsForL = acf.getMappedCodons(
231 aseq1.getDatasetSequence(), 2);
232 assertEquals("[C, T, T]", Arrays.toString(codonsForL.get(0)));
233 assertEquals("[T, T, A]", Arrays.toString(codonsForL.get(1)));
// Same scenario as the basic getMappedCodons test, but both sequences are
// created with explicit start/end positions (DNA 27-35, peptide 12-13), so the
// MapList ranges are expressed in those offset coordinates.
237 * Test for the case where sequences have start > 1
239 @Test(groups = { "Functional" })
240 public void testGetMappedCodons_forSubSequences()
242 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35);
243 seq1.createDatasetSequence();
245 final Sequence aseq1 = new Sequence("Seq1", "-V-L", 12, 13);
246 aseq1.createDatasetSequence();
249 * Set up the mappings for the exons (upper-case bases)
251 AlignedCodonFrame acf = new AlignedCodonFrame();
// exon bases 28-30, 32, 34-35 map to residues 12-13 at three bases per residue
252 MapList map = new MapList(new int[] { 28, 30, 32, 32, 34, 35 },
253 new int[] { 12, 13 }, 3, 1);
254 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
// NOTE(review): the opening of each assertion (expected value) and the residue
// position argument are not visible in this listing — confirm against the
// complete file
258 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
262 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
// Exercises the static AlignedCodonFrame.couldRealiseSequence(...) check, using
// a SequenceDummy placeholder named "Seq1" and a real 12-base sequence
// "Seq1/10-21". The cases cover the position of the mapped range relative to
// the real sequence, name mismatches, and dummy/real argument combinations.
// NOTE(review): the test name says "Replace" while the method under test is
// couldRealiseSequence — consider aligning the names.
// NOTE(review): the last argument of most calls below is not visible in this
// listing, so the exact ranges under test cannot be documented here.
266 @Test(groups = { "Functional" })
267 public void testCouldReplaceSequence()
269 SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt");
270 SequenceI seq1proxy = new SequenceDummy("Seq1");
272 // map to region within sequence is ok
273 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
275 // map to region overlapping sequence is ok
276 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 5,
278 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 21,
280 // map to region before sequence is not ok
281 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 4,
283 // map to region after sequence is not ok
284 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 22,
288 * test should fail if name doesn't match
// first rename the dummy, then (after restoring it) rename the real sequence
290 seq1proxy.setName("Seq1a");
291 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
293 seq1proxy.setName("Seq1");
294 seq1.setName("Seq1a");
295 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
299 * a dummy sequence can't replace a real one
// arguments swapped: real sequence first, dummy second
// NOTE(review): seq1 is still named "Seq1a" at this point, so the names differ
// as well — confirm this case fails for the intended reason
301 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1proxy, 12,
305 * a dummy sequence can't replace a dummy sequence
307 SequenceI seq1proxy2 = new SequenceDummy("Seq1");
308 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy,
309 seq1proxy2, 12, 17));
312 * a real sequence can't replace a real one
314 SequenceI seq1a = new Sequence("Seq1/10-21", "aaacccgggttt");
315 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1a, 12, 17));
// Builds a frame holding one mapping from the dummy sequence "Seq1" to peptide
// Seq2, then checks isRealisableWith(seq1) for a matching real sequence, for
// name mismatches on either side, and for different sequence positions
// relative to the mapped bases.
319 * Tests for the method that tests whether any mapping to a dummy sequence can
320 * be 'realised' to a given real sequence
322 @Test(groups = { "Functional" })
323 public void testIsRealisableWith()
325 SequenceI seq1 = new Sequence("Seq1", "tttaaaCCCGGGtttaaa");
326 SequenceI seq2 = new Sequence("Seq2", "PG");
327 SequenceI seq1proxy = new SequenceDummy("Seq1");
328 seq1.createDatasetSequence();
329 seq2.createDatasetSequence();
// NOTE(review): the remaining constructor arguments are not visible in this
// listing — confirm against the complete file
330 MapList mapList = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
332 AlignedCodonFrame acf = new AlignedCodonFrame();
333 acf.addMap(seq1proxy, seq2, mapList);
// NOTE(review): the next comment says bases 4-9, but the MapList above uses
// from-range 7-12 (and a later comment also says 7-12) — the "4-9" looks stale
336 * Seq2 is mapped to SequenceDummy seq1proxy bases 4-9
337 * This is 'realisable' from real sequence Seq1
339 assertTrue(acf.isRealisableWith(seq1));
342 * test should fail if name doesn't match
// rename the dummy, check, then restore its name
344 seq1proxy.setName("Seq1a");
345 assertFalse(acf.isRealisableWith(seq1));
346 seq1proxy.setName("Seq1");
// same check with the real sequence's dataset sequence renamed instead
348 SequenceI seq1ds = seq1.getDatasetSequence();
349 seq1ds.setName("Seq1a");
350 assertFalse(acf.isRealisableWith(seq1));
351 seq1ds.setName("Seq1");
354 * test should fail if no sequence overlap with mapping of bases 7-12
355 * use artificial start/end values to test this
// NOTE(review): the statements that set the artificial start/end values before
// each of the four assertions below are not visible in this listing — confirm
// against the complete file
359 // seq1 precedes mapped region:
360 assertFalse(acf.isRealisableWith(seq1));
362 // seq1 includes first mapped base:
363 assertTrue(acf.isRealisableWith(seq1));
366 // seq1 follows mapped region:
367 assertFalse(acf.isRealisableWith(seq1));
369 // seq1 includes last mapped base:
370 assertTrue(acf.isRealisableWith(seq1));
// Maps two peptides (Seq2, Seq2a) to the dummy sequence "Seq1", calls
// realiseWith(seq1) with a real sequence, and checks that both mappings then
// refer to seq1's dataset sequence instead of the dummy.
374 * Tests for the method that converts mappings to a dummy sequence to mappings
375 * to a compatible real sequence
377 @Test(groups = { "Functional" })
378 public void testRealiseWith()
380 SequenceI seq1 = new Sequence("Seq1", "tttCAACCCGGGtttaaa");
381 SequenceI seq2 = new Sequence("Seq2", "QPG");
382 SequenceI seq2a = new Sequence("Seq2a", "QPG");
383 SequenceI seq1proxy = new SequenceDummy("Seq1");
384 seq1.createDatasetSequence();
385 seq2.createDatasetSequence();
386 seq2a.createDatasetSequence();
389 * Make mappings from Seq2 and Seq2a peptides to dummy sequence Seq1
391 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the trailing arguments of both MapList constructor calls below
// are not visible in this listing — confirm against the complete file
393 // map PG to codons 7-12 (CCCGGG)
394 MapList mapping1 = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
396 acf.addMap(seq1proxy, seq2, mapping1);
397 acf.addMap(seq1proxy, seq2a, mapping1);
399 // map QP to codons 4-9 (CAACCC)
400 MapList mapping2 = new MapList(new int[] { 4, 9 }, new int[] { 1, 2 },
402 acf.addMap(seq1proxy, seq2, mapping2);
403 acf.addMap(seq1proxy, seq2a, mapping2);
// addMap was called four times, yet only two mappings are expected below:
// presumably the second map for each peptide is merged into the first
406 * acf now has two mappings one from Seq1 to Seq2, one from Seq1 to Seq2a
408 assertEquals(2, acf.getdnaSeqs().length);
409 assertSame(seq1proxy, acf.getdnaSeqs()[0]);
410 assertSame(seq1proxy, acf.getdnaSeqs()[1]);
411 assertEquals(2, acf.getProtMappings().length);
413 // 'realise' these mappings with the compatible sequence seq1
414 // two mappings should be updated:
// realiseWith's return value is compared with the expected count of updated
// mappings
415 assertEquals(2, acf.realiseWith(seq1));
416 assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[0]);
417 assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[1]);
421 * Test the method that locates the mapped codon for a protein position.
423 @Test(groups = { "Functional" })
424 public void testGetMappedRegion_eitherWay()
426 final Sequence seq1 = new Sequence("Seq1", "AAACCCGGGTTT");
427 seq1.createDatasetSequence();
428 final Sequence seq2 = new Sequence("Seq2", "KPGF");
429 seq2.createDatasetSequence();
430 final Sequence seq3 = new Sequence("Seq3", "QYKPGFSW");
431 seq3.createDatasetSequence();
434 * map Seq1 to all of Seq2 and part of Seq3
436 AlignedCodonFrame acf = new AlignedCodonFrame();
437 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
438 acf.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
439 map = new MapList(new int[] { 1, 12 }, new int[] { 3, 6 }, 3, 1);
440 acf.addMap(seq1.getDatasetSequence(), seq3.getDatasetSequence(), map);
443 * map part of Seq3 to Seq2
445 map = new MapList(new int[] { 3, 6 }, new int[] { 1, 4 }, 1, 1);
446 acf.addMap(seq3.getDatasetSequence(), seq2.getDatasetSequence(), map);
449 * original case - locate mapped codon for protein position
451 assertArrayEquals(new int[] { 4, 6 },
452 acf.getMappedRegion(seq1, seq2, 2));
453 assertArrayEquals(new int[] { 7, 9 },
454 acf.getMappedRegion(seq1, seq3, 5));
455 assertNull(acf.getMappedRegion(seq1, seq3, 1));
458 * locate mapped protein for protein position
460 assertArrayEquals(new int[] { 4, 4 },
461 acf.getMappedRegion(seq3, seq2, 2));
464 * reverse location protein-to-protein
466 assertArrayEquals(new int[] { 2, 2 },
467 acf.getMappedRegion(seq2, seq3, 4));
470 * reverse location protein-from-nucleotide
471 * any of codon [4, 5, 6] positions map to seq2/2
473 assertArrayEquals(new int[] { 2, 2 },
474 acf.getMappedRegion(seq2, seq1, 4));
475 assertArrayEquals(new int[] { 2, 2 },
476 acf.getMappedRegion(seq2, seq1, 5));
477 assertArrayEquals(new int[] { 2, 2 },
478 acf.getMappedRegion(seq2, seq1, 6));
// Adds the same DNA-to-peptide MapList twice and checks that the frame still
// holds a single mapping from seq1, and that it is the very same Mapping
// object as before the second add.
482 * Tests for addMap. See also tests for MapList.addMapList
484 @Test(groups = { "Functional" })
485 public void testAddMap()
487 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
488 seq1.createDatasetSequence();
489 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
490 aseq1.createDatasetSequence();
492 AlignedCodonFrame acf = new AlignedCodonFrame();
// NOTE(review): the remaining constructor arguments are not visible in this
// listing — confirm against the complete file
493 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
495 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
// the map was added for the dataset sequence but is queried here with the
// aligned sequence seq1
496 assertEquals(1, acf.getMappingsFromSequence(seq1).size());
// keep a reference so that identity can be compared after the second add
497 Mapping before = acf.getMappingsFromSequence(seq1).get(0);
500 * add the same map again, verify it doesn't get duplicated
502 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
503 assertEquals(1, acf.getMappingsFromSequence(seq1).size());
504 assertSame(before, acf.getMappingsFromSequence(seq1).get(0));
// Tests getCoveringMapping(...). Per the assertions below it is expected to
// return null for null arguments, for a frame without mappings, and for
// mappings whose DNA range does not match the queried sequence; it is expected
// to return a mapping when the queried CDS sequence (7-15 or 7-18, with 'dna'
// as its dataset sequence) spans the same range as the mapping.
// NOTE(review): the peptide ranges and ratios of all four MapList constructor
// calls are not visible in this listing — confirm against the complete file.
507 @Test(groups = { "Functional" })
508 public void testGetCoveringMapping()
510 SequenceI dna = new Sequence("dna", "acttcaATGGCGGACtaattt");
// cds is a sub-sequence (7-15) sharing 'dna' as its dataset sequence
511 SequenceI cds = new Sequence("cds/7-15", "ATGGCGGAC");
512 cds.setDatasetSequence(dna);
513 SequenceI pep = new Sequence("pep", "MAD");
516 * with null argument or no mappings
518 AlignedCodonFrame acf = new AlignedCodonFrame();
519 assertNull(acf.getCoveringMapping(null, null));
520 assertNull(acf.getCoveringMapping(dna, null));
521 assertNull(acf.getCoveringMapping(null, pep));
522 assertNull(acf.getCoveringMapping(dna, pep));
525 * with a non-covering mapping e.g. overlapping exon
// mapping of DNA 7-9 only
527 MapList map = new MapList(new int[] { 7, 9 }, new int[] {
529 acf.addMap(dna, pep, map);
530 assertNull(acf.getCoveringMapping(dna, pep));
// fresh frame, mapping of DNA 13-18
532 acf = new AlignedCodonFrame();
533 MapList map2 = new MapList(new int[] { 13, 18 }, new int[] {
535 acf.addMap(dna, pep, map2);
536 assertNull(acf.getCoveringMapping(dna, pep));
539 * with a covering mapping from CDS (dataset) to protein
// DNA 7-15 equals the range of 'cds', so the lookup by cds is expected to
// succeed while the lookup by the whole dna sequence is not
541 acf = new AlignedCodonFrame();
542 MapList map3 = new MapList(new int[] { 7, 15 }, new int[] {
544 acf.addMap(dna, pep, map3);
545 assertNull(acf.getCoveringMapping(dna, pep));
546 SequenceToSequenceMapping mapping = acf.getCoveringMapping(cds, pep);
547 assertNotNull(mapping);
550 * with a mapping that extends to stop codon
// DNA 7-18 no longer matches cds (7-15) but does match cds2 (7-18)
552 acf = new AlignedCodonFrame();
553 MapList map4 = new MapList(new int[] { 7, 18 }, new int[] {
555 acf.addMap(dna, pep, map4);
556 assertNull(acf.getCoveringMapping(dna, pep));
557 assertNull(acf.getCoveringMapping(cds, pep));
558 SequenceI cds2 = new Sequence("cds/7-18", "ATGGCGGACtaa");
559 cds2.setDatasetSequence(dna);
560 mapping = acf.getCoveringMapping(cds2, pep);
561 assertNotNull(mapping);
565 * Test the method that adds mapped positions to SearchResults
567 @Test(groups = { "Functional" })
568 public void testMarkMappedRegion()
570 // introns lower case, exons upper case
571 final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T");
572 dna1.createDatasetSequence();
573 final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a");
574 dna2.createDatasetSequence();
576 final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R");
577 pep1.createDatasetSequence();
578 final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q");
579 pep2.createDatasetSequence();
582 * First with no mappings
584 AlignedCodonFrame acf = new AlignedCodonFrame();
585 SearchResults sr = new SearchResults();
586 acf.markMappedRegion(dna1, 12, sr);
587 assertTrue(sr.isEmpty());
590 * Set up the mappings for the exons (upper-case bases)
591 * Note residue Q is unmapped
593 MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 }, new int[] {
595 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1);
596 MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 }, new int[] { 7, 8 },
598 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2);
601 * intron bases are not mapped
603 acf.markMappedRegion(dna1, 10, sr);
604 assertTrue(sr.isEmpty());
609 acf.markMappedRegion(pep2, 9, sr);
610 assertTrue(sr.isEmpty());
613 * mark peptide position for exon position (of aligned sequence)
615 acf.markMappedRegion(dna1, 11, sr);
616 SearchResults expected = new SearchResults();
617 expected.addResult(pep1.getDatasetSequence(), 3, 3);
618 assertEquals(sr, expected);
621 * mark peptide position for exon position of dataset sequence - same result
623 sr = new SearchResults();
624 acf.markMappedRegion(dna1.getDatasetSequence(), 11, sr);
625 assertEquals(sr, expected);
628 * marking the same position a second time should not create a duplicate match
630 acf.markMappedRegion(dna1.getDatasetSequence(), 12, sr);
631 assertEquals(sr, expected);
634 * mark exon positions for peptide position (of aligned sequence)
636 sr = new SearchResults();
637 acf.markMappedRegion(pep2, 7, sr); // codon positions 20, 21, 23
638 expected = new SearchResults();
639 expected.addResult(dna2.getDatasetSequence(), 20, 21);
640 expected.addResult(dna2.getDatasetSequence(), 23, 23);
641 assertEquals(sr, expected);
644 * add another codon to the same SearchResults
646 acf.markMappedRegion(pep1.getDatasetSequence(), 4, sr); // codon positions 15, 17, 18
647 expected.addResult(dna1.getDatasetSequence(), 15, 15);
648 expected.addResult(dna1.getDatasetSequence(), 17, 18);
649 assertEquals(sr, expected);