/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertSame;
27 import static org.testng.AssertJUnit.assertTrue;
28 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
30 import jalview.util.MapList;
32 import java.util.Arrays;
33 import java.util.List;
35 import org.testng.annotations.Test;
37 public class AlignedCodonFrameTest
41 * Test the method that locates the first aligned sequence that has a mapping.
43 @Test(groups = { "Functional" })
44 public void testFindAlignedSequence()
46 AlignmentI cdna = new Alignment(new SequenceI[] {});
47 final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC");
48 seq1.createDatasetSequence();
49 cdna.addSequence(seq1);
50 final Sequence seq2 = new Sequence("Seq2", "-TA-GG-GG");
51 seq2.createDatasetSequence();
52 cdna.addSequence(seq2);
54 AlignmentI aa = new Alignment(new SequenceI[] {});
55 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
56 aseq1.createDatasetSequence();
57 aa.addSequence(aseq1);
58 final Sequence aseq2 = new Sequence("Seq2", "-LY-");
59 aseq2.createDatasetSequence();
60 aa.addSequence(aseq2);
63 * Mapping from first DNA sequence to second AA sequence.
65 AlignedCodonFrame acf = new AlignedCodonFrame();
67 assertNull(acf.findAlignedSequence(seq1, aa));
69 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
70 acf.addMap(seq1.getDatasetSequence(), aseq2.getDatasetSequence(), map);
73 * DNA seq1 maps to AA seq2
75 assertEquals(aa.getSequenceAt(1), acf.findAlignedSequence(cdna
76 .getSequenceAt(0).getDatasetSequence(), aa));
77 // can also find this from the dna aligned sequence
78 assertEquals(aa.getSequenceAt(1),
79 acf.findAlignedSequence(cdna.getSequenceAt(0), aa));
81 assertEquals(cdna.getSequenceAt(0), acf.findAlignedSequence(aa
82 .getSequenceAt(1).getDatasetSequence(), cdna));
86 * Test the method that locates the mapped codon for a protein position.
88 @Test(groups = { "Functional" })
89 public void testGetMappedRegion()
91 // introns lower case, exons upper case
92 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
93 seq1.createDatasetSequence();
94 final Sequence seq2 = new Sequence("Seq2", "-TA-gG-Gg-CG-a");
95 seq2.createDatasetSequence();
97 final Sequence aseq1 = new Sequence("Seq1", "-P-R");
98 aseq1.createDatasetSequence();
99 final Sequence aseq2 = new Sequence("Seq2", "-LY-Q");
100 aseq2.createDatasetSequence();
103 * First with no mappings
105 AlignedCodonFrame acf = new AlignedCodonFrame();
107 assertNull(acf.getMappedRegion(seq1, aseq1, 1));
110 * Set up the mappings for the exons (upper-case bases)
111 * Note residue Q is unmapped
113 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
115 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
116 map = new MapList(new int[] { 1, 2, 4, 5, 7, 8 }, new int[] { 1, 2 },
118 acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map);
120 assertArrayEquals(new int[] { 2, 4 },
121 acf.getMappedRegion(seq1, aseq1, 1));
122 assertArrayEquals(new int[] { 6, 6, 8, 9 },
123 acf.getMappedRegion(seq1, aseq1, 2));
124 assertArrayEquals(new int[] { 1, 2, 4, 4 },
125 acf.getMappedRegion(seq2, aseq2, 1));
126 assertArrayEquals(new int[] { 5, 5, 7, 8 },
127 acf.getMappedRegion(seq2, aseq2, 2));
130 * No mapping from seq2 to Q
132 assertNull(acf.getMappedRegion(seq2, aseq2, 3));
135 * No mapping from sequence 1 to sequence 2
137 assertNull(acf.getMappedRegion(seq1, aseq2, 1));
140 @Test(groups = { "Functional" })
141 public void testGetMappedCodons()
143 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
144 seq1.createDatasetSequence();
145 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
146 aseq1.createDatasetSequence();
149 * First with no mappings
151 AlignedCodonFrame acf = new AlignedCodonFrame();
153 assertNull(acf.getMappedCodons(seq1.getDatasetSequence(), 0));
156 * Set up the mappings for the exons (upper-case bases)
158 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
160 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
162 assertEquals(1, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
166 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
170 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
175 * Test for the case where there is more than one variant of the DNA mapping
176 * to a protein sequence
178 @Test(groups = { "Functional" })
179 public void testGetMappedCodons_dnaVariants()
181 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
182 seq1.createDatasetSequence();
183 final Sequence seq2 = new Sequence("Seq2", "c-G-TT-gT-gT-A");
184 seq2.createDatasetSequence();
185 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
186 aseq1.createDatasetSequence();
188 AlignedCodonFrame acf = new AlignedCodonFrame();
191 * Set up the mappings for the exons (upper-case bases)
193 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
195 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
196 acf.addMap(seq2.getDatasetSequence(), aseq1.getDatasetSequence(), map);
198 assertEquals(2, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
200 List<char[]> codonsForV = acf.getMappedCodons(
201 aseq1.getDatasetSequence(), 1);
202 assertEquals("[G, T, A]", Arrays.toString(codonsForV.get(0)));
203 assertEquals("[G, T, T]", Arrays.toString(codonsForV.get(1)));
204 List<char[]> codonsForL = acf.getMappedCodons(
205 aseq1.getDatasetSequence(), 2);
206 assertEquals("[C, T, T]", Arrays.toString(codonsForL.get(0)));
207 assertEquals("[T, T, A]", Arrays.toString(codonsForL.get(1)));
211 * Test for the case where sequences have start > 1
213 @Test(groups = { "Functional" })
214 public void testGetMappedCodons_forSubSequences()
216 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35);
217 seq1.createDatasetSequence();
219 final Sequence aseq1 = new Sequence("Seq1", "-V-L", 12, 13);
220 aseq1.createDatasetSequence();
223 * Set up the mappings for the exons (upper-case bases)
225 AlignedCodonFrame acf = new AlignedCodonFrame();
226 MapList map = new MapList(new int[] { 28, 30, 32, 32, 34, 35 },
227 new int[] { 12, 13 }, 3, 1);
228 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
232 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
236 Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
240 @Test(groups = { "Functional" })
241 public void testCouldReplaceSequence()
243 SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt");
244 SequenceI seq1proxy = new SequenceDummy("Seq1");
246 // map to region within sequence is ok
247 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
249 // map to region overlapping sequence is ok
250 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 5,
252 assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 21,
254 // map to region before sequence is not ok
255 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 4,
257 // map to region after sequence is not ok
258 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 22,
262 * test should fail if name doesn't match
264 seq1proxy.setName("Seq1a");
265 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
267 seq1proxy.setName("Seq1");
268 seq1.setName("Seq1a");
269 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
273 * a dummy sequence can't replace a real one
275 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1proxy, 12,
279 * a dummy sequence can't replace a dummy sequence
281 SequenceI seq1proxy2 = new SequenceDummy("Seq1");
282 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy,
283 seq1proxy2, 12, 17));
286 * a real sequence can't replace a real one
288 SequenceI seq1a = new Sequence("Seq1/10-21", "aaacccgggttt");
289 assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1a, 12, 17));
293 * Tests for the method that tests whether any mapping to a dummy sequence can
294 * be 'realised' to a given real sequence
296 @Test(groups = { "Functional" })
297 public void testIsRealisableWith()
299 SequenceI seq1 = new Sequence("Seq1", "tttaaaCCCGGGtttaaa");
300 SequenceI seq2 = new Sequence("Seq2", "PG");
301 SequenceI seq1proxy = new SequenceDummy("Seq1");
302 seq1.createDatasetSequence();
303 seq2.createDatasetSequence();
304 MapList mapList = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
306 AlignedCodonFrame acf = new AlignedCodonFrame();
307 acf.addMap(seq1proxy, seq2, mapList);
310 * Seq2 is mapped to SequenceDummy seq1proxy bases 4-9
311 * This is 'realisable' from real sequence Seq1
313 assertTrue(acf.isRealisableWith(seq1));
316 * test should fail if name doesn't match
318 seq1proxy.setName("Seq1a");
319 assertFalse(acf.isRealisableWith(seq1));
320 seq1proxy.setName("Seq1");
322 SequenceI seq1ds = seq1.getDatasetSequence();
323 seq1ds.setName("Seq1a");
324 assertFalse(acf.isRealisableWith(seq1));
325 seq1ds.setName("Seq1");
328 * test should fail if no sequence overlap with mapping of bases 7-12
329 * use artificial start/end values to test this
333 // seq1 precedes mapped region:
334 assertFalse(acf.isRealisableWith(seq1));
336 // seq1 includes first mapped base:
337 assertTrue(acf.isRealisableWith(seq1));
340 // seq1 follows mapped region:
341 assertFalse(acf.isRealisableWith(seq1));
343 // seq1 includes last mapped base:
344 assertTrue(acf.isRealisableWith(seq1));
348 * Tests for the method that converts mappings to a dummy sequence to mappings
349 * to a compatible real sequence
351 @Test(groups = { "Functional" })
352 public void testRealiseWith()
354 SequenceI seq1 = new Sequence("Seq1", "tttCAACCCGGGtttaaa");
355 SequenceI seq2 = new Sequence("Seq2", "QPG");
356 SequenceI seq2a = new Sequence("Seq2a", "QPG");
357 SequenceI seq1proxy = new SequenceDummy("Seq1");
358 seq1.createDatasetSequence();
359 seq2.createDatasetSequence();
360 seq2a.createDatasetSequence();
363 * Make mappings from Seq2 and Seq2a peptides to dummy sequence Seq1
365 AlignedCodonFrame acf = new AlignedCodonFrame();
367 // map PG to codons 7-12 (CCCGGG)
368 MapList mapping1 = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
370 acf.addMap(seq1proxy, seq2, mapping1);
371 acf.addMap(seq1proxy, seq2a, mapping1);
373 // map QP to codons 4-9 (CAACCC)
374 MapList mapping2 = new MapList(new int[] { 4, 9 }, new int[] { 1, 2 },
376 acf.addMap(seq1proxy, seq2, mapping2);
377 acf.addMap(seq1proxy, seq2a, mapping2);
380 * acf now has two mappings one from Seq1 to Seq2, one from Seq1 to Seq2a
382 assertEquals(2, acf.getdnaSeqs().length);
383 assertSame(seq1proxy, acf.getdnaSeqs()[0]);
384 assertSame(seq1proxy, acf.getdnaSeqs()[1]);
385 assertEquals(2, acf.getProtMappings().length);
387 // 'realise' these mappings with the compatible sequence seq1
388 // two mappings should be updated:
389 assertEquals(2, acf.realiseWith(seq1));
390 assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[0]);
391 assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[1]);
395 * Test the method that locates the mapped codon for a protein position.
397 @Test(groups = { "Functional" })
398 public void testGetMappedRegion_eitherWay()
400 final Sequence seq1 = new Sequence("Seq1", "AAACCCGGGTTT");
401 seq1.createDatasetSequence();
402 final Sequence seq2 = new Sequence("Seq2", "KPGF");
403 seq2.createDatasetSequence();
404 final Sequence seq3 = new Sequence("Seq3", "QYKPGFSW");
405 seq3.createDatasetSequence();
408 * map Seq1 to all of Seq2 and part of Seq3
410 AlignedCodonFrame acf = new AlignedCodonFrame();
411 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
412 acf.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
413 map = new MapList(new int[] { 1, 12 }, new int[] { 3, 6 }, 3, 1);
414 acf.addMap(seq1.getDatasetSequence(), seq3.getDatasetSequence(), map);
417 * map part of Seq3 to Seq2
419 map = new MapList(new int[] { 3, 6 }, new int[] { 1, 4 }, 1, 1);
420 acf.addMap(seq3.getDatasetSequence(), seq2.getDatasetSequence(), map);
423 * original case - locate mapped codon for protein position
425 assertArrayEquals(new int[] { 4, 6 },
426 acf.getMappedRegion(seq1, seq2, 2));
427 assertArrayEquals(new int[] { 7, 9 },
428 acf.getMappedRegion(seq1, seq3, 5));
429 assertNull(acf.getMappedRegion(seq1, seq3, 1));
432 * locate mapped protein for protein position
434 assertArrayEquals(new int[] { 4, 4 },
435 acf.getMappedRegion(seq3, seq2, 2));
438 * reverse location protein-to-protein
440 assertArrayEquals(new int[] { 2, 2 },
441 acf.getMappedRegion(seq2, seq3, 4));
444 * reverse location protein-from-nucleotide
445 * any of codon [4, 5, 6] positions map to seq2/2
447 assertArrayEquals(new int[] { 2, 2 },
448 acf.getMappedRegion(seq2, seq1, 4));
449 assertArrayEquals(new int[] { 2, 2 },
450 acf.getMappedRegion(seq2, seq1, 5));
451 assertArrayEquals(new int[] { 2, 2 },
452 acf.getMappedRegion(seq2, seq1, 6));
456 * Tests for addMap. See also tests for MapList.addMapList
458 @Test(groups = { "Functional" })
459 public void testAddMap()
461 final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
462 seq1.createDatasetSequence();
463 final Sequence aseq1 = new Sequence("Seq1", "-V-L");
464 aseq1.createDatasetSequence();
466 AlignedCodonFrame acf = new AlignedCodonFrame();
467 MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
469 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
470 assertEquals(1, acf.getMappingsFromSequence(seq1).size());
471 Mapping before = acf.getMappingsFromSequence(seq1).get(0);
474 * add the same map again, verify it doesn't get duplicated
476 acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
477 assertEquals(1, acf.getMappingsFromSequence(seq1).size());
478 assertSame(before, acf.getMappingsFromSequence(seq1).get(0));