1 package jalview.analysis;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertNotNull;
5 import static org.testng.AssertJUnit.assertTrue;
7 import jalview.api.AlignViewportI;
8 import jalview.datamodel.AlignedCodon;
9 import jalview.datamodel.Alignment;
10 import jalview.datamodel.AlignmentI;
11 import jalview.datamodel.ColumnSelection;
12 import jalview.datamodel.SequenceI;
13 import jalview.gui.AlignViewport;
14 import jalview.io.FormatAdapter;
16 import java.io.IOException;
18 import org.testng.annotations.Test;
23 // AA encoding codons as ordered on the Jalview help page Amino Acid Table
// Single-sequence FASTA text: header ">B" followed by 64 codon triplets
// concatenated into one 192-base line. It is the cDNA input read by
// testTranslateCdna_simple and testTranslateCdna_hiddenColumns.
24 private static String fasta = ">B\n" + "GCT" + "GCC" + "GCA" + "GCG"
25 + "TGT" + "TGC" + "GAT" + "GAC" + "GAA" + "GAG" + "TTT" + "TTC"
26 + "GGT" + "GGC" + "GGA" + "GGG" + "CAT" + "CAC" + "ATT" + "ATC"
27 + "ATA" + "AAA" + "AAG" + "TTG" + "TTA" + "CTT" + "CTC" + "CTA"
28 + "CTG" + "ATG" + "AAT" + "AAC" + "CCT" + "CCC" + "CCA" + "CCG"
29 + "CAA" + "CAG" + "CGT" + "CGC" + "CGA" + "CGG" + "AGA" + "AGG"
30 + "TCT" + "TCC" + "TCA" + "TCG" + "AGT" + "AGC" + "ACT" + "ACC"
31 + "ACA" + "ACG" + "GTT" + "GTC" + "GTA" + "GTG" + "TGG" + "TAT"
// The final three triplets (TAA, TAG, TGA) line up with the trailing "***"
// in the translation expected by testTranslateCdna_simple.
32 + "TAC" + "TAA" + "TAG" + "TGA";
// Multi-sequence FASTA text holding 11 gapped nucleotide sequences, each
// split over four lines. Ten of the sequences start with a '-' gap and several
// contain internal gaps. Two sequences contain letters outside A/C/G/T
// (a 'W' in AY178915.1 and a 'Y' in AY178931.1) - presumably these are the
// "untranslatable codons" the tests that read this constant are named for.
34 private static String JAL_1312_example_align_fasta = ">B.FR.83.HXB2_LAI_IIIB_BRU_K03455/45-306\n"
35 + "ATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
36 + "GGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACA\n"
37 + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTG\n"
38 + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAC\n"
39 + ">gi|27804621|gb|AY178912.1|/1-259\n"
40 + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
41 + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGGACA\n"
42 + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
43 + "TGTTCATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAT\n"
44 + ">gi|27804623|gb|AY178913.1|/1-259\n"
45 + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
46 + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGAACA\n"
47 + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
48 + "TGTTCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
49 + ">gi|27804627|gb|AY178915.1|/1-260\n"
50 + "-TGGGAAAA-ATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
51 + "GGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGTTGTAGACAAATATTGGGACA\n"
52 + "GCTACAACCATCCCTTGAGACAGGATCAGAAGAACTTAAATCATTATWTAATACCATAGCAGTCCTCTATTG\n"
53 + "TGTACATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAG\n"
54 + ">gi|27804631|gb|AY178917.1|/1-261\n"
55 + "-TGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
56 + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACACCAGAAGGCTGTAGACAAATACTGGGACA\n"
57 + "GCTACAACCGTCCCTTCAGACAGGATCGGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
58 + "TGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAGGCTTTAGAC\n"
59 + ">gi|27804635|gb|AY178919.1|/1-261\n"
60 + "-TGGGAGAGAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAATTGAAACATATAGTATGGGCAGGCAG\n"
61 + "AGAGCTAGATCGATTCGCAGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAGATATTGGGACA\n"
62 + "GCTACAACCGTCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
63 + "TGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
64 + ">gi|27804641|gb|AY178922.1|/1-261\n"
65 + "-TGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
66 + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAAATACTGGGACA\n"
67 + "GTTACACCCATCCCTTCATACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
68 + "TGTGCATCAAAGGATAGAAGTAAAAGACACCAAGGAAGCTTTAGAC\n"
69 + ">gi|27804647|gb|AY178925.1|/1-261\n"
70 + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATGTAGTATGGGCAAGCAG\n"
71 + "GGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
72 + "GCTACAACCATCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTG\n"
73 + "TGTACATCAAAGAATAGATGTAAAAGACACCAAGGAAGCTCTAGAA\n"
74 + ">gi|27804649|gb|AY178926.1|/1-261\n"
75 + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
76 + "GGAGCTAGAACGATTCGCGGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAACTACTGGGACA\n"
77 + "GTTACAACCATCCCTTCAGACAGGATCAGAAGAACTCAAATCATTATATAATACAATAGCAACCCTCTATTG\n"
78 + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCCTTAGAT\n"
79 + ">gi|27804653|gb|AY178928.1|/1-261\n"
80 + "-TGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
81 + "GGAGCTAGACCGATTCGCACTTAACCCCGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
82 + "GCTACAATCGTCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATATAATACAGTAGCAGTCCTCTATTG\n"
83 + "TGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCCTTAGAC\n"
84 + ">gi|27804659|gb|AY178931.1|/1-261\n"
85 + "-TGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
86 + "GGAGCTAGAACGATTYGCAGTTAATCCTGGCCTTTTAGAAACAGCAGAAGGCTGTAGACAAATACTGGGACA\n"
87 + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
88 + "TGTACATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAA\n";
92 * Corner case for this test is the presence of codons after codons that were
// Full-width translation test: parses JAL_1312_example_align_fasta, wraps it
// in a viewport with an empty ColumnSelection (nothing hidden) and translates
// the single contig [0, width - 1], then asserts on the result.
97 @Test(groups = { "Functional" })
98 public void testTranslateCdna_withUntranslatableCodons()
// NOTE(review): the tail of this readFile call (original line 103) is not
// visible here; the sibling tests pass "FASTA" as the third argument.
101 AlignmentI alf = new FormatAdapter().readFile(
102 JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
104 ColumnSelection cs = new ColumnSelection();
105 AlignViewportI av = new AlignViewport(alf, cs);
// One contig spanning every column, expressed as inclusive start/end indices.
106 Dna dna = new Dna(av, new int[] { 0, alf.getWidth() - 1 });
107 AlignmentI translated = dna.translateCdna();
// The asserted value (original lines 109-110) is not visible here; presumably
// it is `translated` - confirm.
108 assertNotNull("Couldn't do a full width translation of test data.",
113 * Test variant in which 15 column blocks at a time are translated (the rest
116 * @throws IOException
// Windowed translation test: steps a window of `vwidth` columns across the
// alignment parsed from JAL_1312_example_align_fasta. For each window it hides
// the columns outside it, translates only the visible contigs, and checks that
// a non-null, non-empty alignment with one translated sequence per input
// sequence comes back.
118 @Test(groups = { "Functional" })
119 public void testTranslateCdna_withUntranslatableCodonsAndHiddenColumns()
122 AlignmentI alf = new FormatAdapter().readFile(
123 JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
// `vwidth` is declared on a line not visible here (the method's Javadoc
// mentions 15-column blocks - confirm). The loop stops before any window that
// would reach or pass the last column, so a trailing partial window is not
// translated.
126 for (int ipos = 0; ipos + vwidth < alf.getWidth(); ipos += vwidth)
// A fresh selection per window, so hidden ranges do not accumulate.
128 ColumnSelection cs = new ColumnSelection();
// Hide everything to the left of the window. Original lines 129-130 are not
// visible; presumably they guard this call when ipos is 0 - confirm.
131 cs.hideColumns(0, ipos - 1);
// Hide everything to the right of the window.
// NOTE(review): the call above uses an inclusive-looking end (ipos - 1), which
// would make alf.getWidth() one past the last column - confirm hideColumns
// tolerates that, or whether getWidth() - 1 was intended.
133 cs.hideColumns(ipos + vwidth, alf.getWidth());
134 int[] vcontigs = cs.getVisibleContigs(0, alf.getWidth());
135 AlignViewportI av = new AlignViewport(alf, cs);
136 Dna dna = new Dna(av, vcontigs);
137 AlignmentI transAlf = dna.translateCdna();
// NOTE(review): assertNotNull is already statically imported in this file and
// would express this check more directly.
139 assertTrue("Translation failed (ipos=" + ipos
140 + ") No alignment data.", transAlf != null);
141 assertTrue("Translation failed (ipos=" + ipos + ") Empty alignment.",
142 transAlf.getHeight() > 0);
// Every input sequence must produce a translated sequence.
143 assertTrue("Translation failed (ipos=" + ipos + ") Translated "
144 + transAlf.getHeight() + " sequences from " + alf.getHeight()
145 + " sequences", alf.getHeight() == transAlf.getHeight());
150 * Test simple translation to Amino Acids (with STOP codons translated to *).
152 * @throws IOException
// Translates the 64-codon `fasta` sequence with no hidden columns and compares
// the first translated sequence with the expected 64-character string.
154 @Test(groups = { "Functional" })
155 public void testTranslateCdna_simple() throws IOException
157 AlignmentI alf = new FormatAdapter().readFile(fasta,
158 FormatAdapter.PASTE, "FASTA");
159 ColumnSelection cs = new ColumnSelection();
160 AlignViewportI av = new AlignViewport(alf, cs);
// One contig spanning every column, expressed as inclusive start/end indices.
161 Dna dna = new Dna(av, new int[] { 0, alf.getWidth() - 1 });
162 AlignmentI translated = dna.translateCdna();
163 String aa = translated.getSequenceAt(0).getSequenceAsString();
// Expected translation, one character per codon in `fasta`, ending in "***"
// for the three stop codons. The enclosing assert call (original lines 164
// and 166) is not visible here.
165 "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY***",
170 * Test translation excluding hidden columns.
172 * @throws IOException
// Translates the `fasta` sequence with three column ranges hidden and checks
// that the first translated sequence lacks the residues for the hidden codons.
174 @Test(groups = { "Functional" })
175 public void testTranslateCdna_hiddenColumns() throws IOException
177 AlignmentI alf = new FormatAdapter().readFile(fasta,
178 FormatAdapter.PASTE, "FASTA");
179 ColumnSelection cs = new jalview.datamodel.ColumnSelection();
// Column indices are 0-based while the codon numbers in the trailing comments
// are 1-based: columns 6-14 are codons 3-5, 24-35 are codons 9-12, and
// 177-191 are codons 60-64.
180 cs.hideColumns(6, 14); // hide codons 3/4/5
181 cs.hideColumns(24, 35); // hide codons 9-12
182 cs.hideColumns(177, 191); // hide codons 60-64
183 AlignViewportI av = new AlignViewport(alf, cs);
// The Dna object is still given the full-width contig; the hidden ranges are
// supplied only through the viewport's ColumnSelection.
184 Dna dna = new Dna(av, new int[] { 0, alf.getWidth() - 1 });
185 AlignmentI translated = dna.translateCdna();
186 String aa = translated.getSequenceAt(0).getSequenceAsString();
// Same as the full translation minus "AAC" (codons 3-5), "EEFF" (codons 9-12)
// and "YY***" (codons 60-64).
187 assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa);
191 * Use this test to help debug any single case of interest.
// Runs a single codon-position comparison on its own; the same pair is also
// asserted in testCompareCodonPos.
193 @Test(groups = { "Functional" })
194 public void testCompareCodonPos_oneOnly()
196 assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
200 * Tests for method that compares 'alignment' of two codon position triplets.
// Exercises Dna.compareCodonPos through the assertMatches / assertFollows /
// assertPrecedes helpers. Each string argument is one codon written with '-'
// for gap columns; the first argument uses 'A' bases and the second 'G'. The
// trailing comment on each case says how base positions 2 and 3 relate between
// the two codons; the three groups of nine differ in how position 1 relates.
202 @Test(groups = { "Functional" })
203 public void testCompareCodonPos()
206 * Returns 0 for any null argument
208 assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null));
209 assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3)));
212 * Work through 27 combinations. First 9 cases where first position matches.
214 assertMatches("AAA", "GGG"); // 2 and 3 match
215 assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1
216 assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2
217 assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches
218 assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1
219 assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2
220 assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches
221 assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1
222 assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2
225 * 9 cases where first position is shifted in first sequence.
227 assertFollows("-AAA", "G-GG"); // 2 and 3 match
228 assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1
229 // 'enclosing' case: pick first to start precedes
230 assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2
231 assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches
232 assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1
233 // 'enclosing' case: pick first to start precedes
234 assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2
235 assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches
236 assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
237 assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2
240 * 9 cases where first position is shifted in second sequence.
242 assertPrecedes("A-AA", "-GGG"); // 2 and 3 match
243 assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1
244 assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2
245 assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches
246 // 'enclosing' case with middle base deciding:
247 assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1
248 assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2
249 assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches
250 assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1
251 assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2
255 * This test generates a random cDNA alignment and its translation, then
256 * reorders the cDNA and retranslates, and verifies that the translations are
257 * the same (apart from ordering).
// Translates a generated cDNA alignment, then translates the same sequences
// in a shuffled order, and asserts that each sequence's translation is the
// same in both results.
259 @Test(groups = { "Functional" })
260 public void testTranslateCdna_sequenceOrderIndependent()
263 * Generate cDNA - 8 sequences of 12 bases each.
// NOTE(review): the meaning of the last three generate() arguments (97, 5, 5)
// cannot be determined from this file - see DnaAlignmentGenerator.
265 AlignmentI cdna = new DnaAlignmentGenerator().generate(12, 8, 97, 5, 5);
266 ColumnSelection cs = new ColumnSelection();
267 AlignViewportI av = new AlignViewport(cdna, cs);
268 Dna dna = new Dna(av, new int[] { 0, cdna.getWidth() - 1 });
269 AlignmentI translated = dna.translateCdna();
272 * Jumble the cDNA sequences and translate.
274 SequenceI[] sorted = new SequenceI[cdna.getHeight()];
// jumbler[k] is the index in the original alignment of the sequence placed at
// position k of the reordered alignment.
275 final int[] jumbler = new int[] { 6, 7, 3, 4, 2, 0, 1, 5 };
// `seqNo` is declared on a line not visible here (original line 276);
// presumably it starts at 0 - confirm.
277 for (int i : jumbler)
279 sorted[seqNo++] = cdna.getSequenceAt(i);
281 AlignmentI cdnaReordered = new Alignment(sorted);
// The same (empty) ColumnSelection instance is reused for the second viewport.
282 av = new AlignViewport(cdnaReordered, cs);
283 dna = new Dna(av, new int[] { 0, cdna.getWidth() - 1 });
284 AlignmentI translated2 = dna.translateCdna();
287 * Check translated sequences are the same in both alignments.
// NOTE(review): these prints write both translations to stdout on every run.
289 System.out.println("Original");
290 System.out.println(translated.toString());
291 System.out.println("Sorted");
292 System.out.println(translated2.toString());
294 int sortedSequenceIndex = 0;
// Walk the reordered result in order and look up the matching original
// sequence through the jumbler mapping.
295 for (int originalSequenceIndex : jumbler)
297 final String translation1 = translated.getSequenceAt(
298 originalSequenceIndex).getSequenceAsString();
299 final String translation2 = translated2.getSequenceAt(
300 sortedSequenceIndex).getSequenceAsString();
// NOTE(review): the reordered translation is passed in the 'expected' slot and
// the original in 'actual'; this only affects the wording of a failure.
301 assertEquals(translation2, translation1);
302 sortedSequenceIndex++;
307 * Test that all the cases in testCompareCodonPos have a 'symmetric'
308 * comparison (without checking the actual comparison result).
// Runs assertSymmetric over the same 27 codon pairs, in the same order, that
// testCompareCodonPos uses. Only the sign relationship between the two
// argument orders is checked, not the comparison result itself.
310 @Test(groups = { "Functional" })
311 public void testCompareCodonPos_isSymmetric()
313 assertSymmetric("AAA", "GGG");
314 assertSymmetric("AA-A", "GGG");
315 assertSymmetric("AAA", "GG-G");
316 assertSymmetric("A-AA", "GG-G");
317 assertSymmetric("A-A-A", "GG-G");
318 assertSymmetric("A-AA", "GG--G");
319 assertSymmetric("AA-A", "G-GG");
320 assertSymmetric("AA--A", "G-GG");
321 assertSymmetric("AAA", "G-GG");
322 assertSymmetric("-AAA", "G-GG");
323 assertSymmetric("-AA-A", "G-GG");
324 assertSymmetric("-AAA", "G-G-G");
325 assertSymmetric("-A-AA", "G-G-G");
326 assertSymmetric("-A-A-A", "G-G-G");
327 assertSymmetric("-A-AA", "G-G--G");
328 assertSymmetric("-AA-A", "G--GG");
329 assertSymmetric("-AA--A", "G--GG");
330 assertSymmetric("-AAA", "G--GG");
331 assertSymmetric("A-AA", "-GGG");
332 assertSymmetric("A-A-A", "-GGG");
333 assertSymmetric("A-AA", "-GG-G");
334 assertSymmetric("A--AA", "-GG-G");
335 assertSymmetric("A--AA", "-GGG");
336 assertSymmetric("A--AA", "-GG--G");
337 assertSymmetric("AA-A", "-GGG");
338 assertSymmetric("AA--A", "-GGG");
339 assertSymmetric("AAA", "-GGG");
// Asserts that comparing codon1 with codon2 gives the opposite sign to
// comparing codon2 with codon1 (both signs are 0 when the codons match).
// Each argument is a codon string with '-' for gaps, converted by compare().
342 private void assertSymmetric(String codon1, String codon2)
// Fix: the failure message now closes the quote opened before codon2.
344 assertEquals("Comparison of '" + codon1 + "' and '" + codon2
345 + "' not symmetric", Integer.signum(compare(codon1, codon2)),
346 -Integer.signum(compare(codon2, codon1)));
350 * Assert that the first sequence should map to the same position as the
351 * second in a translated alignment. Also checks that this is true if the
352 * order of the codons is reversed.
// Asserts that compare() returns 0 for the two codon strings in both argument
// orders, i.e. neither codon sorts before the other.
357 private void assertMatches(String codon1, String codon2)
359 assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0,
360 compare(codon1, codon2));
// Repeat with the arguments swapped; a match must hold in both directions.
361 assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0,
362 compare(codon2, codon1));
366 * Assert that the first sequence should precede the second in a translated
// Asserts that compare(codon1, codon2) returns exactly -1. Only this argument
// order is checked in the lines visible here.
372 private void assertPrecedes(String codon1, String codon2)
374 assertEquals("Expected '" + codon1 + "' precedes '" + codon2 + "'",
375 -1, compare(codon1, codon2));
379 * Assert that the first sequence should follow the second in a translated
// Asserts that compare(codon1, codon2) returns exactly 1. Only this argument
// order is checked in the lines visible here.
385 private void assertFollows(String codon1, String codon2)
387 assertEquals("Expected '" + codon1 + "' follows '" + codon2 + "'", 1,
388 compare(codon1, codon2));
392 * Convert two nucleotide strings to base positions and pass to
393 * Dna.compareCodonPos, return the result.
// Converts both gapped codon strings to AlignedCodon objects with
// convertCodon, prints each string and its converted form to stdout, and
// returns the result of Dna.compareCodonPos on the pair.
399 private int compare(String s1, String s2)
401 final AlignedCodon cd1 = convertCodon(s1);
402 final AlignedCodon cd2 = convertCodon(s2);
// Debug trace: "K:" labels the first codon, "G:" the second.
// NOTE(review): this prints three lines per comparison on every test run.
403 System.out.println("K: " + s1 + " " + cd1.toString());
404 System.out.println("G: " + s2 + " " + cd2.toString());
405 System.out.println();
406 return Dna.compareCodonPos(cd1, cd2);
410 * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string
411 * should have exactly 3 non-gap characters, and use '-' for gaps.
// Builds an AlignedCodon from a gapped codon string such as "-A-AA-".
// testConvertCodon shows the result holds the 0-based string indices of the
// three non-gap characters (e.g. "A-A--A" gives [0, 2, 5]).
416 private AlignedCodon convertCodon(String s)
// Holds the three base positions passed to the AlignedCodon constructor.
418 int[] codon = new int[3];
420 for (int j = 0; j < s.length(); j++)
// '-' marks a gap column. The body of this test (original lines 423-425) is
// not visible here; presumably it stores j in the next free slot of `codon`
// - confirm.
422 if (s.charAt(j) != '-')
427 return new AlignedCodon(codon[0], codon[1], codon[2]);
431 * Sanity test for convertCodon, the helper method the other tests in this class rely on.
// Checks the convertCodon helper by comparing the toString() form of its
// result with the expected 0-based positions of the non-gap characters.
433 @Test(groups = { "Functional" })
434 public void testConvertCodon()
436 assertEquals("[0, 1, 2]", convertCodon("AAA").toString());
437 assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString());
// Leading and trailing gaps are skipped as well as internal ones.
438 assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString());