JAL-1619 refactoring wip
[jalview.git] / test / jalview / analysis / DnaTest.java
1 package jalview.analysis;
2
3 import static org.junit.Assert.assertEquals;
4 import static org.junit.Assert.assertNotNull;
5 import static org.junit.Assert.assertTrue;
6 import jalview.api.AlignViewportI;
7 import jalview.datamodel.AlignmentI;
8 import jalview.datamodel.ColumnSelection;
9 import jalview.gui.AlignViewport;
10 import jalview.io.FormatAdapter;
11
12 import java.io.IOException;
13 import java.util.Arrays;
14
15 import org.junit.Test;
16
17 public class DnaTest
18 {
19   // @formatter:off
20   // AA encoding codons as ordered on the Jalview help page Amino Acid Table
21   private static String fasta = ">B\n" + "GCT" + "GCC" + "GCA" + "GCG"
22           + "TGT" + "TGC" + "GAT" + "GAC" + "GAA" + "GAG" + "TTT" + "TTC"
23           + "GGT" + "GGC" + "GGA" + "GGG" + "CAT" + "CAC" + "ATT" + "ATC"
24           + "ATA" + "AAA" + "AAG" + "TTG" + "TTA" + "CTT" + "CTC" + "CTA"
25           + "CTG" + "ATG" + "AAT" + "AAC" + "CCT" + "CCC" + "CCA" + "CCG"
26           + "CAA" + "CAG" + "CGT" + "CGC" + "CGA" + "CGG" + "AGA" + "AGG"
27           + "TCT" + "TCC" + "TCA" + "TCG" + "AGT" + "AGC" + "ACT" + "ACC"
28           + "ACA" + "ACG" + "GTT" + "GTC" + "GTA" + "GTG" + "TGG" + "TAT"
29           + "TAC" + "TAA" + "TAG" + "TGA";
30
31   private static String JAL_1312_example_align_fasta = ">B.FR.83.HXB2_LAI_IIIB_BRU_K03455/45-306\n"
32           + "ATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
33           + "GGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACA\n"
34           + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTG\n"
35           + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAC\n"
36           + ">gi|27804621|gb|AY178912.1|/1-259\n"
37           + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
38           + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGGACA\n"
39           + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
40           + "TGTTCATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAT\n"
41           + ">gi|27804623|gb|AY178913.1|/1-259\n"
42           + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
43           + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGAACA\n"
44           + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
45           + "TGTTCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
46           + ">gi|27804627|gb|AY178915.1|/1-260\n"
47           + "-TGGGAAAA-ATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
48           + "GGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGTTGTAGACAAATATTGGGACA\n"
49           + "GCTACAACCATCCCTTGAGACAGGATCAGAAGAACTTAAATCATTATWTAATACCATAGCAGTCCTCTATTG\n"
50           + "TGTACATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAG\n"
51           + ">gi|27804631|gb|AY178917.1|/1-261\n"
52           + "-TGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
53           + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACACCAGAAGGCTGTAGACAAATACTGGGACA\n"
54           + "GCTACAACCGTCCCTTCAGACAGGATCGGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
55           + "TGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAGGCTTTAGAC\n"
56           + ">gi|27804635|gb|AY178919.1|/1-261\n"
57           + "-TGGGAGAGAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAATTGAAACATATAGTATGGGCAGGCAG\n"
58           + "AGAGCTAGATCGATTCGCAGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAGATATTGGGACA\n"
59           + "GCTACAACCGTCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
60           + "TGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
61           + ">gi|27804641|gb|AY178922.1|/1-261\n"
62           + "-TGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
63           + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAAATACTGGGACA\n"
64           + "GTTACACCCATCCCTTCATACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
65           + "TGTGCATCAAAGGATAGAAGTAAAAGACACCAAGGAAGCTTTAGAC\n"
66           + ">gi|27804647|gb|AY178925.1|/1-261\n"
67           + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATGTAGTATGGGCAAGCAG\n"
68           + "GGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
69           + "GCTACAACCATCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTG\n"
70           + "TGTACATCAAAGAATAGATGTAAAAGACACCAAGGAAGCTCTAGAA\n"
71           + ">gi|27804649|gb|AY178926.1|/1-261\n"
72           + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
73           + "GGAGCTAGAACGATTCGCGGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAACTACTGGGACA\n"
74           + "GTTACAACCATCCCTTCAGACAGGATCAGAAGAACTCAAATCATTATATAATACAATAGCAACCCTCTATTG\n"
75           + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCCTTAGAT\n"
76           + ">gi|27804653|gb|AY178928.1|/1-261\n"
77           + "-TGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
78           + "GGAGCTAGACCGATTCGCACTTAACCCCGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
79           + "GCTACAATCGTCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATATAATACAGTAGCAGTCCTCTATTG\n"
80           + "TGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCCTTAGAC\n"
81           + ">gi|27804659|gb|AY178931.1|/1-261\n"
82           + "-TGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
83           + "GGAGCTAGAACGATTYGCAGTTAATCCTGGCCTTTTAGAAACAGCAGAAGGCTGTAGACAAATACTGGGACA\n"
84           + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
85           + "TGTACATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAA\n";
86   // @formatter:on
87
88   /**
89    * Corner case for this test is the presence of codons after codons that were
90    * not translated.
91    * 
92    * @throws IOException
93    */
94   @Test
95   public void testTranslateCdna_withUntranslatableCodons()
96           throws IOException
97   {
98     AlignmentI alf = new FormatAdapter().readFile(
99             JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
100             "FASTA");
101     ColumnSelection cs = new ColumnSelection();
102     AlignViewportI av = new AlignViewport(alf, cs);
103     Dna dna = new Dna(av, new int[]
104     { 0, alf.getWidth() - 1 });
105     AlignmentI translated = dna.translateCdna();
106     assertNotNull("Couldn't do a full width translation of test data.",
107             translated);
108   }
109
110   /**
111    * Test variant in which 15 column blocks at a time are translated (the rest
112    * hidden).
113    * 
114    * @throws IOException
115    */
116   @Test
117   public void testTranslateCdna_withUntranslatableCodonsAndHiddenColumns()
118           throws IOException
119   {
120     AlignmentI alf = new FormatAdapter().readFile(
121             JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
122             "FASTA");
123     int vwidth = 15;
124     for (int ipos = 0; ipos + vwidth < alf.getWidth(); ipos += vwidth)
125     {
126       ColumnSelection cs = new ColumnSelection();
127       if (ipos > 0)
128       {
129         cs.hideColumns(0, ipos - 1);
130       }
131       cs.hideColumns(ipos + vwidth, alf.getWidth());
132       int[] vcontigs = cs.getVisibleContigs(0, alf.getWidth());
133       AlignViewportI av = new AlignViewport(alf, cs);
134       Dna dna = new Dna(av, vcontigs);
135       AlignmentI transAlf = dna.translateCdna();
136
137       assertTrue("Translation failed (ipos=" + ipos
138               + ") No alignment data.", transAlf != null);
139       assertTrue("Translation failed (ipos=" + ipos + ") Empty alignment.",
140               transAlf.getHeight() > 0);
141       assertTrue("Translation failed (ipos=" + ipos + ") Translated "
142               + transAlf.getHeight() + " sequences from " + alf.getHeight()
143               + " sequences", alf.getHeight() == transAlf.getHeight());
144     }
145   }
146
147   /**
148    * Test simple translation to Amino Acids (with STOP codons translated to X).
149    * 
150    * @throws IOException
151    */
152   @Test
153   public void testTranslateCdna_simple() throws IOException
154   {
155     AlignmentI alf = new FormatAdapter().readFile(fasta,
156             FormatAdapter.PASTE, "FASTA");
157     ColumnSelection cs = new ColumnSelection();
158     AlignViewportI av = new AlignViewport(alf, cs);
159     Dna dna = new Dna(av, new int[]
160     { 0, alf.getWidth() - 1 });
161     AlignmentI translated = dna.translateCdna();
162     String aa = translated.getSequenceAt(0).getSequenceAsString();
163     assertEquals(
164             "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYYXXX",
165             aa);
166   }
167
168   /**
169    * Test translation excluding hidden columns.
170    * 
171    * @throws IOException
172    */
173   @Test
174   public void testTranslatCdna_hiddenColumns() throws IOException
175   {
176     AlignmentI alf = new FormatAdapter().readFile(fasta,
177             FormatAdapter.PASTE, "FASTA");
178     ColumnSelection cs = new jalview.datamodel.ColumnSelection();
179     cs.hideColumns(6, 14); // hide codons 3/4/5
180     cs.hideColumns(24, 35); // hide codons 9-12
181     cs.hideColumns(177, 191); // hide codons 60-64
182     AlignViewportI av = new AlignViewport(alf, cs);
183     Dna dna = new Dna(av, new int[]
184     { 0, alf.getWidth() - 1 });
185     AlignmentI translated = dna.translateCdna();
186     String aa = translated.getSequenceAt(0).getSequenceAsString();
187     assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa);
188   }
189
190   /**
191    * Tests for method that compares 'alignment' of two codon position triplets.
192    */
193   @Test
194   public void testCompareCodonPos()
195   {
196     /*
197      * Returns 0 for any null argument
198      */
199     assertEquals(0, Dna.compareCodonPos(new int[]
200       { 1, 2, 3 }, null));
201     assertEquals(0, Dna.compareCodonPos(null, new int[]
202       { 1, 2, 3 }));
203
204     /*
205      * Work through 27 combinations. First 9 cases where first position matches.
206      */
207     assertMatches("AAA", "GGG"); // 2 and 3 match
208     assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1
209     assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2
210     assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches
211     assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1
212     // TODO is this right?
213     assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2
214     assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches
215     assertPrecedes("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1
216     assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2
217
218     /*
219      * 9 cases where first position is shifted in first sequence.
220      */
221     assertFollows("-AAA", "G-GG"); // 2 and 3 match
222     assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1
223     assertPrecedes("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2
224     assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches
225     assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1
226     // is this right? codon2 ends after codon1
227     assertPrecedes("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2
228     assertPrecedes("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches
229     assertPrecedes("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
230     assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2
231
232     /*
233      * 9 cases where first position is shifted in second sequence.
234      */
235     assertPrecedes("A-AA", "-GGG"); // 2 and 3 match
236     assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1
237     assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2
238     assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches
239     assertPrecedes("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1
240     assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2
241     assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches
242     assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1
243     assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2
244
245     /*
246      * two codon positions can each 'precede' the other! the comparison is
247      * biased to the first sequence
248      */
249     // TODO is this correct?
250     assertPrecedes("-A--AA", "--GGG");
251     assertPrecedes("--AAA", "-A--AA");
252   }
253
254   /**
255    * Assert that the first sequence should map to the same position as the
256    * second in a translated alignment
257    * 
258    * @param codon1
259    * @param codon2
260    */
261   private void assertMatches(String codon1, String codon2)
262   {
263     assertEquals("Expected match (0)", 0, compare(codon1, codon2));
264   }
265
266   /**
267    * Assert that the first sequence should precede the second in a translated
268    * alignment
269    * 
270    * @param codon1
271    * @param codon2
272    */
273   private void assertPrecedes(String codon1, String codon2)
274   {
275     assertEquals("Expected precedes (-1)", -1, compare(codon1, codon2));
276   }
277
278   /**
279    * Assert that the first sequence should follow the second in a translated
280    * alignment
281    * 
282    * @param codon1
283    * @param codon2
284    */
285   private void assertFollows(String codon1, String codon2)
286   {
287     assertEquals("Expected follows (1)", 1, compare(codon1, codon2));
288   }
289
290   /**
291    * Convert two nucleotide strings to base positions and pass to
292    * Dna.compareCodonPos, return the result.
293    * 
294    * @param s1
295    * @param s2
296    * @return
297    */
298   private int compare(String s1, String s2)
299   {
300     final int[] cd1 = convertCodon(s1);
301     final int[] cd2 = convertCodon(s2);
302     System.out.println("K: " + s1 + "  " + Arrays.toString(cd1));
303     System.out.println("G: " + s2 + "  " + Arrays.toString(cd2));
304     System.out.println();
305     return Dna.compareCodonPos(cd1, cd2);
306   }
307
308   /**
309    * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string
310    * should have exactly 3 non-gap characters, and use '-' for gaps.
311    * 
312    * @param s
313    * @return
314    */
315   private int[] convertCodon(String s)
316   {
317     int[] result = new int[3];
318     int i = 0;
319     for (int j = 0; j < s.length(); j++)
320     {
321       if (s.charAt(j) != '-')
322       {
323         result[i++] = j;
324       }
325     }
326     return result;
327   }
328
329   /**
330    * Weirdly, maybe worth a test to prove the helper method of this test class.
331    */
332   @Test
333   public void testConvertCodon()
334   {
335     assertEquals("[0, 1, 2]", Arrays.toString(convertCodon("AAA")));
336     assertEquals("[0, 2, 5]", Arrays.toString(convertCodon("A-A--A")));
337     assertEquals("[1, 3, 4]", Arrays.toString(convertCodon("-A-AA-")));
338   }
339 }