X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FDnaTest.java;fp=test%2Fjalview%2Fanalysis%2FDnaTest.java;h=01ed18397e2cf92f0db95e0d1cc3ec17cd95be8b;hb=be32c14cd8e48fe0a207cd7030cb9cd46f894678;hp=0000000000000000000000000000000000000000;hpb=10e637daad8983c41db8679baabea5563d7371f4;p=jalview.git diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java new file mode 100644 index 0000000..01ed183 --- /dev/null +++ b/test/jalview/analysis/DnaTest.java @@ -0,0 +1,445 @@ +package jalview.analysis; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import jalview.api.AlignViewportI; +import jalview.datamodel.AlignedCodon; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.ColumnSelection; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignViewport; +import jalview.io.FormatAdapter; + +import java.io.IOException; + +import org.junit.Test; + +public class DnaTest +{ + // @formatter:off + // AA encoding codons as ordered on the Jalview help page Amino Acid Table + private static String fasta = ">B\n" + "GCT" + "GCC" + "GCA" + "GCG" + + "TGT" + "TGC" + "GAT" + "GAC" + "GAA" + "GAG" + "TTT" + "TTC" + + "GGT" + "GGC" + "GGA" + "GGG" + "CAT" + "CAC" + "ATT" + "ATC" + + "ATA" + "AAA" + "AAG" + "TTG" + "TTA" + "CTT" + "CTC" + "CTA" + + "CTG" + "ATG" + "AAT" + "AAC" + "CCT" + "CCC" + "CCA" + "CCG" + + "CAA" + "CAG" + "CGT" + "CGC" + "CGA" + "CGG" + "AGA" + "AGG" + + "TCT" + "TCC" + "TCA" + "TCG" + "AGT" + "AGC" + "ACT" + "ACC" + + "ACA" + "ACG" + "GTT" + "GTC" + "GTA" + "GTG" + "TGG" + "TAT" + + "TAC" + "TAA" + "TAG" + "TGA"; + + private static String JAL_1312_example_align_fasta = ">B.FR.83.HXB2_LAI_IIIB_BRU_K03455/45-306\n" + + "ATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAC\n" + + ">gi|27804621|gb|AY178912.1|/1-259\n" + + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTTCATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804623|gb|AY178913.1|/1-259\n" + + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGAACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTTCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804627|gb|AY178915.1|/1-260\n" + + "-TGGGAAAA-ATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGTTGTAGACAAATATTGGGACA\n" + + "GCTACAACCATCCCTTGAGACAGGATCAGAAGAACTTAAATCATTATWTAATACCATAGCAGTCCTCTATTG\n" + + "TGTACATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAG\n" + + ">gi|27804631|gb|AY178917.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACACCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCGTCCCTTCAGACAGGATCGGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAGGCTTTAGAC\n" + + ">gi|27804635|gb|AY178919.1|/1-261\n" + + "-TGGGAGAGAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAATTGAAACATATAGTATGGGCAGGCAG\n" + + "AGAGCTAGATCGATTCGCAGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAGATATTGGGACA\n" + + "GCTACAACCGTCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804641|gb|AY178922.1|/1-261\n" + + "-TGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAAATACTGGGACA\n" + + "GTTACACCCATCCCTTCATACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAAGTAAAAGACACCAAGGAAGCTTTAGAC\n" + + ">gi|27804647|gb|AY178925.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATGTAGTATGGGCAAGCAG\n" + + "GGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTG\n" + + "TGTACATCAAAGAATAGATGTAAAAGACACCAAGGAAGCTCTAGAA\n" + + ">gi|27804649|gb|AY178926.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCGGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAACTACTGGGACA\n" + + "GTTACAACCATCCCTTCAGACAGGATCAGAAGAACTCAAATCATTATATAATACAATAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCCTTAGAT\n" + + ">gi|27804653|gb|AY178928.1|/1-261\n" + + "-TGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGACCGATTCGCACTTAACCCCGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n" + + "GCTACAATCGTCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATATAATACAGTAGCAGTCCTCTATTG\n" + + "TGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCCTTAGAC\n" + + ">gi|27804659|gb|AY178931.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTYGCAGTTAATCCTGGCCTTTTAGAAACAGCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTACATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAA\n"; + // @formatter:on + + /** + * Corner case for this test is the presence of codons after codons that were + * not translated. + * + * @throws IOException + */ + @Test + public void testTranslateCdna_withUntranslatableCodons() + throws IOException + { + AlignmentI alf = new FormatAdapter().readFile( + JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE, + "FASTA"); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + assertNotNull("Couldn't do a full width translation of test data.", + translated); + } + + /** + * Test variant in which 15 column blocks at a time are translated (the rest + * hidden). + * + * @throws IOException + */ + @Test + public void testTranslateCdna_withUntranslatableCodonsAndHiddenColumns() + throws IOException + { + AlignmentI alf = new FormatAdapter().readFile( + JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE, + "FASTA"); + int vwidth = 15; + for (int ipos = 0; ipos + vwidth < alf.getWidth(); ipos += vwidth) + { + ColumnSelection cs = new ColumnSelection(); + if (ipos > 0) + { + cs.hideColumns(0, ipos - 1); + } + cs.hideColumns(ipos + vwidth, alf.getWidth()); + int[] vcontigs = cs.getVisibleContigs(0, alf.getWidth()); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, vcontigs); + AlignmentI transAlf = dna.translateCdna(); + + assertTrue("Translation failed (ipos=" + ipos + + ") No alignment data.", transAlf != null); + assertTrue("Translation failed (ipos=" + ipos + ") Empty alignment.", + transAlf.getHeight() > 0); + assertTrue("Translation failed (ipos=" + ipos + ") Translated " + + transAlf.getHeight() + " sequences from " + alf.getHeight() + + " sequences", alf.getHeight() == transAlf.getHeight()); + } + } + + /** + * Test simple translation to Amino Acids (with STOP codons translated to X). + * + * @throws IOException + */ + @Test + public void testTranslateCdna_simple() throws IOException + { + AlignmentI alf = new FormatAdapter().readFile(fasta, + FormatAdapter.PASTE, "FASTA"); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + String aa = translated.getSequenceAt(0).getSequenceAsString(); + assertEquals( + "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYYXXX", + aa); + } + + /** + * Test translation excluding hidden columns. + * + * @throws IOException + */ + @Test + public void testTranslateCdna_hiddenColumns() throws IOException + { + AlignmentI alf = new FormatAdapter().readFile(fasta, + FormatAdapter.PASTE, "FASTA"); + ColumnSelection cs = new jalview.datamodel.ColumnSelection(); + cs.hideColumns(6, 14); // hide codons 3/4/5 + cs.hideColumns(24, 35); // hide codons 9-12 + cs.hideColumns(177, 191); // hide codons 60-64 + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + String aa = translated.getSequenceAt(0).getSequenceAsString(); + assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa); + } + + /** + * Use this test to help debug into any cases of interest. + */ + @Test + public void testCompareCodonPos_oneOnly() + { + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + } + + /** + * Tests for method that compares 'alignment' of two codon position triplets. + */ + @Test + public void testCompareCodonPos() + { + /* + * Returns 0 for any null argument + */ + assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null)); + assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3))); + + /* + * Work through 27 combinations. First 9 cases where first position matches. + */ + assertMatches("AAA", "GGG"); // 2 and 3 match + assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2 + assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches + assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1 + assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches + assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in first sequence. + */ + assertFollows("-AAA", "G-GG"); // 2 and 3 match + assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1 + // 'enclosing' case: pick first to start precedes + assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2 + assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches + assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1 + // 'enclosing' case: pick first to start precedes + assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2 + assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in second sequence. + */ + assertPrecedes("A-AA", "-GGG"); // 2 and 3 match + assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2 + assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches + // 'enclosing' case with middle base deciding: + assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1 + assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches + assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2 + } + + /** + * This test generates a random cDNA alignment and its translation, then + * reorders the cDNA and retranslates, and verifies that the translations are + * the same (apart from ordering). + */ + @Test + public void testTranslateCdna_sequenceOrderIndependent() + { + /* + * Generate cDNA - 8 sequences of 12 bases each. + */ + AlignmentI cdna = new DnaAlignmentGenerator().generate(12, 8, 97, 5, 5); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(cdna, cs); + Dna dna = new Dna(av, new int[] + { 0, cdna.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + + /* + * Jumble the cDNA sequences and translate. + */ + SequenceI[] sorted = new SequenceI[cdna.getHeight()]; + final int[] jumbler = new int[] + { 6, 7, 3, 4, 2, 0, 1, 5 }; + int seqNo = 0; + for (int i : jumbler) + { + sorted[seqNo++] = cdna.getSequenceAt(i); + } + AlignmentI cdnaReordered = new Alignment(sorted); + av = new AlignViewport(cdnaReordered, cs); + dna = new Dna(av, new int[] + { 0, cdna.getWidth() - 1 }); + AlignmentI translated2 = dna.translateCdna(); + + /* + * Check translated sequences are the same in both alignments. + */ + System.out.println("Original"); + System.out.println(translated.toString()); + System.out.println("Sorted"); + System.out.println(translated2.toString()); + + int sortedSequenceIndex = 0; + for (int originalSequenceIndex : jumbler) + { + final String translation1 = translated.getSequenceAt( + originalSequenceIndex).getSequenceAsString(); + final String translation2 = translated2.getSequenceAt(sortedSequenceIndex) + .getSequenceAsString(); + assertEquals(translation2, translation1); + sortedSequenceIndex++; + } + } + + /** + * Test that all the cases in testCompareCodonPos have a 'symmetric' + * comparison (without checking the actual comparison result). + */ + @Test + public void testCompareCodonPos_isSymmetric() + { + assertSymmetric("AAA", "GGG"); + assertSymmetric("AA-A", "GGG"); + assertSymmetric("AAA", "GG-G"); + assertSymmetric("A-AA", "GG-G"); + assertSymmetric("A-A-A", "GG-G"); + assertSymmetric("A-AA", "GG--G"); + assertSymmetric("AA-A", "G-GG"); + assertSymmetric("AA--A", "G-GG"); + assertSymmetric("AAA", "G-GG"); + assertSymmetric("-AAA", "G-GG"); + assertSymmetric("-AA-A", "G-GG"); + assertSymmetric("-AAA", "G-G-G"); + assertSymmetric("-A-AA", "G-G-G"); + assertSymmetric("-A-A-A", "G-G-G"); + assertSymmetric("-A-AA", "G-G--G"); + assertSymmetric("-AA-A", "G--GG"); + assertSymmetric("-AA--A", "G--GG"); + assertSymmetric("-AAA", "G--GG"); + assertSymmetric("A-AA", "-GGG"); + assertSymmetric("A-A-A", "-GGG"); + assertSymmetric("A-AA", "-GG-G"); + assertSymmetric("A--AA", "-GG-G"); + assertSymmetric("A--AA", "-GGG"); + assertSymmetric("A--AA", "-GG--G"); + assertSymmetric("AA-A", "-GGG"); + assertSymmetric("AA--A", "-GGG"); + assertSymmetric("AAA", "-GGG"); + } + + private void assertSymmetric(String codon1, String codon2) + { + assertEquals("Comparison of '" + codon1 + "' and '" + codon2 + + " not symmetric", Integer.signum(compare(codon1, codon2)), + -Integer.signum(compare(codon2, codon1))); + } + + /** + * Assert that the first sequence should map to the same position as the + * second in a translated alignment. Also checks that this is true if the + * order of the codons is reversed. + * + * @param codon1 + * @param codon2 + */ + private void assertMatches(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0, + compare(codon1, codon2)); + assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0, + compare(codon2, codon1)); + } + + /** + * Assert that the first sequence should precede the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertPrecedes(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' precedes '" + codon2 + "'", + -1, compare(codon1, codon2)); + } + + /** + * Assert that the first sequence should follow the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertFollows(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' follows '" + codon2 + "'", 1, + compare(codon1, codon2)); + } + + /** + * Convert two nucleotide strings to base positions and pass to + * Dna.compareCodonPos, return the result. + * + * @param s1 + * @param s2 + * @return + */ + private int compare(String s1, String s2) + { + final AlignedCodon cd1 = convertCodon(s1); + final AlignedCodon cd2 = convertCodon(s2); + System.out.println("K: " + s1 + " " + cd1.toString()); + System.out.println("G: " + s2 + " " + cd2.toString()); + System.out.println(); + return Dna.compareCodonPos(cd1, cd2); + } + + /** + * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string + * should have exactly 3 non-gap characters, and use '-' for gaps. + * + * @param s + * @return + */ + private AlignedCodon convertCodon(String s) + { + int[] codon = new int[3]; + int i = 0; + for (int j = 0; j < s.length(); j++) + { + if (s.charAt(j) != '-') + { + codon[i++] = j; + } + } + return new AlignedCodon(codon[0], codon[1], codon[2]); + } + + /** + * Weirdly, maybe worth a test to prove the helper method of this test class. + */ + @Test + public void testConvertCodon() + { + assertEquals("[0, 1, 2]", convertCodon("AAA").toString()); + assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString()); + assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString()); + } +}