X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FDnaTest.java;h=01ed18397e2cf92f0db95e0d1cc3ec17cd95be8b;hb=6ca7520368a1e7fc774ec77d4f1dfc3ea940ead2;hp=b1ce1414c7d9c3c26503d474419c75b76566c38a;hpb=4db713f772ee30861f368dcea9f306b987ad01d2;p=jalview.git diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java index b1ce141..01ed183 100644 --- a/test/jalview/analysis/DnaTest.java +++ b/test/jalview/analysis/DnaTest.java @@ -1,8 +1,15 @@ package jalview.analysis; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import jalview.api.AlignViewportI; +import jalview.datamodel.AlignedCodon; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignViewport; import jalview.io.FormatAdapter; import java.io.IOException; @@ -11,6 +18,7 @@ import org.junit.Test; public class DnaTest { + // @formatter:off // AA encoding codons as ordered on the Jalview help page Amino Acid Table private static String fasta = ">B\n" + "GCT" + "GCC" + "GCA" + "GCG" + "TGT" + "TGC" + "GAT" + "GAC" + "GAA" + "GAG" + "TTT" + "TTC" @@ -22,23 +30,137 @@ public class DnaTest + "ACA" + "ACG" + "GTT" + "GTC" + "GTA" + "GTG" + "TGG" + "TAT" + "TAC" + "TAA" + "TAG" + "TGA"; + private static String JAL_1312_example_align_fasta = ">B.FR.83.HXB2_LAI_IIIB_BRU_K03455/45-306\n" + + "ATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAC\n" + + ">gi|27804621|gb|AY178912.1|/1-259\n" + + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTTCATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804623|gb|AY178913.1|/1-259\n" + + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGAACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTTCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804627|gb|AY178915.1|/1-260\n" + + "-TGGGAAAA-ATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGTTGTAGACAAATATTGGGACA\n" + + "GCTACAACCATCCCTTGAGACAGGATCAGAAGAACTTAAATCATTATWTAATACCATAGCAGTCCTCTATTG\n" + + "TGTACATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAG\n" + + ">gi|27804631|gb|AY178917.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACACCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCGTCCCTTCAGACAGGATCGGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAGGCTTTAGAC\n" + + ">gi|27804635|gb|AY178919.1|/1-261\n" + + "-TGGGAGAGAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAATTGAAACATATAGTATGGGCAGGCAG\n" + + "AGAGCTAGATCGATTCGCAGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAGATATTGGGACA\n" + + "GCTACAACCGTCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n" + + ">gi|27804641|gb|AY178922.1|/1-261\n" + + "-TGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAAATACTGGGACA\n" + + "GTTACACCCATCCCTTCATACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAAGTAAAAGACACCAAGGAAGCTTTAGAC\n" + + ">gi|27804647|gb|AY178925.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATGTAGTATGGGCAAGCAG\n" + + "GGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTG\n" + + "TGTACATCAAAGAATAGATGTAAAAGACACCAAGGAAGCTCTAGAA\n" + + ">gi|27804649|gb|AY178926.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTCGCGGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAACTACTGGGACA\n" + + "GTTACAACCATCCCTTCAGACAGGATCAGAAGAACTCAAATCATTATATAATACAATAGCAACCCTCTATTG\n" + + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCCTTAGAT\n" + + ">gi|27804653|gb|AY178928.1|/1-261\n" + + "-TGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGACCGATTCGCACTTAACCCCGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n" + + "GCTACAATCGTCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATATAATACAGTAGCAGTCCTCTATTG\n" + + "TGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCCTTAGAC\n" + + ">gi|27804659|gb|AY178931.1|/1-261\n" + + "-TGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAG\n" + + "GGAGCTAGAACGATTYGCAGTTAATCCTGGCCTTTTAGAAACAGCAGAAGGCTGTAGACAAATACTGGGACA\n" + + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n" + + "TGTACATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAA\n"; + // @formatter:on + + /** + * Corner case for this test is the presence of codons after codons that were + * not translated. + * + * @throws IOException + */ + @Test + public void testTranslateCdna_withUntranslatableCodons() + throws IOException + { + AlignmentI alf = new FormatAdapter().readFile( + JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE, + "FASTA"); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + assertNotNull("Couldn't do a full width translation of test data.", + translated); + } + + /** + * Test variant in which 15 column blocks at a time are translated (the rest + * hidden). + * + * @throws IOException + */ + @Test + public void testTranslateCdna_withUntranslatableCodonsAndHiddenColumns() + throws IOException + { + AlignmentI alf = new FormatAdapter().readFile( + JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE, + "FASTA"); + int vwidth = 15; + for (int ipos = 0; ipos + vwidth < alf.getWidth(); ipos += vwidth) + { + ColumnSelection cs = new ColumnSelection(); + if (ipos > 0) + { + cs.hideColumns(0, ipos - 1); + } + cs.hideColumns(ipos + vwidth, alf.getWidth()); + int[] vcontigs = cs.getVisibleContigs(0, alf.getWidth()); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, vcontigs); + AlignmentI transAlf = dna.translateCdna(); + + assertTrue("Translation failed (ipos=" + ipos + + ") No alignment data.", transAlf != null); + assertTrue("Translation failed (ipos=" + ipos + ") Empty alignment.", + transAlf.getHeight() > 0); + assertTrue("Translation failed (ipos=" + ipos + ") Translated " + + transAlf.getHeight() + " sequences from " + alf.getHeight() + + " sequences", alf.getHeight() == transAlf.getHeight()); + } + } + /** * Test simple translation to Amino Acids (with STOP codons translated to X). * * @throws IOException */ @Test - public void testCdnaTranslate_simple() throws IOException + public void testTranslateCdna_simple() throws IOException { AlignmentI alf = new FormatAdapter().readFile(fasta, FormatAdapter.PASTE, "FASTA"); - final String sequenceAsString = alf - .getSequenceAt(0).getSequenceAsString(); - AlignmentI translated = Dna.CdnaTranslate(alf.getSequencesArray(), - new String[] - { sequenceAsString }, new int[] - { 0, alf.getWidth() - 1 }, alf.getGapCharacter(), null, - alf.getWidth(), null); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); String aa = translated.getSequenceAt(0).getSequenceAsString(); assertEquals( "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYYXXX", @@ -51,7 +173,7 @@ public class DnaTest * @throws IOException */ @Test - public void testCdnaTranslate_hiddenColumns() throws IOException + public void testTranslateCdna_hiddenColumns() throws IOException { AlignmentI alf = new FormatAdapter().readFile(fasta, FormatAdapter.PASTE, "FASTA"); @@ -59,12 +181,265 @@ public class DnaTest cs.hideColumns(6, 14); // hide codons 3/4/5 cs.hideColumns(24, 35); // hide codons 9-12 cs.hideColumns(177, 191); // hide codons 60-64 - AlignmentI translated = Dna.CdnaTranslate(alf.getSequencesArray(), - cs.getVisibleSequenceStrings(0, alf.getWidth(), - alf.getSequencesArray()), new int[] - { 0, alf.getWidth() - 1 }, alf.getGapCharacter(), null, - alf.getWidth(), null); + AlignViewportI av = new AlignViewport(alf, cs); + Dna dna = new Dna(av, new int[] + { 0, alf.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); String aa = translated.getSequenceAt(0).getSequenceAsString(); assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa); } + + /** + * Use this test to help debug into any cases of interest. + */ + @Test + public void testCompareCodonPos_oneOnly() + { + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + } + + /** + * Tests for method that compares 'alignment' of two codon position triplets. + */ + @Test + public void testCompareCodonPos() + { + /* + * Returns 0 for any null argument + */ + assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null)); + assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3))); + + /* + * Work through 27 combinations. First 9 cases where first position matches. + */ + assertMatches("AAA", "GGG"); // 2 and 3 match + assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2 + assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches + assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1 + assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches + assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in first sequence. + */ + assertFollows("-AAA", "G-GG"); // 2 and 3 match + assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1 + // 'enclosing' case: pick first to start precedes + assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2 + assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches + assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1 + // 'enclosing' case: pick first to start precedes + assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2 + assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in second sequence. + */ + assertPrecedes("A-AA", "-GGG"); // 2 and 3 match + assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2 + assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches + // 'enclosing' case with middle base deciding: + assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1 + assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches + assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2 + } + + /** + * This test generates a random cDNA alignment and its translation, then + * reorders the cDNA and retranslates, and verifies that the translations are + * the same (apart from ordering). + */ + @Test + public void testTranslateCdna_sequenceOrderIndependent() + { + /* + * Generate cDNA - 8 sequences of 12 bases each. + */ + AlignmentI cdna = new DnaAlignmentGenerator().generate(12, 8, 97, 5, 5); + ColumnSelection cs = new ColumnSelection(); + AlignViewportI av = new AlignViewport(cdna, cs); + Dna dna = new Dna(av, new int[] + { 0, cdna.getWidth() - 1 }); + AlignmentI translated = dna.translateCdna(); + + /* + * Jumble the cDNA sequences and translate. + */ + SequenceI[] sorted = new SequenceI[cdna.getHeight()]; + final int[] jumbler = new int[] + { 6, 7, 3, 4, 2, 0, 1, 5 }; + int seqNo = 0; + for (int i : jumbler) + { + sorted[seqNo++] = cdna.getSequenceAt(i); + } + AlignmentI cdnaReordered = new Alignment(sorted); + av = new AlignViewport(cdnaReordered, cs); + dna = new Dna(av, new int[] + { 0, cdna.getWidth() - 1 }); + AlignmentI translated2 = dna.translateCdna(); + + /* + * Check translated sequences are the same in both alignments. + */ + System.out.println("Original"); + System.out.println(translated.toString()); + System.out.println("Sorted"); + System.out.println(translated2.toString()); + + int sortedSequenceIndex = 0; + for (int originalSequenceIndex : jumbler) + { + final String translation1 = translated.getSequenceAt( + originalSequenceIndex).getSequenceAsString(); + final String translation2 = translated2.getSequenceAt(sortedSequenceIndex) + .getSequenceAsString(); + assertEquals(translation2, translation1); + sortedSequenceIndex++; + } + } + + /** + * Test that all the cases in testCompareCodonPos have a 'symmetric' + * comparison (without checking the actual comparison result). + */ + @Test + public void testCompareCodonPos_isSymmetric() + { + assertSymmetric("AAA", "GGG"); + assertSymmetric("AA-A", "GGG"); + assertSymmetric("AAA", "GG-G"); + assertSymmetric("A-AA", "GG-G"); + assertSymmetric("A-A-A", "GG-G"); + assertSymmetric("A-AA", "GG--G"); + assertSymmetric("AA-A", "G-GG"); + assertSymmetric("AA--A", "G-GG"); + assertSymmetric("AAA", "G-GG"); + assertSymmetric("-AAA", "G-GG"); + assertSymmetric("-AA-A", "G-GG"); + assertSymmetric("-AAA", "G-G-G"); + assertSymmetric("-A-AA", "G-G-G"); + assertSymmetric("-A-A-A", "G-G-G"); + assertSymmetric("-A-AA", "G-G--G"); + assertSymmetric("-AA-A", "G--GG"); + assertSymmetric("-AA--A", "G--GG"); + assertSymmetric("-AAA", "G--GG"); + assertSymmetric("A-AA", "-GGG"); + assertSymmetric("A-A-A", "-GGG"); + assertSymmetric("A-AA", "-GG-G"); + assertSymmetric("A--AA", "-GG-G"); + assertSymmetric("A--AA", "-GGG"); + assertSymmetric("A--AA", "-GG--G"); + assertSymmetric("AA-A", "-GGG"); + assertSymmetric("AA--A", "-GGG"); + assertSymmetric("AAA", "-GGG"); + } + + private void assertSymmetric(String codon1, String codon2) + { + assertEquals("Comparison of '" + codon1 + "' and '" + codon2 + + " not symmetric", Integer.signum(compare(codon1, codon2)), + -Integer.signum(compare(codon2, codon1))); + } + + /** + * Assert that the first sequence should map to the same position as the + * second in a translated alignment. Also checks that this is true if the + * order of the codons is reversed. + * + * @param codon1 + * @param codon2 + */ + private void assertMatches(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0, + compare(codon1, codon2)); + assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0, + compare(codon2, codon1)); + } + + /** + * Assert that the first sequence should precede the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertPrecedes(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' precedes '" + codon2 + "'", + -1, compare(codon1, codon2)); + } + + /** + * Assert that the first sequence should follow the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertFollows(String codon1, String codon2) + { + assertEquals("Expected '" + codon1 + "' follows '" + codon2 + "'", 1, + compare(codon1, codon2)); + } + + /** + * Convert two nucleotide strings to base positions and pass to + * Dna.compareCodonPos, return the result. + * + * @param s1 + * @param s2 + * @return + */ + private int compare(String s1, String s2) + { + final AlignedCodon cd1 = convertCodon(s1); + final AlignedCodon cd2 = convertCodon(s2); + System.out.println("K: " + s1 + " " + cd1.toString()); + System.out.println("G: " + s2 + " " + cd2.toString()); + System.out.println(); + return Dna.compareCodonPos(cd1, cd2); + } + + /** + * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string + * should have exactly 3 non-gap characters, and use '-' for gaps. + * + * @param s + * @return + */ + private AlignedCodon convertCodon(String s) + { + int[] codon = new int[3]; + int i = 0; + for (int j = 0; j < s.length(); j++) + { + if (s.charAt(j) != '-') + { + codon[i++] = j; + } + } + return new AlignedCodon(codon[0], codon[1], codon[2]); + } + + /** + * Weirdly, maybe worth a test to prove the helper method of this test class. + */ + @Test + public void testConvertCodon() + { + assertEquals("[0, 1, 2]", convertCodon("AAA").toString()); + assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString()); + assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString()); + } }