Merge branch 'features/JAL-845splitPaneMergeDevelop' into develop
[jalview.git] / test / jalview / analysis / DnaTest.java
diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java
new file mode 100644 (file)
index 0000000..01ed183
--- /dev/null
@@ -0,0 +1,445 @@
+package jalview.analysis;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import jalview.api.AlignViewportI;
+import jalview.datamodel.AlignedCodon;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignViewport;
+import jalview.io.FormatAdapter;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class DnaTest
+{
+  // @formatter:off
+  // AA encoding codons as ordered on the Jalview help page Amino Acid Table
+  private static String fasta = ">B\n" + "GCT" + "GCC" + "GCA" + "GCG"
+          + "TGT" + "TGC" + "GAT" + "GAC" + "GAA" + "GAG" + "TTT" + "TTC"
+          + "GGT" + "GGC" + "GGA" + "GGG" + "CAT" + "CAC" + "ATT" + "ATC"
+          + "ATA" + "AAA" + "AAG" + "TTG" + "TTA" + "CTT" + "CTC" + "CTA"
+          + "CTG" + "ATG" + "AAT" + "AAC" + "CCT" + "CCC" + "CCA" + "CCG"
+          + "CAA" + "CAG" + "CGT" + "CGC" + "CGA" + "CGG" + "AGA" + "AGG"
+          + "TCT" + "TCC" + "TCA" + "TCG" + "AGT" + "AGC" + "ACT" + "ACC"
+          + "ACA" + "ACG" + "GTT" + "GTC" + "GTA" + "GTG" + "TGG" + "TAT"
+          + "TAC" + "TAA" + "TAG" + "TGA";
+
+  private static String JAL_1312_example_align_fasta = ">B.FR.83.HXB2_LAI_IIIB_BRU_K03455/45-306\n"
+          + "ATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACA\n"
+          + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAC\n"
+          + ">gi|27804621|gb|AY178912.1|/1-259\n"
+          + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGGACA\n"
+          + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTTCATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAT\n"
+          + ">gi|27804623|gb|AY178913.1|/1-259\n"
+          + "-TGGGAGAA-ATTCGGTT-CGGCCAGGGGGAAAGAAAAAATATCAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "AGAGCTAGAACGATTCGCAGTTAACCCTGGCCTTTTAGAGACATCACAAGGCTGTAGACAAATACTGGAACA\n"
+          + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTTCATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
+          + ">gi|27804627|gb|AY178915.1|/1-260\n"
+          + "-TGGGAAAA-ATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTCGCAGTTAACCCTGGCCTGTTAGAAACATCAGAAGGTTGTAGACAAATATTGGGACA\n"
+          + "GCTACAACCATCCCTTGAGACAGGATCAGAAGAACTTAAATCATTATWTAATACCATAGCAGTCCTCTATTG\n"
+          + "TGTACATCAAAGGATAGATATAAAAGACACCAAGGAAGCTTTAGAG\n"
+          + ">gi|27804631|gb|AY178917.1|/1-261\n"
+          + "-TGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACACCAGAAGGCTGTAGACAAATACTGGGACA\n"
+          + "GCTACAACCGTCCCTTCAGACAGGATCGGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTGCATCAAAGGATAGATGTAAAAGACACCAAGGAGGCTTTAGAC\n"
+          + ">gi|27804635|gb|AY178919.1|/1-261\n"
+          + "-TGGGAGAGAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAAATTGAAACATATAGTATGGGCAGGCAG\n"
+          + "AGAGCTAGATCGATTCGCAGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAGATATTGGGACA\n"
+          + "GCTACAACCGTCCCTTAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCTTTAGAT\n"
+          + ">gi|27804641|gb|AY178922.1|/1-261\n"
+          + "-TGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAGATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTCGCAGTCAACCCTGGCCTGTTAGAAACATCAGAAGGCTGCAGACAAATACTGGGACA\n"
+          + "GTTACACCCATCCCTTCATACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTGCATCAAAGGATAGAAGTAAAAGACACCAAGGAAGCTTTAGAC\n"
+          + ">gi|27804647|gb|AY178925.1|/1-261\n"
+          + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAATTAAAACATGTAGTATGGGCAAGCAG\n"
+          + "GGAACTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
+          + "GCTACAACCATCCCTTCAGACAGGATCAGAGGAACTTAAATCATTATTTAATACAGTAGCAGTCCTCTATTG\n"
+          + "TGTACATCAAAGAATAGATGTAAAAGACACCAAGGAAGCTCTAGAA\n"
+          + ">gi|27804649|gb|AY178926.1|/1-261\n"
+          + "-TGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAGTTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTCGCGGTCAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAACTACTGGGACA\n"
+          + "GTTACAACCATCCCTTCAGACAGGATCAGAAGAACTCAAATCATTATATAATACAATAGCAACCCTCTATTG\n"
+          + "TGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCCTTAGAT\n"
+          + ">gi|27804653|gb|AY178928.1|/1-261\n"
+          + "-TGGGAAAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGACCGATTCGCACTTAACCCCGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATATTGGGACA\n"
+          + "GCTACAATCGTCCCTTCAGACAGGATCAGAAGAACTTAGATCACTATATAATACAGTAGCAGTCCTCTATTG\n"
+          + "TGTGCATCAAAAGATAGATGTAAAAGACACCAAGGAAGCCTTAGAC\n"
+          + ">gi|27804659|gb|AY178931.1|/1-261\n"
+          + "-TGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAG\n"
+          + "GGAGCTAGAACGATTYGCAGTTAATCCTGGCCTTTTAGAAACAGCAGAAGGCTGTAGACAAATACTGGGACA\n"
+          + "GCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAGCAACCCTCTATTG\n"
+          + "TGTACATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGAA\n";
+  // @formatter:on
+
+  /**
+   * Corner case for this test is the presence of codons after codons that were
+   * not translated.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testTranslateCdna_withUntranslatableCodons()
+          throws IOException
+  {
+    AlignmentI alf = new FormatAdapter().readFile(
+            JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
+            "FASTA");
+    ColumnSelection cs = new ColumnSelection();
+    AlignViewportI av = new AlignViewport(alf, cs);
+    Dna dna = new Dna(av, new int[]
+    { 0, alf.getWidth() - 1 });
+    AlignmentI translated = dna.translateCdna();
+    assertNotNull("Couldn't do a full width translation of test data.",
+            translated);
+  }
+
+  /**
+   * Test variant in which 15 column blocks at a time are translated (the rest
+   * hidden).
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testTranslateCdna_withUntranslatableCodonsAndHiddenColumns()
+          throws IOException
+  {
+    AlignmentI alf = new FormatAdapter().readFile(
+            JAL_1312_example_align_fasta, jalview.io.FormatAdapter.PASTE,
+            "FASTA");
+    int vwidth = 15;
+    for (int ipos = 0; ipos + vwidth < alf.getWidth(); ipos += vwidth)
+    {
+      ColumnSelection cs = new ColumnSelection();
+      if (ipos > 0)
+      {
+        cs.hideColumns(0, ipos - 1);
+      }
+      cs.hideColumns(ipos + vwidth, alf.getWidth());
+      int[] vcontigs = cs.getVisibleContigs(0, alf.getWidth());
+      AlignViewportI av = new AlignViewport(alf, cs);
+      Dna dna = new Dna(av, vcontigs);
+      AlignmentI transAlf = dna.translateCdna();
+
+      assertTrue("Translation failed (ipos=" + ipos
+              + ") No alignment data.", transAlf != null);
+      assertTrue("Translation failed (ipos=" + ipos + ") Empty alignment.",
+              transAlf.getHeight() > 0);
+      assertTrue("Translation failed (ipos=" + ipos + ") Translated "
+              + transAlf.getHeight() + " sequences from " + alf.getHeight()
+              + " sequences", alf.getHeight() == transAlf.getHeight());
+    }
+  }
+
+  /**
+   * Test simple translation to Amino Acids (with STOP codons translated to X).
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testTranslateCdna_simple() throws IOException
+  {
+    AlignmentI alf = new FormatAdapter().readFile(fasta,
+            FormatAdapter.PASTE, "FASTA");
+    ColumnSelection cs = new ColumnSelection();
+    AlignViewportI av = new AlignViewport(alf, cs);
+    Dna dna = new Dna(av, new int[]
+    { 0, alf.getWidth() - 1 });
+    AlignmentI translated = dna.translateCdna();
+    String aa = translated.getSequenceAt(0).getSequenceAsString();
+    assertEquals(
+            "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYYXXX",
+            aa);
+  }
+
+  /**
+   * Test translation excluding hidden columns.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testTranslateCdna_hiddenColumns() throws IOException
+  {
+    AlignmentI alf = new FormatAdapter().readFile(fasta,
+            FormatAdapter.PASTE, "FASTA");
+    ColumnSelection cs = new jalview.datamodel.ColumnSelection();
+    cs.hideColumns(6, 14); // hide codons 3/4/5
+    cs.hideColumns(24, 35); // hide codons 9-12
+    cs.hideColumns(177, 191); // hide codons 60-64
+    AlignViewportI av = new AlignViewport(alf, cs);
+    Dna dna = new Dna(av, new int[]
+    { 0, alf.getWidth() - 1 });
+    AlignmentI translated = dna.translateCdna();
+    String aa = translated.getSequenceAt(0).getSequenceAsString();
+    assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa);
+  }
+
+  /**
+   * Use this test to help debug into any cases of interest.
+   */
+  @Test
+  public void testCompareCodonPos_oneOnly()
+  {
+    assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
+  }
+
+  /**
+   * Tests for method that compares 'alignment' of two codon position triplets.
+   */
+  @Test
+  public void testCompareCodonPos()
+  {
+    /*
+     * Returns 0 for any null argument
+     */
+    assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null));
+    assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3)));
+
+    /*
+     * Work through 27 combinations. First 9 cases where first position matches.
+     */
+    assertMatches("AAA", "GGG"); // 2 and 3 match
+    assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1
+    assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2
+    assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches
+    assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1
+    assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2
+    assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches
+    assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1
+    assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2
+
+    /*
+     * 9 cases where first position is shifted in first sequence.
+     */
+    assertFollows("-AAA", "G-GG"); // 2 and 3 match
+    assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1
+    // 'enclosing' case: pick first to start precedes
+    assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2
+    assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches
+    assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1
+    // 'enclosing' case: pick first to start precedes
+    assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2
+    assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches
+    assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
+    assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2
+
+    /*
+     * 9 cases where first position is shifted in second sequence.
+     */
+    assertPrecedes("A-AA", "-GGG"); // 2 and 3 match
+    assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1
+    assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2
+    assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches
+    // 'enclosing' case with middle base deciding:
+    assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1
+    assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2
+    assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches
+    assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1
+    assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2
+  }
+
+  /**
+   * This test generates a random cDNA alignment and its translation, then
+   * reorders the cDNA and retranslates, and verifies that the translations are
+   * the same (apart from ordering).
+   */
+  @Test
+  public void testTranslateCdna_sequenceOrderIndependent()
+  {
+    /*
+     * Generate cDNA - 8 sequences of 12 bases each.
+     */
+    AlignmentI cdna = new DnaAlignmentGenerator().generate(12, 8, 97, 5, 5);
+    ColumnSelection cs = new ColumnSelection();
+    AlignViewportI av = new AlignViewport(cdna, cs);
+    Dna dna = new Dna(av, new int[]
+    { 0, cdna.getWidth() - 1 });
+    AlignmentI translated = dna.translateCdna();
+
+    /*
+     * Jumble the cDNA sequences and translate.
+     */
+    SequenceI[] sorted = new SequenceI[cdna.getHeight()];
+    final int[] jumbler = new int[]
+    { 6, 7, 3, 4, 2, 0, 1, 5 };
+    int seqNo = 0;
+    for (int i : jumbler)
+    {
+      sorted[seqNo++] = cdna.getSequenceAt(i);
+    }
+    AlignmentI cdnaReordered = new Alignment(sorted);
+    av = new AlignViewport(cdnaReordered, cs);
+    dna = new Dna(av, new int[]
+    { 0, cdna.getWidth() - 1 });
+    AlignmentI translated2 = dna.translateCdna();
+
+    /*
+     * Check translated sequences are the same in both alignments.
+     */
+    System.out.println("Original");
+    System.out.println(translated.toString());
+    System.out.println("Sorted");
+    System.out.println(translated2.toString());
+
+    int sortedSequenceIndex = 0;
+    for (int originalSequenceIndex : jumbler)
+    {
+      final String translation1 = translated.getSequenceAt(
+              originalSequenceIndex).getSequenceAsString();
+      final String translation2 = translated2.getSequenceAt(sortedSequenceIndex)
+              .getSequenceAsString();
+      assertEquals(translation2, translation1);
+      sortedSequenceIndex++;
+    }
+  }
+
+  /**
+   * Test that all the cases in testCompareCodonPos have a 'symmetric'
+   * comparison (without checking the actual comparison result).
+   */
+  @Test
+  public void testCompareCodonPos_isSymmetric()
+  {
+    assertSymmetric("AAA", "GGG");
+    assertSymmetric("AA-A", "GGG");
+    assertSymmetric("AAA", "GG-G");
+    assertSymmetric("A-AA", "GG-G");
+    assertSymmetric("A-A-A", "GG-G");
+    assertSymmetric("A-AA", "GG--G");
+    assertSymmetric("AA-A", "G-GG");
+    assertSymmetric("AA--A", "G-GG");
+    assertSymmetric("AAA", "G-GG");
+    assertSymmetric("-AAA", "G-GG");
+    assertSymmetric("-AA-A", "G-GG");
+    assertSymmetric("-AAA", "G-G-G");
+    assertSymmetric("-A-AA", "G-G-G");
+    assertSymmetric("-A-A-A", "G-G-G");
+    assertSymmetric("-A-AA", "G-G--G");
+    assertSymmetric("-AA-A", "G--GG");
+    assertSymmetric("-AA--A", "G--GG");
+    assertSymmetric("-AAA", "G--GG");
+    assertSymmetric("A-AA", "-GGG");
+    assertSymmetric("A-A-A", "-GGG");
+    assertSymmetric("A-AA", "-GG-G");
+    assertSymmetric("A--AA", "-GG-G");
+    assertSymmetric("A--AA", "-GGG");
+    assertSymmetric("A--AA", "-GG--G");
+    assertSymmetric("AA-A", "-GGG");
+    assertSymmetric("AA--A", "-GGG");
+    assertSymmetric("AAA", "-GGG");
+  }
+
+  private void assertSymmetric(String codon1, String codon2)
+  {
+    assertEquals("Comparison of '" + codon1 + "' and '" + codon2
+            + " not symmetric", Integer.signum(compare(codon1, codon2)),
+            -Integer.signum(compare(codon2, codon1)));
+  }
+
+  /**
+   * Assert that the first sequence should map to the same position as the
+   * second in a translated alignment. Also checks that this is true if the
+   * order of the codons is reversed.
+   * 
+   * @param codon1
+   * @param codon2
+   */
+  private void assertMatches(String codon1, String codon2)
+  {
+    assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0,
+            compare(codon1, codon2));
+    assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0,
+            compare(codon2, codon1));
+  }
+
+  /**
+   * Assert that the first sequence should precede the second in a translated
+   * alignment
+   * 
+   * @param codon1
+   * @param codon2
+   */
+  private void assertPrecedes(String codon1, String codon2)
+  {
+    assertEquals("Expected '" + codon1 + "'  precedes '" + codon2 + "'",
+            -1, compare(codon1, codon2));
+  }
+
+  /**
+   * Assert that the first sequence should follow the second in a translated
+   * alignment
+   * 
+   * @param codon1
+   * @param codon2
+   */
+  private void assertFollows(String codon1, String codon2)
+  {
+    assertEquals("Expected '" + codon1 + "'  follows '" + codon2 + "'", 1,
+            compare(codon1, codon2));
+  }
+
+  /**
+   * Convert two nucleotide strings to base positions and pass to
+   * Dna.compareCodonPos, return the result.
+   * 
+   * @param s1
+   * @param s2
+   * @return
+   */
+  private int compare(String s1, String s2)
+  {
+    final AlignedCodon cd1 = convertCodon(s1);
+    final AlignedCodon cd2 = convertCodon(s2);
+    System.out.println("K: " + s1 + "  " + cd1.toString());
+    System.out.println("G: " + s2 + "  " + cd2.toString());
+    System.out.println();
+    return Dna.compareCodonPos(cd1, cd2);
+  }
+
+  /**
+   * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string
+   * should have exactly 3 non-gap characters, and use '-' for gaps.
+   * 
+   * @param s
+   * @return
+   */
+  private AlignedCodon convertCodon(String s)
+  {
+    int[] codon = new int[3];
+    int i = 0;
+    for (int j = 0; j < s.length(); j++)
+    {
+      if (s.charAt(j) != '-')
+      {
+        codon[i++] = j;
+      }
+    }
+    return new AlignedCodon(codon[0], codon[1], codon[2]);
+  }
+
+  /**
+   * Weirdly, maybe worth a test to prove the helper method of this test class.
+   */
+  @Test
+  public void testConvertCodon()
+  {
+    assertEquals("[0, 1, 2]", convertCodon("AAA").toString());
+    assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString());
+    assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString());
+  }
+}