* Helper method to load an alignment and ensure dataset sequences are set up.
*
* @param data
- * @param format TODO
+ * @param format
+ * TODO
* @return
* @throws IOException
*/
- protected AlignmentI loadAlignment(final String data, String format) throws IOException
+ protected AlignmentI loadAlignment(final String data, String format)
+ throws IOException
{
Alignment a = new FormatAdapter().readFile(data,
AppletFormatAdapter.PASTE, format);
int i = 0;
for (AlignmentAnnotation ann : al.getAlignmentAnnotation())
{
- ann.setCalcId("CalcIdFor"
- + al.getSequenceAt(i).getName());
+ ann.setCalcId("CalcIdFor" + al.getSequenceAt(i).getName());
i++;
}
}
al2.addCodonFrame(acf);
al1.alignAs(al2);
- assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0)
+ assertEquals("AC-G---G--CUC-CA------A-CT", al1.getSequenceAt(0)
.getSequenceAsString());
- assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1)
+ assertEquals("---CG-T---TA--ACG---A---AGT", al1.getSequenceAt(1)
+ .getSequenceAsString());
+ }
+
+ /**
+ * Test aligning cdna (with introns) as per protein alignment.
+ * <p>
+ * Loads CDNA_SEQS_1 and AA_SEQS_1 as FASTA, maps each of the first two dna
+ * sequences to the peptide at the same index, registers that mapping on the
+ * protein alignment, then calls al1.alignAs(al2) and checks the resulting
+ * gapped dna strings.
+ * <p>
+ * NOTE(review): the MapList built here maps dna 1-12 contiguously to peptide
+ * 1-4, so the mapping itself excludes no intron positions - confirm that the
+ * "WithIntrons" name matches what this test is meant to cover.
+ *
+ * @throws IOException
+ *           if loadAlignment fails to read the test data
+ */
+ @Test
+ public void testAlignAs_cdnaAsProteinWithIntrons() throws IOException
+ {
+ /*
+ * Load alignments and add mappings for cDNA to protein: al1 is the dna
+ * alignment, al2 the protein alignment. The same MapList (dna 1-12 to
+ * peptide 1-4, with arguments 3 and 1, presumably three dna positions per
+ * peptide position) is used for sequence 0 and for sequence 1.
+ */
+ AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
+ AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList ml = new MapList(new int[]
+ { 1, 12 }, new int[]
+ { 1, 4 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml);
+ acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml);
+ al2.addCodonFrame(acf);
+
+ al1.alignAs(al2);
+ assertEquals("AC-G---G--CUC-CA------A-CT", al1.getSequenceAt(0)
+ .getSequenceAsString());
+ assertEquals("---CG-T---TA--ACG---A---AGT", al1.getSequenceAt(1)
+ .getSequenceAsString());
+ }
+
+ /**
+ * Test aligning dna as per protein alignment, for the case where there are
+ * introns (i.e. some dna sites have no mapping from a peptide).
+ * <p>
+ * The dna test sequences write the unmapped (intron) bases in lower case and
+ * the mapped (exon) bases in upper case. The alignment is performed twice on
+ * the same input: first with alignAs(al2, false, false), then, after
+ * resetting both dna sequences, with alignAs(al2, true, true).
+ *
+ * @throws IOException
+ *           if loadAlignment fails to read the test data
+ */
+ @Test
+ public void testAlignAs_dnaAsProtein_withIntrons() throws IOException
+ {
+ /*
+ * Load alignments and add mappings for cDNA to protein. al1 holds the two
+ * gapped dna sequences, al2 the two gapped three-residue peptides; each dna
+ * sequence gets its own MapList because the intron positions differ.
+ */
+ String dna1 = "A-Aa-gG-GCC-cT-TT";
+ String dna2 = "c--CCGgg-TT--T-AA-A";
+ AlignmentI al1 = loadAlignment(">Seq1\n" + dna1 + "\n>Seq2\n" + dna2
+ + "\n", "FASTA");
+ AlignmentI al2 = loadAlignment(">Seq1\n-P--YK\n>Seq2\nG-T--F\n",
+ "FASTA");
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ // Seq1 has introns at ungapped dna positions 3,4,9 so splice is AAG GCC TTT
+ // Seq2 has introns at ungapped dna positions 1,5,6 so splice is CCG TTT AAA
+ MapList ml1 = new MapList(new int[]
+ { 1, 2, 5, 8, 10, 12 }, new int[]
+ { 1, 3 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1);
+ MapList ml2 = new MapList(new int[]
+ { 2, 4, 7, 12 }, new int[]
+ { 1, 3 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml2);
+ al2.addCodonFrame(acf);
+
+ /*
+ * Align ignoring gaps in dna introns and exons: the expected strings keep
+ * none of the '-' characters of dna1/dna2, only gaps introduced by the
+ * realignment.
+ */
+ ((Alignment) al1).alignAs(al2, false, false);
+ assertEquals("---AAagG------GCCcTTT", al1.getSequenceAt(0)
+ .getSequenceAsString());
+ assertEquals("cCCGgg---TTT------AAA", al1.getSequenceAt(1)
+ .getSequenceAsString());
+
+ /*
+ * Reset and realign, preserving gaps in dna introns and exons. The first
+ * alignAs call modified the sequences in al1, so the original gapped
+ * strings are restored before the second call.
+ */
+ al1.getSequenceAt(0).setSequence(dna1);
+ al1.getSequenceAt(1).setSequence(dna2);
+ ((Alignment) al1).alignAs(al2, true, true);
+ // reminder of the input: dna1 is "A-Aa-gG-GCC-cT-TT"
+ // reminder of the input: dna2 is "c--CCGgg-TT--T-AA-A"
+ // assumption: we include 'the greater of' protein/dna gap lengths, not both
+ assertEquals("---A-Aa-gG------GCC-cT-TT", al1.getSequenceAt(0)
+ .getSequenceAsString());
+ assertEquals("c--CCGgg---TT--T------AA-A", al1.getSequenceAt(1)
.getSequenceAsString());
}
}