import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
import jalview.io.AppletFormatAdapter;
import jalview.io.FormatAdapter;
+import jalview.util.MapList;
import java.io.IOException;
import java.util.Iterator;
"#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
"//";
- private static final String TEST_DATA2 =
- ">TEST21 test21\n" +
- "AC-GG--CUC-CAA-CT\n" +
- ">TEST22 test22\n" +
- "-CG-TTA--ACG---AAGT\n";
-
- private static final String TEST_DATA3 =
- ">TEST31 test31\n" +
+ private static final String AA_SEQS_1 =
+ ">Seq1Name\n" +
"K-QY--L\n" +
- ">TEST32 test32\n" +
+ ">Seq2Name\n" +
"-R-FP-W-\n";
- private static final String TEST_DATA4 =
- ">TEST41 test41\n" +
+ private static final String CDNA_SEQS_1 =
+ ">Seq1Name\n" +
+ "AC-GG--CUC-CAA-CT\n" +
+ ">Seq2Name\n" +
+ "-CG-TTA--ACG---AAGT\n";
+
+ private static final String CDNA_SEQS_2 =
+ ">Seq1Name\n" +
"GCTCGUCGTACT\n" +
- ">TEST42 test42\n" +
+ ">Seq2Name\n" +
"GGGTCAGGCAGT\n";
// @formatter:on
- private Alignment al;
+ private AlignmentI al;
+
+ /**
+ * Helper method to load an alignment and ensure dataset sequences are set up.
+ * The text is read with FormatAdapter.readFile using
+ * AppletFormatAdapter.PASTE as the source type, and setDataset(null) is then
+ * called on the result before it is returned.
+ *
+ * @param data
+ *          the alignment text to read
+ * @param format
+ *          the format name passed to the reader; tests in this class use
+ *          "STH" and "FASTA"
+ * @return the alignment that was read, after the setDataset(null) call
+ * @throws IOException
+ *           if readFile throws it
+ */
+ protected AlignmentI loadAlignment(final String data, String format)
+ throws IOException
+ {
+ Alignment a = new FormatAdapter().readFile(data,
+ AppletFormatAdapter.PASTE, format);
+ a.setDataset(null);
+ return a;
+ }
/*
* Read in Stockholm format test data including secondary structure
@Before
public void setUp() throws IOException
{
- al = new FormatAdapter().readFile(TEST_DATA,
- AppletFormatAdapter.PASTE, "STH");
+ al = loadAlignment(TEST_DATA, "STH");
int i = 0;
for (AlignmentAnnotation ann : al.getAlignmentAnnotation())
{
- ann.setCalcId("CalcIdFor"
- + al.getSequenceAt(i).getName());
+ ann.setCalcId("CalcIdFor" + al.getSequenceAt(i).getName());
i++;
}
}
assertFalse(iter.hasNext());
}
- /**
- * Tests for method that checks for alignment 'mappability'.
- *
- * @throws IOException
- */
@Test
- public void testIsMappableTo() throws IOException
+ public void testDeleteAllAnnotations_includingAutocalculated()
{
- al = new FormatAdapter().readFile(TEST_DATA2,
- AppletFormatAdapter.PASTE, "FASTA");
- al.setDataset(null);
-
- // not mappable to self
- assertFalse(al.isMappableTo(al));
-
- // dna mappable to protein and vice versa
- AlignmentI alp = new FormatAdapter().readFile(TEST_DATA3,
- AppletFormatAdapter.PASTE, "FASTA");
- alp.setDataset(null);
- assertTrue(al.isMappableTo(alp));
- assertTrue(alp.isMappableTo(al));
- assertFalse(alp.isMappableTo(alp));
-
- // not mappable if any sequence length mismatch
- alp.getSequenceAt(1).setSequence("-R--FP-");
- alp.getSequenceAt(1).setDatasetSequence(new Sequence("", "RFP"));
- assertFalse(alp.isMappableTo(al));
- assertFalse(al.isMappableTo(alp));
-
- // not mappable if number of sequences differs
- alp.deleteSequence(1);
- assertFalse(alp.isMappableTo(al));
- assertFalse(al.isMappableTo(alp));
+ AlignmentAnnotation aa = new AlignmentAnnotation("Consensus",
+ "Consensus", 0.5);
+ aa.autoCalculated = true;
+ al.addAnnotation(aa);
+ AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
+ assertEquals("Wrong number of annotations before deleting", 4,
+ anns.length);
+ al.deleteAllAnnotations(true);
+ assertEquals("Not all deleted", 0, al.getAlignmentAnnotation().length);
+ }
+
+ @Test
+ public void testDeleteAllAnnotations_excludingAutocalculated()
+ {
+ AlignmentAnnotation aa = new AlignmentAnnotation("Consensus",
+ "Consensus", 0.5);
+ aa.autoCalculated = true;
+ al.addAnnotation(aa);
+ AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
+ assertEquals("Wrong number of annotations before deleting", 4,
+ anns.length);
+ al.deleteAllAnnotations(false);
+ assertEquals("Not just one annotation left", 1,
+ al.getAlignmentAnnotation().length);
}
/**
- * Tests for realigning as per a supplied alignment.
+ * Tests for realigning as per a supplied alignment: Dna as Dna.
+ *
+ * Note: AlignedCodonFrame's state variables are named for protein-to-cDNA
+ * mapping, but can be exploited for a general 'sequence-to-sequence' mapping
+ * as here.
*
* @throws IOException
*/
public void testAlignAs_dnaAsDna() throws IOException
{
// aligned cDNA:
- Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
- AppletFormatAdapter.PASTE, "FASTA");
- al1.setDataset(null);
+ AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
// unaligned cDNA:
- Alignment al2 = new FormatAdapter().readFile(TEST_DATA4,
- AppletFormatAdapter.PASTE, "FASTA");
- al2.setDataset(null);
+ AlignmentI al2 = loadAlignment(CDNA_SEQS_2, "FASTA");
- al2.alignAs(al1);
+ /*
+ * Make mappings between sequences. The 'aligned cDNA' is playing the role
+ * of what would normally be protein here.
+ */
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList ml = new MapList(new int[]
+ { 1, 12 }, new int[]
+ { 1, 12 }, 1, 1);
+ acf.addMap(al2.getSequenceAt(0), al1.getSequenceAt(0), ml);
+ acf.addMap(al2.getSequenceAt(1), al1.getSequenceAt(1), ml);
+ al1.addCodonFrame(acf);
+
+ ((Alignment) al2).alignAs(al1, false, true);
assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0)
.getSequenceAsString());
assertEquals("-GG-GTC--AGG---CAGT", al2.getSequenceAt(1)
}
/**
- * Aligning protein from cDNA yet to be implemented.
+ * Aligning protein as per cDNA is not yet implemented: this test expects
+ * alignAs to leave both protein sequences exactly as they were loaded.
*
* @throws IOException
*/
@Test
public void testAlignAs_proteinAsCdna() throws IOException
{
- // aligned cDNA:
- Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
- AppletFormatAdapter.PASTE, "FASTA");
- al1.setDataset(null);
- // unaligned cDNA:
- Alignment al2 = new FormatAdapter().readFile(TEST_DATA3,
- AppletFormatAdapter.PASTE, "FASTA");
- al2.setDataset(null);
+ AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
+ AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
+ String before0 = al2.getSequenceAt(0).getSequenceAsString();
+ String before1 = al2.getSequenceAt(1).getSequenceAsString();
- try
- {
- al2.alignAs(al1);
- fail("No exception thrown");
- } catch (UnsupportedOperationException e)
- {
- // expected;
- }
+ ((Alignment) al2).alignAs(al1, false, true);
+ assertEquals(before0, al2.getSequenceAt(0).getSequenceAsString());
+ assertEquals(before1, al2.getSequenceAt(1).getSequenceAsString());
}
/**
@Test
public void testAlignAs_cdnaAsProtein() throws IOException
{
- // aligned cDNA:
- Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
- AppletFormatAdapter.PASTE, "FASTA");
- al1.setDataset(null);
- // unaligned cDNA:
- Alignment al2 = new FormatAdapter().readFile(TEST_DATA3,
- AppletFormatAdapter.PASTE, "FASTA");
- al2.setDataset(null);
+ /*
+ * Load alignments and add mappings for cDNA to protein
+ */
+ AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
+ AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList ml = new MapList(new int[]
+ { 1, 12 }, new int[]
+ { 1, 4 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml);
+ acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml);
+ al2.addCodonFrame(acf);
- al1.alignAs(al2);
+ /*
+ * Realign DNA; currently keeping existing gaps in introns only
+ */
+ ((Alignment) al1).alignAs(al2, false, true);
assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0)
.getSequenceAsString());
- assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1)
+ assertEquals("---CGT---TAACGA---AGT", al1.getSequenceAt(1)
+ .getSequenceAsString());
+ }
+
+ /**
+ * Test aligning dna as per protein alignment, for the case where there are
+ * introns (i.e. some dna sites have no mapping from a peptide). The dna is
+ * realigned twice: first with both boolean arguments of alignAs false, then
+ * (after restoring the original gapped dna strings) with both true.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testAlignAs_dnaAsProtein_withIntrons() throws IOException
+ {
+ /*
+ * Load alignments and add mappings for cDNA to protein
+ */
+ String dna1 = "A-Aa-gG-GCC-cT-TT";
+ String dna2 = "c--CCGgg-TT--T-AA-A";
+ AlignmentI al1 = loadAlignment(">Seq1\n" + dna1 + "\n>Seq2\n" + dna2
+ + "\n", "FASTA");
+ AlignmentI al2 = loadAlignment(">Seq1\n-P--YK\n>Seq2\nG-T--F\n",
+ "FASTA");
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ // Seq1 introns are ungapped dna positions 3, 4, 9 (a, g, c); exons splice to AAG GCC TTT
+ // Seq2 introns are ungapped dna positions 1, 5, 6 (c, g, g); exons splice to CCG TTT AAA
+ MapList ml1 = new MapList(new int[]
+ { 1, 2, 5, 8, 10, 12 }, new int[]
+ { 1, 3 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1);
+ MapList ml2 = new MapList(new int[]
+ { 2, 4, 7, 12 }, new int[]
+ { 1, 3 }, 3, 1);
+ acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml2);
+ al2.addCodonFrame(acf);
+
+ /*
+ * Align ignoring gaps in dna introns and exons (both boolean arguments
+ * false)
+ */
+ ((Alignment) al1).alignAs(al2, false, false);
+ assertEquals("---AAagG------GCCcTTT", al1.getSequenceAt(0)
+ .getSequenceAsString());
+ // note 1 gap in protein corresponds to 'gg-' in DNA (3 positions)
+ assertEquals("cCCGgg-TTT------AAA", al1.getSequenceAt(1)
+ .getSequenceAsString());
+
+ /*
+ * Reset and realign, preserving gaps in dna introns and exons (both
+ * boolean arguments true)
+ */
+ al1.getSequenceAt(0).setSequence(dna1);
+ al1.getSequenceAt(1).setSequence(dna2);
+ ((Alignment) al1).alignAs(al2, true, true);
+ // for reference, the restored input is dna1 = "A-Aa-gG-GCC-cT-TT"
+ // and dna2 = "c--CCGgg-TT--T-AA-A"
+ // assumption: we include 'the greater of' protein/dna gap lengths, not both
+ assertEquals("---A-Aa-gG------GCC-cT-TT", al1.getSequenceAt(0)
+ .getSequenceAsString());
+ assertEquals("c--CCGgg-TT--T------AA-A", al1.getSequenceAt(1)
.getSequenceAsString());
}
}