+
+ /**
+ * Test for the case where the products for which we want CDS are specified.
+ * This is to represent the case where EMBL has CDS mappings to both Uniprot
+ * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
+ * the protein sequences specified.
+ */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_filterProducts()
+ {
+ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+ SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
+ SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
+ SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
+ SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
+ dna1.createDatasetSequence();
+ dna2.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+ pep3.createDatasetSequence();
+ pep4.createDatasetSequence();
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+ dna.setDataset(null);
+ AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
+ emblPeptides.setDataset(null);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 2 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ acf = new AlignedCodonFrame();
+ map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ 3, 1);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ /*
+ * execute method under test to find CDS for EMBL peptides only
+ */
+ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+ dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
+
+ assertEquals(2, cds.getSequences().size());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * verify shared, extended alignment dataset
+ */
+ assertSame(dna.getDataset(), cds.getDataset());
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(0).getDatasetSequence()));
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+ /*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
+ */
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ */
+ assertEquals(6, cdsMappings.size());
+
+ /*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep3 (dna to pep3, cds to pep3)
+ * Mapping from pep3 to GGGTTT in first new exon sequence
+ */
+ List<AlignedCodonFrame> pep3Mappings = MappingUtils
+ .findMappingsForSequence(pep3, cdsMappings);
+ assertEquals(2, pep3Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
+ assertEquals(1, mappings.size());
+
+ // map G to GGG
+ SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ Match m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+
+ /*
+ * Two mappings involve pep4 (dna to pep4, cds to pep4)
+ * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
+ */
+ List<AlignedCodonFrame> pep4Mappings = MappingUtils
+ .findMappingsForSequence(pep4, cdsMappings);
+ assertEquals(2, pep4Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep4Mappings);
+ assertEquals(1, mappings.size());
+ // map G to GGG
+ sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+ // map P to CCC
+ sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(7, m.getStart());
+ assertEquals(9, m.getEnd());
+ }
+
+ /**
+ * Test the method that just copies aligned sequences, provided all sequences
+ * to be aligned share the aligned sequence's dataset
+ */
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequences()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
+ ((Alignment) al1).createDatasetAlignment();
+
+ SequenceI dna3 = new Sequence(dna1);
+ SequenceI dna4 = new Sequence(dna2);
+ assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
+ assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
+ String seq1 = "-cc-GG-GT-TT--aaa";
+ dna3.setSequence(seq1);
+ String seq2 = "C--C-Cgg--gtt-tAA-A-";
+ dna4.setSequence(seq2);
+ AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
+ ((Alignment) al2).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'aligned' - should still succeed, since
+ * unaligned sequences still share a dataset with aligned sequences
+ */
+ SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
+ dna5.createDatasetSequence();
+ al2.addSequence(dna5);
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'unaligned' - should fail, since now not
+ * all unaligned sequences share a dataset with aligned sequences
+ */
+ SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
+ dna6.createDatasetSequence();
+ al1.addSequence(dna6);
+ // JAL-2110 JBP Comment: what's the use case for this behaviour ?
+ assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
+ }
+
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequencesMultipleSubSeq()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ SequenceI as1 = dna1.deriveSequence();
+ SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI as3 = dna2.deriveSequence();
+ as1.insertCharAt(6, 5, '-');
+ String s_as1 = as1.getSequenceAsString();
+ as2.insertCharAt(6, 5, '-');
+ String s_as2 = as2.getSequenceAsString();
+ as3.insertCharAt(6, 5, '-');
+ String s_as3 = as3.getSequenceAsString();
+ AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
+
+ // why do we need to cast this still ?
+ ((Alignment) aligned).createDatasetAlignment();
+ SequenceI uas1 = dna1.deriveSequence();
+ SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI uas3 = dna2.deriveSequence();
+ AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2,
+ uas3 });
+ ((Alignment) tobealigned).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
+ assertEquals(s_as1, uas1.getSequenceAsString());
+ assertEquals(s_as2, uas2.getSequenceAsString());
+ assertEquals(s_as3, uas3.getSequenceAsString());
+ }
+