dna2.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, dna.getDataset());
+ dna1, dna2 }, dna.getDataset(), null);
assertEquals(2, cds.getSequences().size());
assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
pep1.createDatasetSequence();
pep2.createDatasetSequence();
pep3.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
- null));
pep1.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "2", "A12345"));
pep2.getDatasetSequence().addDBRef(
* execute method under test
*/
AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, dna.getDataset());
+ new SequenceI[] { dna1 }, dna.getDataset(), null);
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
dna3.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
dna.setDataset(null);
dna.addCodonFrame(acf);
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2, dna3 }, dna.getDataset());
+ dna1, dna2, dna3 }, dna.getDataset(), null);
List<SequenceI> cdsSeqs = cds.getSequences();
assertEquals(2, cdsSeqs.size());
assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
assertEquals('T', map.get(11).get(seq1).charValue());
assertEquals('T', map.get(12).get(seq1).charValue());
}
+
+ /**
+ * Test for the case where the products for which we want CDS are specified.
+ * This is to represent the case where EMBL has CDS mappings to both Uniprot
+ * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
+ * the protein sequences specified.
+ * <p>
+ * Fixture: dna1 is mapped to both pep1 and pep3 using one MapList, and dna2
+ * is mapped to both pep2 and pep4 using another. Only pep3 and pep4 (the
+ * peptides named "EMBL|...") are passed as the products argument, so the
+ * assertions below check the CDS sequences and mappings for pep3 and pep4.
+ */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_filterProducts()
+ {
+ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+ SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
+ SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
+ SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
+ SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
+ dna1.createDatasetSequence();
+ dna2.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+ pep3.createDatasetSequence();
+ pep4.createDatasetSequence();
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+ dna.setDataset(null);
+ AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
+ emblPeptides.setDataset(null);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 2 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ acf = new AlignedCodonFrame();
+ map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ 3, 1);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ /*
+ * execute method under test to find CDS for EMBL peptides only
+ * (emblPeptides holds pep3 and pep4; pep1 and pep2 are not passed in)
+ */
+ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+ dna1, dna2 }, dna.getDataset(), emblPeptides);
+
+ assertEquals(2, cds.getSequences().size());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * verify shared, extended alignment dataset: the CDS alignment uses the
+ * same dataset object as the dna alignment, and that dataset contains the
+ * dataset sequences of both new CDS sequences
+ */
+ assertSame(dna.getDataset(), cds.getDataset());
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(0).getDatasetSequence()));
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+ /*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
+ */
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ * (the count is of AlignedCodonFrame entries held on the dataset)
+ */
+ assertEquals(6, cdsMappings.size());
+
+ /*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // NOTE(review): original note read "select -> subselect type to test" - intent unclear; the checks below are disabled (see "not current behaviour" above)
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep3 (dna to pep3, cds to pep3)
+ * Mapping from pep3 to GGGTTT in first new CDS sequence
+ */
+ List<AlignedCodonFrame> pep3Mappings = MappingUtils
+ .findMappingsForSequence(pep3, cdsMappings);
+ assertEquals(2, pep3Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
+ assertEquals(1, mappings.size());
+
+ // map G (pep3 position 1) to GGG (CDS dataset sequence positions 1-3)
+ SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ Match m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F (pep3 position 2) to TTT (CDS dataset sequence positions 4-6)
+ sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+
+ /*
+ * Two mappings involve pep4 (dna to pep4, cds to pep4)
+ * Verify mapping from pep4 to GGGTTTCCC in second new CDS sequence
+ */
+ List<AlignedCodonFrame> pep4Mappings = MappingUtils
+ .findMappingsForSequence(pep4, cdsMappings);
+ assertEquals(2, pep4Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep4Mappings);
+ assertEquals(1, mappings.size());
+ // map G (pep4 position 1) to GGG (CDS dataset sequence positions 1-3)
+ sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F (pep4 position 2) to TTT (CDS dataset sequence positions 4-6)
+ sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+ // map P (pep4 position 3) to CCC (CDS dataset sequence positions 7-9)
+ sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(7, m.getStart());
+ assertEquals(9, m.getEnd());
+ }
}