import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
@Test(groups = { "Functional" })
public void testMakeCdsAlignment()
{
+ /*
+ * scenario:
+ * dna1 --> [4, 6] [10,12] --> pep1
+ * dna2 --> [1, 3] [7, 9] [13,15] --> pep1
+ */
SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
SequenceI pep1 = new Sequence("pep1", "GF");
SequenceI pep2 = new Sequence("pep2", "GFP");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
+ pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
dna1.createDatasetSequence();
dna2.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
+ /*
+ * need a sourceDbRef if we are to construct dbrefs to the CDS
+ * sequence
+ */
+ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
+ dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
+ dna2.getDatasetSequence().setSourceDBRef(dbref);
+
+ /*
+ * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
+ * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
+ */
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, dna.getDataset());
+ dna1, dna2 }, dna.getDataset(), null);
+ /*
+ * verify cds sequences
+ */
assertEquals(2, cds.getSequences().size());
assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
* verify shared, extended alignment dataset
*/
assertSame(dna.getDataset(), cds.getDataset());
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(0).getDatasetSequence()));
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(1).getDatasetSequence()));
+ SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
+ SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
+ assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
+ assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
+
+ /*
+ * verify CDS has a dbref with mapping to peptide
+ */
+ assertNotNull(cds1Dss.getDBRefs());
+ assertEquals(1, cds1Dss.getDBRefs().length);
+ dbref = cds1Dss.getDBRefs()[0];
+ assertEquals("UNIPROT", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("pep1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
+ MapList cdsMapping = new MapList(new int[] { 1, 6 },
+ new int[] { 1, 2 }, 3, 1);
+ assertEquals(cdsMapping, dbref.getMap().getMap());
/*
- * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
- * the mappings are on the shared alignment dataset
+ * verify peptide has added a dbref with reverse mapping to CDS
*/
- List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ assertNotNull(pep1.getDBRefs());
+ assertEquals(2, pep1.getDBRefs().length);
+ dbref = pep1.getDBRefs()[1];
+ assertEquals("ENSEMBL", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("CDS|dna1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(cds1Dss, dbref.getMap().getTo());
+ assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
+
/*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
*/
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
assertEquals(6, cdsMappings.size());
/*
SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
}
pep1.createDatasetSequence();
pep2.createDatasetSequence();
pep3.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
- null));
pep1.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "2", "A12345"));
pep2.getDatasetSequence().addDBRef(
* execute method under test
*/
AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, dna.getDataset());
+ new SequenceI[] { dna1 }, dna.getDataset(), null);
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
SequenceI cdsSeq = cds.get(0);
assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals("dna1|pep1", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(1);
assertEquals("aaaccc", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12346", cdsSeq.getName());
- assertEquals("dna1|pep2", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(2);
assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12347", cdsSeq.getName());
- assertEquals("dna1|pep3", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
dna3.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
dna.setDataset(null);
dna.addCodonFrame(acf);
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2, dna3 }, dna.getDataset());
+ dna1, dna2, dna3 }, dna.getDataset(), null);
List<SequenceI> cdsSeqs = cds.getSequences();
assertEquals(2, cdsSeqs.size());
assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
assertEquals('T', map.get(11).get(seq1).charValue());
assertEquals('T', map.get(12).get(seq1).charValue());
}
+
+ /**
+ * Test for the case where the products for which we want CDS are specified.
+ * This is to represent the case where EMBL has CDS mappings to both Uniprot
+ * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
+ * the protein sequences specified.
+ */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_filterProducts()
+ {
+ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+ SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
+ SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
+ SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
+ SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
+ dna1.createDatasetSequence();
+ dna2.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+ pep3.createDatasetSequence();
+ pep4.createDatasetSequence();
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+ dna.setDataset(null);
+ AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
+ emblPeptides.setDataset(null);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 2 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ acf = new AlignedCodonFrame();
+ map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ 3, 1);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ /*
+ * execute method under test to find CDS for EMBL peptides only
+ */
+ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+ dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
+
+ assertEquals(2, cds.getSequences().size());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * verify shared, extended alignment dataset
+ */
+ assertSame(dna.getDataset(), cds.getDataset());
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(0).getDatasetSequence()));
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+ /*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
+ */
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ */
+ assertEquals(6, cdsMappings.size());
+
+ /*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep3 (dna to pep3, cds to pep3)
+ * Mapping from pep3 to GGGTTT in first new exon sequence
+ */
+ List<AlignedCodonFrame> pep3Mappings = MappingUtils
+ .findMappingsForSequence(pep3, cdsMappings);
+ assertEquals(2, pep3Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
+ assertEquals(1, mappings.size());
+
+ // map G to GGG
+ SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ Match m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+
+ /*
+ * Two mappings involve pep4 (dna to pep4, cds to pep4)
+ * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
+ */
+ List<AlignedCodonFrame> pep4Mappings = MappingUtils
+ .findMappingsForSequence(pep4, cdsMappings);
+ assertEquals(2, pep4Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep4Mappings);
+ assertEquals(1, mappings.size());
+ // map G to GGG
+ sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+ // map P to CCC
+ sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(7, m.getStart());
+ assertEquals(9, m.getEnd());
+ }
}