import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
@Test(groups = { "Functional" })
public void testMakeCdsAlignment()
{
+ /*
+ * scenario:
+ * dna1 --> [4, 6] [10,12] --> pep1
+ * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
+ */
SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
SequenceI pep1 = new Sequence("pep1", "GF");
SequenceI pep2 = new Sequence("pep2", "GFP");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
+ pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
dna1.createDatasetSequence();
dna2.createDatasetSequence();
pep1.createDatasetSequence();
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
+ /*
+ * need a sourceDbRef if we are to construct dbrefs to the CDS
+ * sequence
+ */
+ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
+ dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
+ dna2.getDatasetSequence().setSourceDBRef(dbref);
+
+ /*
+ * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
+ * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
+ */
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
dna1, dna2 }, dna.getDataset(), null);
+ /*
+ * verify cds sequences
+ */
assertEquals(2, cds.getSequences().size());
assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
* verify shared, extended alignment dataset
*/
assertSame(dna.getDataset(), cds.getDataset());
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(0).getDatasetSequence()));
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(1).getDatasetSequence()));
+ SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
+ SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
+ assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
+ assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
+
+ /*
+ * verify CDS has a dbref with mapping to peptide
+ */
+ assertNotNull(cds1Dss.getDBRefs());
+ assertEquals(1, cds1Dss.getDBRefs().length);
+ dbref = cds1Dss.getDBRefs()[0];
+ assertEquals("UNIPROT", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("pep1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
+ MapList cdsMapping = new MapList(new int[] { 1, 6 },
+ new int[] { 1, 2 }, 3, 1);
+ assertEquals(cdsMapping, dbref.getMap().getMap());
/*
- * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
- * the mappings are on the shared alignment dataset
+ * verify peptide has added a dbref with reverse mapping to CDS
*/
- List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ assertNotNull(pep1.getDBRefs());
+ assertEquals(2, pep1.getDBRefs().length);
+ dbref = pep1.getDBRefs()[1];
+ assertEquals("ENSEMBL", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("CDS|dna1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(cds1Dss, dbref.getMap().getTo());
+ assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
+
/*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
*/
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
assertEquals(6, cdsMappings.size());
/*
SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
Match m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
}
SequenceI cdsSeq = cds.get(0);
assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals("dna1|pep1", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(1);
assertEquals("aaaccc", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12346", cdsSeq.getName());
- assertEquals("dna1|pep2", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(2);
assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12347", cdsSeq.getName());
- assertEquals("dna1|pep3", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
* execute method under test to find CDS for EMBL peptides only
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, dna.getDataset(), emblPeptides);
+ dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
assertEquals(2, cds.getSequences().size());
assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());