/**
* Returns a mapping from dna to protein by inspecting sequence features of
- * type "CDS" on the dna.
+ * type "CDS" on the dna. A mapping is constructed if the total CDS feature
+ * length is 3 times the peptide length (optionally after dropping a trailing
+ * stop codon). This method does not check whether the CDS nucleotide sequence
+ * translates to the peptide sequence.
*
* @param dnaSeq
* @param proteinSeq
List<int[]> ranges = findCdsPositions(dnaSeq);
int mappedDnaLength = MappingUtils.getLength(ranges);
+ /*
+ * if not a whole number of codons, something is wrong,
+ * abort mapping
+ */
+ if (mappedDnaLength % CODON_LENGTH > 0)
+ {
+ return null;
+ }
+
int proteinLength = proteinSeq.getLength();
int proteinStart = proteinSeq.getStart();
int proteinEnd = proteinSeq.getEnd();
if (codesForResidues == (proteinLength + 1))
{
// assuming extra codon is for STOP and not in peptide
+ // todo: check trailing codon is indeed a STOP codon
codesForResidues--;
mappedDnaLength -= CODON_LENGTH;
MappingUtils.removeEndPositions(CODON_LENGTH, ranges);
return result;
}
SequenceFeatures.sortFeatures(sfs, true);
- int startPhase = 0;
for (SequenceFeature sf : sfs)
{
*/
int begin = sf.getBegin();
int end = sf.getEnd();
- if (result.isEmpty())
+ if (result.isEmpty() && phase > 0)
{
begin += phase;
if (begin > end)
}
/*
- * remove 'startPhase' positions (usually 0) from the first range
- * so we begin at the start of a complete codon
- */
- if (!result.isEmpty())
- {
- // TODO JAL-2022 correctly model start phase > 0
- result.get(0)[0] += startPhase;
- }
-
- /*
* Finally sort ranges by start position. This avoids a dependency on
* keeping features in order on the sequence (if they are in order anyway,
* the sort will have almost no work to do). The implicit assumption is CDS
assertEquals(s_as3, uas3.getSequenceAsString());
}
+ /**
+ * Tests for the method that maps nucleotide to protein based on CDS features.
+ * Four cases are covered: CDS length exactly three times the peptide length,
+ * CDS with one extra trailing codon, CDS length that is not a whole number
+ * of codons (a null mapping is expected), and a first CDS feature with
+ * phase 2 paired with a peptide that starts with X.
+ */
+ @Test(groups = "Functional")
+ public void testMapCdsToProtein()
+ {
+ SequenceI peptide = new Sequence("pep", "KLQ");
+
+ /*
+ * Case 1: CDS 3 times length of peptide
+ * NB method only checks lengths match, not translation
+ * (the two CDS features cover 4 + 5 = 9 bases for the 3 residues)
+ */
+ SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
+ MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 2: CDS 3 times length of peptide + stop codon
+ * (note code does not currently check trailing codon is a stop codon)
+ * The CDS features cover 4 + 8 = 12 bases; the expected 'from' ranges
+ * stop at position 13, i.e. the last 3 CDS positions (14-16) are excluded
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTGA");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 3: CDS not 3 times length of peptide - no mapping is made
+ * (the CDS features cover 4 + 7 = 11 bases, which is not a whole
+ * number of codons)
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTG");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertNull(ml);
+
+ /*
+ * Case 4: incomplete start codon corresponding to X in peptide
+ * The expected mapping starts at dna position 3 (after the 2 phase
+ * positions) and at peptide residue 2 (the leading X is not mapped).
+ * NOTE(review): the expected 'from' ranges end at 12, so positions 13-15
+ * are presumably dropped as an assumed trailing stop codon - confirm
+ */
+ dna = new Sequence("dna", "ACGacgtCTCCTTGG");
+ dna.createDatasetSequence();
+ SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
+ sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
+ dna.addSequenceFeature(sf);
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
+ peptide = new Sequence("pep", "XLQ");
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals("[[2, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[3, 3], [8, 12]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+ }
+
}