/**
* Returns a mapping from dna to protein by inspecting sequence features of
- * type "CDS" on the dna.
+ * type "CDS" on the dna. A mapping is constructed if the total CDS feature
+ * length is 3 times the peptide length (optionally after dropping a trailing
+ * stop codon). This method does not check whether the CDS nucleotide sequence
+ * translates to the peptide sequence.
*
* @param dnaSeq
* @param proteinSeq
List<int[]> ranges = findCdsPositions(dnaSeq);
int mappedDnaLength = MappingUtils.getLength(ranges);
+ /*
+ * if not a whole number of codons, something is wrong,
+ * abort mapping
+ */
+ if (mappedDnaLength % CODON_LENGTH > 0)
+ {
+ return null;
+ }
+
int proteinLength = proteinSeq.getLength();
int proteinStart = proteinSeq.getStart();
int proteinEnd = proteinSeq.getEnd();
if (codesForResidues == (proteinLength + 1))
{
// assuming extra codon is for STOP and not in peptide
+ // TODO: check trailing codon is indeed a STOP codon
codesForResidues--;
mappedDnaLength -= CODON_LENGTH;
MappingUtils.removeEndPositions(CODON_LENGTH, ranges);
return result;
}
SequenceFeatures.sortFeatures(sfs, true);
- int startPhase = 0;
for (SequenceFeature sf : sfs)
{
*/
int begin = sf.getBegin();
int end = sf.getEnd();
- if (result.isEmpty())
+ if (result.isEmpty() && phase > 0)
{
begin += phase;
if (begin > end)
}
/*
- * remove 'startPhase' positions (usually 0) from the first range
- * so we begin at the start of a complete codon
- */
- if (!result.isEmpty())
- {
- // TODO JAL-2022 correctly model start phase > 0
- result.get(0)[0] += startPhase;
- }
-
- /*
* Finally sort ranges by start position. This avoids a dependency on
* keeping features in order on the sequence (if they are in order anyway,
* the sort will have almost no work to do). The implicit assumption is CDS