/**
* Returns a mapping from dna to protein by inspecting sequence features of
- * type "CDS" on the dna.
+ * type "CDS" on the dna. A mapping is constructed if the total CDS feature
+ * length is 3 times the peptide length (optionally after dropping a trailing
+ * stop codon). This method does not check whether the CDS nucleotide sequence
+ * translates to the peptide sequence.
*
* @param dnaSeq
* @param proteinSeq
List<int[]> ranges = findCdsPositions(dnaSeq);
int mappedDnaLength = MappingUtils.getLength(ranges);
+ /*
+ * if not a whole number of codons, something is wrong,
+ * abort mapping
+ */
+ if (mappedDnaLength % CODON_LENGTH > 0)
+ {
+ return null;
+ }
+
int proteinLength = proteinSeq.getLength();
int proteinStart = proteinSeq.getStart();
int proteinEnd = proteinSeq.getEnd();
if (codesForResidues == (proteinLength + 1))
{
// assuming extra codon is for STOP and not in peptide
+ // todo: check trailing codon is indeed a STOP codon
codesForResidues--;
+ mappedDnaLength -= CODON_LENGTH;
+ MappingUtils.removeEndPositions(CODON_LENGTH, ranges);
}
+
if (codesForResidues == proteinLength)
{
proteinRange.add(new int[] { proteinStart, proteinEnd });
/**
* Returns a list of CDS ranges found (as sequence positions base 1), i.e. of
- * start/end positions of sequence features of type "CDS" (or a sub-type of
+ * [start, end] positions of sequence features of type "CDS" (or a sub-type of
* CDS in the Sequence Ontology). The ranges are sorted into ascending start
* position order, so this method is only valid for linear CDS in the same
* sense as the protein product.
return result;
}
SequenceFeatures.sortFeatures(sfs, true);
- int startPhase = 0;
for (SequenceFeature sf : sfs)
{
*/
int begin = sf.getBegin();
int end = sf.getEnd();
- if (result.isEmpty())
+ if (result.isEmpty() && phase > 0)
{
begin += phase;
if (begin > end)
}
/*
- * remove 'startPhase' positions (usually 0) from the first range
- * so we begin at the start of a complete codon
- */
- if (!result.isEmpty())
- {
- // TODO JAL-2022 correctly model start phase > 0
- result.get(0)[0] += startPhase;
- }
-
- /*
* Finally sort ranges by start position. This avoids a dependency on
* keeping features in order on the sequence (if they are in order anyway,
* the sort will have almost no work to do). The implicit assumption is CDS
}
return copy;
}
+
+ /**
+ * Removes the specified number of positions from the given ranges. Provided
+ * to allow a stop codon to be stripped from a CDS sequence so that it matches
+ * the peptide translation length.
+ *
+ * @param positions
+ * @param ranges
+ * a list of (single) [start, end] ranges
+ * @return
+ */
+ public static void removeEndPositions(int positions,
+ List<int[]> ranges)
+ {
+ int toRemove = positions;
+ Iterator<int[]> it = new ReverseListIterator<>(ranges);
+ while (toRemove > 0)
+ {
+ int[] endRange = it.next();
+ if (endRange.length != 2)
+ {
+ /*
+ * not coded for [start1, end1, start2, end2, ...]
+ */
+ System.err
+ .println("MappingUtils.removeEndPositions doesn't handle multiple ranges");
+ return;
+ }
+
+ int length = endRange[1] - endRange[0] + 1;
+ if (length <= 0)
+ {
+ /*
+ * not coded for a reverse strand range (end < start)
+ */
+ System.err
+ .println("MappingUtils.removeEndPositions doesn't handle reverse strand");
+ return;
+ }
+ if (length > toRemove)
+ {
+ endRange[1] -= toRemove;
+ toRemove = 0;
+ }
+ else
+ {
+ toRemove -= length;
+ it.remove();
+ }
+ }
+ }
}
assertEquals(s_as3, uas3.getSequenceAsString());
}
+ /**
+ * Tests for the method that maps nucleotide to protein based on CDS features
+ */
+ @Test(groups = "Functional")
+ public void testMapCdsToProtein()
+ {
+ SequenceI peptide = new Sequence("pep", "KLQ");
+
+ /*
+ * Case 1: CDS 3 times length of peptide
+ * NB method only checks lengths match, not translation
+ */
+ SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
+ MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 2: CDS 3 times length of peptide + stop codon
+ * (note code does not currently check trailing codon is a stop codon)
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTGA");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 3: CDS not 3 times length of peptide - no mapping is made
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTG");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertNull(ml);
+
+ /*
+ * Case 4: incomplete start codon corresponding to X in peptide
+ */
+ dna = new Sequence("dna", "ACGacgtCTCCTTGG");
+ dna.createDatasetSequence();
+ SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
+ sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
+ dna.addSequenceFeature(sf);
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
+ peptide = new Sequence("pep", "XLQ");
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals("[[2, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[3, 3], [8, 12]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+ }
+
}
assertEquals("[12, 11, 8, 4]", Arrays.toString(ranges));
}
+ @Test(groups = "Functional")
+ public void testRemoveEndPositions()
+ {
+ List<int[]> ranges = new ArrayList<>();
+
+ /*
+ * case 1: truncate last range
+ */
+ ranges.add(new int[] { 1, 10 });
+ ranges.add(new int[] { 20, 30 });
+ MappingUtils.removeEndPositions(5, ranges);
+ assertEquals(2, ranges.size());
+ assertEquals(25, ranges.get(1)[1]);
+
+ /*
+ * case 2: remove last range
+ */
+ ranges.clear();
+ ranges.add(new int[] { 1, 10 });
+ ranges.add(new int[] { 20, 22 });
+ MappingUtils.removeEndPositions(3, ranges);
+ assertEquals(1, ranges.size());
+ assertEquals(10, ranges.get(0)[1]);
+
+ /*
+ * case 3: truncate penultimate range
+ */
+ ranges.clear();
+ ranges.add(new int[] { 1, 10 });
+ ranges.add(new int[] { 20, 21 });
+ MappingUtils.removeEndPositions(3, ranges);
+ assertEquals(1, ranges.size());
+ assertEquals(9, ranges.get(0)[1]);
+
+ /*
+ * case 4: remove last two ranges
+ */
+ ranges.clear();
+ ranges.add(new int[] { 1, 10 });
+ ranges.add(new int[] { 20, 20 });
+ ranges.add(new int[] { 30, 30 });
+ MappingUtils.removeEndPositions(3, ranges);
+ assertEquals(1, ranges.size());
+ assertEquals(9, ranges.get(0)[1]);
+ }
}