From: Jim Procter Date: Wed, 25 Oct 2017 16:23:03 +0000 (+0100) Subject: Merge branch 'bug/JAL-2617mappedStopCodon' into develop X-Git-Tag: Release_2_10_3b1~68 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=fbab824503f3746682d8bac0327e5ffe0e0fb8a5;hp=2a27ba6c3c4a450f17eae24c592a565a62c1821f;p=jalview.git Merge branch 'bug/JAL-2617mappedStopCodon' into develop --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 2b9b9f9..90d9197 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -2164,7 +2164,10 @@ public class AlignmentUtils /** * Returns a mapping from dna to protein by inspecting sequence features of - * type "CDS" on the dna. + * type "CDS" on the dna. A mapping is constructed if the total CDS feature + * length is 3 times the peptide length (optionally after dropping a trailing + * stop codon). This method does not check whether the CDS nucleotide sequence + * translates to the peptide sequence. * * @param dnaSeq * @param proteinSeq @@ -2176,6 +2179,15 @@ public class AlignmentUtils List ranges = findCdsPositions(dnaSeq); int mappedDnaLength = MappingUtils.getLength(ranges); + /* + * if not a whole number of codons, something is wrong, + * abort mapping + */ + if (mappedDnaLength % CODON_LENGTH > 0) + { + return null; + } + int proteinLength = proteinSeq.getLength(); int proteinStart = proteinSeq.getStart(); int proteinEnd = proteinSeq.getEnd(); @@ -2199,8 +2211,12 @@ public class AlignmentUtils if (codesForResidues == (proteinLength + 1)) { // assuming extra codon is for STOP and not in peptide + // todo: check trailing codon is indeed a STOP codon codesForResidues--; + mappedDnaLength -= CODON_LENGTH; + MappingUtils.removeEndPositions(CODON_LENGTH, ranges); } + if (codesForResidues == proteinLength) { proteinRange.add(new int[] { proteinStart, proteinEnd }); @@ -2211,7 +2227,7 @@ public class AlignmentUtils /** * Returns a list of CDS ranges found (as sequence positions base 1), i.e. of - * start/end positions of sequence features of type "CDS" (or a sub-type of + * [start, end] positions of sequence features of type "CDS" (or a sub-type of * CDS in the Sequence Ontology). The ranges are sorted into ascending start * position order, so this method is only valid for linear CDS in the same * sense as the protein product. @@ -2230,7 +2246,6 @@ public class AlignmentUtils return result; } SequenceFeatures.sortFeatures(sfs, true); - int startPhase = 0; for (SequenceFeature sf : sfs) { @@ -2248,7 +2263,7 @@ public class AlignmentUtils */ int begin = sf.getBegin(); int end = sf.getEnd(); - if (result.isEmpty()) + if (result.isEmpty() && phase > 0) { begin += phase; if (begin > end) @@ -2263,16 +2278,6 @@ public class AlignmentUtils } /* - * remove 'startPhase' positions (usually 0) from the first range - * so we begin at the start of a complete codon - */ - if (!result.isEmpty()) - { - // TODO JAL-2022 correctly model start phase > 0 - result.get(0)[0] += startPhase; - } - - /* * Finally sort ranges by start position. This avoids a dependency on * keeping features in order on the sequence (if they are in order anyway, * the sort will have almost no work to do). The implicit assumption is CDS diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 3682239..9c5c109 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -939,4 +939,55 @@ public final class MappingUtils } return copy; } + + /** + * Removes the specified number of positions from the given ranges. Provided + * to allow a stop codon to be stripped from a CDS sequence so that it matches + * the peptide translation length. + * + * @param positions + * @param ranges + * a list of (single) [start, end] ranges + * @return + */ + public static void removeEndPositions(int positions, + List ranges) + { + int toRemove = positions; + Iterator it = new ReverseListIterator<>(ranges); + while (toRemove > 0) + { + int[] endRange = it.next(); + if (endRange.length != 2) + { + /* + * not coded for [start1, end1, start2, end2, ...] + */ + System.err + .println("MappingUtils.removeEndPositions doesn't handle multiple ranges"); + return; + } + + int length = endRange[1] - endRange[0] + 1; + if (length <= 0) + { + /* + * not coded for a reverse strand range (end < start) + */ + System.err + .println("MappingUtils.removeEndPositions doesn't handle reverse strand"); + return; + } + if (length > toRemove) + { + endRange[1] -= toRemove; + toRemove = 0; + } + else + { + toRemove -= length; + it.remove(); + } + } + } } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 4439bb9..06b51e6 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -2533,4 +2533,71 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + /** + * Tests for the method that maps nucleotide to protein based on CDS features + */ + @Test(groups = "Functional") + public void testMapCdsToProtein() + { + SequenceI peptide = new Sequence("pep", "KLQ"); + + /* + * Case 1: CDS 3 times length of peptide + * NB method only checks lengths match, not translation + */ + SequenceI dna = new Sequence("dna", "AACGacgtCTCCT"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null)); + MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + + /* + * Case 2: CDS 3 times length of peptide + stop codon + * (note code does not currently check trailing codon is a stop codon) + */ + dna = new Sequence("dna", "AACGacgtCTCCTTGA"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + + /* + * Case 3: CDS not 3 times length of peptide - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCCTTG"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 4: incomplete start codon corresponding to X in peptide + */ + dna = new Sequence("dna", "ACGacgtCTCCTTGG"); + dna.createDatasetSequence(); + SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null); + sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT) + dna.addSequenceFeature(sf); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null)); + peptide = new Sequence("pep", "XLQ"); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals("[[2, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[3, 3], [8, 12]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + } + } diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index d0ec3e8..5226819 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -1149,4 +1149,49 @@ public class MappingUtilsTest assertEquals("[12, 11, 8, 4]", Arrays.toString(ranges)); } + @Test(groups = "Functional") + public void testRemoveEndPositions() + { + List ranges = new ArrayList<>(); + + /* + * case 1: truncate last range + */ + ranges.add(new int[] { 1, 10 }); + ranges.add(new int[] { 20, 30 }); + MappingUtils.removeEndPositions(5, ranges); + assertEquals(2, ranges.size()); + assertEquals(25, ranges.get(1)[1]); + + /* + * case 2: remove last range + */ + ranges.clear(); + ranges.add(new int[] { 1, 10 }); + ranges.add(new int[] { 20, 22 }); + MappingUtils.removeEndPositions(3, ranges); + assertEquals(1, ranges.size()); + assertEquals(10, ranges.get(0)[1]); + + /* + * case 3: truncate penultimate range + */ + ranges.clear(); + ranges.add(new int[] { 1, 10 }); + ranges.add(new int[] { 20, 21 }); + MappingUtils.removeEndPositions(3, ranges); + assertEquals(1, ranges.size()); + assertEquals(9, ranges.get(0)[1]); + + /* + * case 4: remove last two ranges + */ + ranges.clear(); + ranges.add(new int[] { 1, 10 }); + ranges.add(new int[] { 20, 20 }); + ranges.add(new int[] { 30, 30 }); + MappingUtils.removeEndPositions(3, ranges); + assertEquals(1, ranges.size()); + assertEquals(9, ranges.get(0)[1]); + } }