From: gmungoc Date: Thu, 12 Nov 2020 10:03:02 +0000 (+0000) Subject: JAL-3761 locateInFrom2 work in progress commit X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d5289472f112190875559c98fc184d64fa7d5072;p=jalview.git JAL-3761 locateInFrom2 work in progress commit --- diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index e095106..2e673e0 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -562,7 +562,7 @@ public class MapList List shiftFrom, int toRatio) { // TODO: javadoc; tests - int[] fromCount = countPos(shiftTo, pos); + int[] fromCount = countPositions(shiftTo, pos); if (fromCount == null) { return null; @@ -572,27 +572,41 @@ public class MapList int[] toPos = traverseToPosition(shiftFrom, toCount); if (toPos == null) { - return null; // throw new Error("Bad Mapping!"); + return null; } - // System.out.println(fromCount[0]+" "+fromCount[1]+" "+toCount); return new int[] { toPos[0], fromRemainder, toPos[1] }; } /** - * count how many positions pos is along the series of intervals. + * Counts how many positions pos is along the series of intervals. Returns an + * array of two values: + * + * Returns null if {@code pos} does not lie in any of the given intervals. * - * @param shiftTo + * @param intervals + * a list of start-end intervals * @param pos - * @return number of positions or null if pos is not within intervals + * a position that may lie in one (or more) of the intervals + * @return */ - protected static int[] countPos(List shiftTo, int pos) + protected static int[] countPositions(List intervals, int pos) { - int count = 0, intv[], iv = 0, ivSize = shiftTo.size(); + int count = 0; + int iv = 0; + int ivSize = intervals.size(); + while (iv < ivSize) { - intv = shiftTo.get(iv++); + int[] intv = intervals.get(iv++); if (intv[0] <= intv[1]) { + /* + * forwards interval + */ if (pos >= intv[0] && pos <= intv[1]) { return new int[] { count + pos - intv[0] + 1, +1 }; @@ -604,6 +618,9 @@ public class MapList } else { + /* + * reverse interval + */ if (pos >= intv[1] && pos <= intv[0]) { return new int[] { count + intv[0] - pos + 1, -1 }; @@ -690,7 +707,7 @@ public class MapList int fromStart[] = shiftTo(start); // needs to be inclusive of end of symbol position int fromEnd[] = shiftTo(end); - + System.out.println("locateInFrom"); return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); } @@ -1225,4 +1242,231 @@ public class MapList { return fromShifts.size() == 1 && toShifts.size() == 1; } + + /** + * Returns the [start, end, start, end, ...] ranges in the 'from' range that + * map to the given start-end in the 'to' range. Returns null if either + * {@code start} or {@code end} is not a mapped 'to' range position. + * + * @param start + * @param end + * @return + */ + public int[] locateInFrom2(int start, int end) + { + List ranges = mapBetween(start, end, toShifts, fromShifts, + toRatio, fromRatio); + + // TODO: or just return the List and adjust calling code to match + return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); + } + + /** + * Returns the [start, end, start, end, ...] ranges in the 'to' range that map + * to the given start-end in the 'from' range. Returns null if either + * {@code start} or {@code end} is not a mapped 'from' range position. + * + * @param start + * @param end + * @return + */ + public int[] locateInTo2(int start, int end) + { + List ranges = mapBetween(start, end, fromShifts, toShifts, + fromRatio, toRatio); + + return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); + } + + /** + * A helper method for navigating the mapping. Returns a (possibly empty) list + * of [start-end] positions in {@code ranges2} that map to positions in + * {@code ranges1} between {@code start} and {@code end}. + * + * @param start + * @param end + * @param ranges1 + * @param ranges2 + * @param wordLength1 + * @param wordLength2 + * @return + */ + final static List mapBetween(int start, int end, + List ranges1, List ranges2, int wordLength1, + int wordLength2) + { + /* + * first traverse ranges1 and record count of mapped positions + * to any that overlap start-end + */ + List overlaps = findOverlapPositions(ranges1, start, end); + if (overlaps.isEmpty()) + { + return overlaps; + } + + /* + * convert positions to equivalent 'word' positions in ranges + */ + mapWords(overlaps, wordLength1, wordLength2); + + /* + * walk ranges2 and record the values found at + * the offsets in 'overlaps' + */ + List mapped = new ArrayList<>(); + final int s1 = overlaps.size(); + final int s2 = ranges2.size(); + int rangeIndex = 0; + int rangeOffset = 0; + int mappedCount = 0; + + for (int i = 0 ; i < s1 ; i++) + { + /* + * for each range in overlaps, walk ranges2 and record the values + * at the offsets, advancing rangeIndex / Offset + */ + int [] mappedRange = ranges2.get(rangeIndex); + int [] overlap = overlaps.get(s1); + while (mappedCount < overlap[1]) + { + + } + } + + return mapped; + } + + /** + * Converts the start-end positions (counted from zero) in the {@code ranges} + * list from one word length to another. Start-end positions are expanded if + * necessary to cover a whole word of length {@code wordLength1}. Positions + * are then divided by {@code wordLength1} and multiplied by + * {@code wordLength2} to give equivalent mapped words. + *

+ * Put simply, this converts peptide residue positions to the corresponding + * codon ranges, and codons - including partial codons - to the corresponding + * peptide positions; for example + * + *

+   * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [1, 4]
+   * 
+ * + * @param ranges + * @param wordLength1 + * @param wordLength2 + * @return + */ + final static void mapWords(List ranges, int wordLength1, + int wordLength2) + { + if (wordLength1 == 1 && wordLength2 == 1) + { + return; // nothing to do here + } + int s = ranges.size(); + for (int i = 0; i < s; i++) + { + int[] range = ranges.get(i); + + /* + * expand range start to the start of a word, + * and convert to wordLength2 + */ + range[0] -= range[0] % wordLength1; + range[0] = range[0] / wordLength1 * wordLength2; + + /* + * similar calculation for range end, adding + * (wordLength2 - 1) for end of mapped word + */ + range[1] -= range[1] % wordLength1; + range[1] = range[1] / wordLength1 * wordLength2; + range[1] += wordLength2 - 1; + } + } + + /** + * Helper method that returns a (possibly empty) list of offsets in + * {@code ranges} to subranges that overlap {@code start-end}. The list + * returned holds counts of the number of positions traversed (inclusive) to + * reach the overlapping positions, not the overlapping values. Returns null + * if there are no overlaps. + * + * @param ranges + * @param start + * @param end + * @return + */ + final static List findOverlapPositions(List ranges, + int start, int end) + { + List positions = new ArrayList<>(); + int pos = 0; + int s = ranges.size(); + for (int i = 0; i < s; i++) + { + int[] range = ranges.get(i); + addOverlap(positions, pos, range, start, end); + pos += 1 + Math.abs(range[1] - range[0]); + } + return positions; + } + + /** + * A helper method that checks whether {@code range} overlaps + * {@code start-end}, and if so adds the positional offset of the overlap to + * {@code positions}. + * + * @param positions + * a list of map offsets to add to + * @param pos + * the number of mapped positions already visited + * @param range + * a from-to range (may be forward or reverse) + * @param start + * position to test for overlap in range + * @param end + * position to test for overlap in range + * @return + */ + final static void addOverlap(List positions, int pos, int[] range, + int start, int end) + { + if (range[1] >= range[0]) + { + /* + * forward direction range + */ + if (start <= range[1] && end >= range[0]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[0]); + int overlapEnd = Math.min(end, range[1]); + positions + .add(new int[] + { 1 + overlapStart - range[0], 1 + overlapEnd - range[0] }); + } + } + else + { + /* + * reverse direction range + */ + if (start <= range[0] && end >= range[1]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[1]); + int overlapEnd = Math.min(end, range[0]); + positions + .add(new int[] + { 1 + range[0] - overlapStart, 1 + range[0] - overlapEnd }); + } + } + } } diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index b552c21..4e07a08 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -20,6 +20,13 @@ */ package jalview.util; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + import jalview.analysis.AlignmentSorter; import jalview.api.AlignViewportI; import jalview.commands.CommandI; @@ -39,13 +46,6 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - /** * Helper methods for manipulations involving sequence mappings. * @@ -546,8 +546,7 @@ public final class MappingUtils while (regions.hasNext()) { mapHiddenColumns(regions.next(), codonFrames, newHidden, - fromSequences, - toSequences, fromGapChar); + fromSequences, toSequences, fromGapChar); } return; // mappedColumns; } @@ -965,7 +964,7 @@ public final class MappingUtils int min = Math.min(range[0], range[1]); int max = Math.max(range[0], range[1]); - + return (min <= queryRange[0] && max >= queryRange[0] && min <= queryRange[1] && max >= queryRange[1]); } @@ -980,8 +979,7 @@ public final class MappingUtils * a list of (single) [start, end] ranges * @return */ - public static void removeEndPositions(int positions, - List ranges) + public static void removeEndPositions(int positions, List ranges) { int toRemove = positions; Iterator it = new ReverseListIterator<>(ranges); @@ -993,8 +991,8 @@ public final class MappingUtils /* * not coded for [start1, end1, start2, end2, ...] */ - System.err - .println("MappingUtils.removeEndPositions doesn't handle multiple ranges"); + System.err.println( + "MappingUtils.removeEndPositions doesn't handle multiple ranges"); return; } @@ -1004,8 +1002,8 @@ public final class MappingUtils /* * not coded for a reverse strand range (end < start) */ - System.err - .println("MappingUtils.removeEndPositions doesn't handle reverse strand"); + System.err.println( + "MappingUtils.removeEndPositions doesn't handle reverse strand"); return; } if (length > toRemove) @@ -1020,4 +1018,25 @@ public final class MappingUtils } } } + + /** + * Converts a list of {@code start-end} ranges to a single array of + * {@code start1, end1, start2, ... } ranges + * + * @param ranges + * @return + */ + public static int[] rangeListToArray(List ranges) + { + int rangeCount = ranges.size(); + int[] result = new int[rangeCount * 2]; + int j = 0; + for (int i = 0; i < rangeCount; i++) + { + int[] range = ranges.get(i); + result[j++] = range[0]; + result[j++] = range[1]; + } + return result; + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index bca778d..07da78f 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -276,6 +276,8 @@ public class MapListTest assertEquals("[4, 12]", Arrays.toString(ml.locateInFrom(2, 4))); assertEquals("[7, 12]", Arrays.toString(ml.locateInFrom(3, 4))); assertEquals("[10, 12]", Arrays.toString(ml.locateInFrom(4, 4))); + // reversing the range reverses the result: + assertEquals("[12, 7]", Arrays.toString(ml.locateInFrom(4, 3))); // fails with [10, 9] ! assertNull(ml.locateInFrom(0, 0)); assertNull(ml.locateInFrom(1, 5)); @@ -303,7 +305,17 @@ public class MapListTest assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4))); } - /** + @Test(groups = { "Functional" }) + public void testLocateInFrom_reverseStrand() + { + int[] codons = new int[] { 12, 1 }; + int[] protein = new int[] { 1, 4 }; + MapList ml = new MapList(codons, protein, 3, 1); + assertEquals("[12, 10]", Arrays.toString(ml.locateInFrom(1, 1))); + assertEquals("[9, 4]", Arrays.toString(ml.locateInFrom(2, 3))); + } + + /** * Tests for method that locates ranges in the 'to' map for given range in the * 'from' map. */ @@ -325,6 +337,8 @@ public class MapListTest assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 12))); assertEquals("[2, 2]", Arrays.toString(ml.locateInTo(4, 6))); assertEquals("[2, 4]", Arrays.toString(ml.locateInTo(4, 12))); + // reversing the 'from' range reverses the result + assertEquals("[4, 2]", Arrays.toString(ml.locateInTo(12, 4))); /* * A part codon is treated as if a whole one. @@ -363,11 +377,12 @@ public class MapListTest * Can't map from an unmapped position */ assertNull(ml.locateInTo(1, 2)); + assertNull(ml.locateInTo(1, 4)); assertNull(ml.locateInTo(2, 4)); assertNull(ml.locateInTo(4, 4)); /* - * Valid range or subrange of codon1 maps to protein1. + * Valid range or subrange of codon1 maps to protein1 */ assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 2))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(3, 3))); @@ -380,7 +395,6 @@ public class MapListTest // codon positions 7 to 17 (part) cover proteins 2/3/4 at positions 3/4/6 assertEquals("[3, 4, 6, 6]", Arrays.toString(ml.locateInTo(7, 17))); - } /** @@ -966,6 +980,9 @@ public class MapListTest @Test(groups = { "Functional" }) public void testLocateInFrom_withOverlap() { + /* + * gene to protein... + */ int[] codons = new int[] { 1, 12, 12, 17 }; int[] protein = new int[] { 1, 6 }; MapList ml = new MapList(codons, protein, 3, 1); @@ -986,6 +1003,16 @@ public class MapListTest assertNull(ml.locateInFrom(0, 0)); assertNull(ml.locateInFrom(1, 7)); assertNull(ml.locateInFrom(-1, 1)); + + /* + * gene to CDS...from EMBL:MN908947 + */ + int [] gene = new int[] { 266, 13468, 13468, 21555 }; + int [] cds = new int[] { 1, 21291 }; + ml = new MapList(gene, cds, 1, 1); + assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13203, 13203))); + assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13204, 13204))); + assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13203, 13204))); } /** @@ -994,6 +1021,9 @@ public class MapListTest @Test(groups = { "Functional" }) public void testLocateInTo_withOverlap() { + /* + * gene to protein... + */ int[] codons = new int[] { 1, 12, 12, 17 }; int[] protein = new int[] { 1, 6 }; MapList ml = new MapList(codons, protein, 3, 1); @@ -1011,6 +1041,14 @@ public class MapListTest assertNull(ml.locateInTo(0, 0)); assertNull(ml.locateInTo(1, 18)); assertNull(ml.locateInTo(-1, 1)); + + /* + * gene to CDS...from EMBL:MN908947 + */ + int [] gene = new int[] { 266, 13468, 13468, 21555 }; + int [] cds = new int[] { 1, 21291 }; + ml = new MapList(gene, cds, 1, 1); + assertEquals("[13203, 13204]", Arrays.toString(ml.locateInTo(13468, 13468))); } @Test(groups = { "Functional" }) @@ -1024,11 +1062,11 @@ public class MapListTest } @Test(groups = { "Functional" }) - public void testCountPos() + public void testCountPositions() { try { - MapList.countPos(null, 1); + MapList.countPositions(null, 1); fail("expected exception"); } catch (NullPointerException e) { @@ -1036,53 +1074,121 @@ public class MapListTest } List intervals = new ArrayList<>(); - assertNull(MapList.countPos(intervals, 1)); + assertNull(MapList.countPositions(intervals, 1)); /* * forward strand */ intervals.add(new int[] {10, 20}); - assertNull(MapList.countPos(intervals, 9)); - assertNull(MapList.countPos(intervals, 21)); - assertArrayEquals(new int[] {1, 1}, MapList.countPos(intervals, 10)); - assertArrayEquals(new int[] {6, 1}, MapList.countPos(intervals, 15)); - assertArrayEquals(new int[] {11, 1}, MapList.countPos(intervals, 20)); + assertNull(MapList.countPositions(intervals, 9)); + assertNull(MapList.countPositions(intervals, 21)); + assertArrayEquals(new int[] {1, 1}, MapList.countPositions(intervals, 10)); + assertArrayEquals(new int[] {6, 1}, MapList.countPositions(intervals, 15)); + assertArrayEquals(new int[] {11, 1}, MapList.countPositions(intervals, 20)); intervals.add(new int[] {25, 25}); - assertArrayEquals(new int[] {12, 1}, MapList.countPos(intervals, 25)); + assertArrayEquals(new int[] {12, 1}, MapList.countPositions(intervals, 25)); // next interval repeats position 25 - which should be counted twice if traversed intervals.add(new int[] {25, 26}); - assertArrayEquals(new int[] {12, 1}, MapList.countPos(intervals, 25)); - assertArrayEquals(new int[] {14, 1}, MapList.countPos(intervals, 26)); + assertArrayEquals(new int[] {12, 1}, MapList.countPositions(intervals, 25)); + assertArrayEquals(new int[] {14, 1}, MapList.countPositions(intervals, 26)); /* * reverse strand */ intervals.clear(); intervals.add(new int[] {5, -5}); - assertNull(MapList.countPos(intervals, 6)); - assertNull(MapList.countPos(intervals, -6)); - assertArrayEquals(new int[] {1, -1}, MapList.countPos(intervals, 5)); - assertArrayEquals(new int[] {7, -1}, MapList.countPos(intervals, -1)); - assertArrayEquals(new int[] {11, -1}, MapList.countPos(intervals, -5)); + assertNull(MapList.countPositions(intervals, 6)); + assertNull(MapList.countPositions(intervals, -6)); + assertArrayEquals(new int[] {1, -1}, MapList.countPositions(intervals, 5)); + assertArrayEquals(new int[] {7, -1}, MapList.countPositions(intervals, -1)); + assertArrayEquals(new int[] {11, -1}, MapList.countPositions(intervals, -5)); /* * reverse then forward */ intervals.add(new int[] {5, 10}); - assertArrayEquals(new int[] {13, 1}, MapList.countPos(intervals, 6)); + assertArrayEquals(new int[] {13, 1}, MapList.countPositions(intervals, 6)); /* * reverse then forward then reverse */ intervals.add(new int[] {-10, -20}); - assertArrayEquals(new int[] {20, -1}, MapList.countPos(intervals, -12)); + assertArrayEquals(new int[] {20, -1}, MapList.countPositions(intervals, -12)); /* * an interval [x, x] is treated as forward */ intervals.add(new int[] {30, 30}); - assertArrayEquals(new int[] {29, 1}, MapList.countPos(intervals, 30)); + assertArrayEquals(new int[] {29, 1}, MapList.countPositions(intervals, 30)); + + /* + * it is the first matched occurrence that is returned + */ + intervals.clear(); + intervals.add(new int[] {1, 2}); + intervals.add(new int[] {2, 3}); + assertArrayEquals(new int[] {2, 1}, MapList.countPositions(intervals, 2)); + intervals.add(new int[] {-1, -2}); + intervals.add(new int[] {-2, -3}); + assertArrayEquals(new int[] {6, -1}, MapList.countPositions(intervals, -2)); + } + + @Test(groups = { "Functional" }) + public void testFindOverlapPositions() + { + List ranges = new ArrayList<>(); + List overlaps = MapList.findOverlapPositions(ranges, 20, 30); + assertTrue(overlaps.isEmpty()); + ranges.add(new int[] {15, 25}); + overlaps = MapList.findOverlapPositions(ranges, 20, 30); + assertEquals(1, overlaps.size()); + assertArrayEquals(new int[] {6, 11}, overlaps.get(0)); + } + + @Test(groups = { "Functional" }) + public void testMapWords() + { + List ranges = new ArrayList<>(); + + /* + * 1:1 (trivial) case + */ + ranges.add(new int[] {2, 4}); + ranges.add(new int[] {6, 9}); + MapList.mapWords(ranges, 1, 1); + assertEquals(ranges.size(), 2); + assertArrayEquals(new int[] {2, 4}, ranges.get(0)); + assertArrayEquals(new int[] {6, 9}, ranges.get(1)); + + /* + * 1:3 case (peptide to codon ranges) + */ + MapList.mapWords(ranges, 1, 3); + assertEquals(ranges.size(), 2); + assertArrayEquals(new int[] {6, 14}, ranges.get(0)); + assertArrayEquals(new int[] {18, 29}, ranges.get(1)); + + /* + * 3:1 case (codon or part codon to peptide) + */ + ranges.clear(); + ranges.add(new int[] {0, 5}); // 2 whole codons + ranges.add(new int[] {7, 11}); // part + whole codon + ranges.add(new int[] {15, 19}); // whole + part codon + ranges.add(new int[] {23, 27}); // part + part codon + ranges.add(new int[] {30, 30}); // first base of codon + ranges.add(new int[] {31, 31}); // second base of codon + ranges.add(new int[] {32, 32}); // third base of codon + MapList.mapWords(ranges, 3, 1); + assertEquals(ranges.size(), 7); + assertArrayEquals(new int[] {0, 1}, ranges.get(0)); + assertArrayEquals(new int[] {2, 3}, ranges.get(1)); + assertArrayEquals(new int[] {5, 6}, ranges.get(2)); + assertArrayEquals(new int[] {7, 9}, ranges.get(3)); + assertArrayEquals(new int[] {10, 10}, ranges.get(4)); + assertArrayEquals(new int[] {10, 10}, ranges.get(5)); + assertArrayEquals(new int[] {10, 10}, ranges.get(6)); } }