List<int[]> shiftFrom, int toRatio)
{
// TODO: javadoc; tests
- int[] fromCount = countPos(shiftTo, pos);
+ int[] fromCount = countPositions(shiftTo, pos);
if (fromCount == null)
{
return null;
int[] toPos = traverseToPosition(shiftFrom, toCount);
if (toPos == null)
{
- return null; // throw new Error("Bad Mapping!");
+ return null;
}
- // System.out.println(fromCount[0]+" "+fromCount[1]+" "+toCount);
return new int[] { toPos[0], fromRemainder, toPos[1] };
}
/**
- * count how many positions pos is along the series of intervals.
+ * Counts how many positions pos is along the series of intervals. Returns an
+ * array of two values:
+ * <ul>
+ * <li>the number of positions traversed (inclusive) to reach {@code pos}</li>
+ * <li>+1 if the last interval traversed is forward, -1 if in a negative
+ * direction</li>
+ * </ul>
+ * Returns null if {@code pos} does not lie in any of the given intervals.
*
- * @param shiftTo
+ * @param intervals
+ * a list of start-end intervals
* @param pos
- * @return number of positions or null if pos is not within intervals
+ * a position that may lie in one (or more) of the intervals
+ * @return
*/
- protected static int[] countPos(List<int[]> shiftTo, int pos)
+ protected static int[] countPositions(List<int[]> intervals, int pos)
{
- int count = 0, intv[], iv = 0, ivSize = shiftTo.size();
+ int count = 0;
+ int iv = 0;
+ int ivSize = intervals.size();
+
while (iv < ivSize)
{
- intv = shiftTo.get(iv++);
+ int[] intv = intervals.get(iv++);
if (intv[0] <= intv[1])
{
+ /*
+ * forwards interval
+ */
if (pos >= intv[0] && pos <= intv[1])
{
return new int[] { count + pos - intv[0] + 1, +1 };
}
else
{
+ /*
+ * reverse interval
+ */
if (pos >= intv[1] && pos <= intv[0])
{
return new int[] { count + intv[0] - pos + 1, -1 };
int fromStart[] = shiftTo(start);
// needs to be inclusive of end of symbol position
int fromEnd[] = shiftTo(end);
-
+ System.out.println("locateInFrom");
return getIntervals(fromShifts, fromStart, fromEnd, fromRatio);
}
{
return fromShifts.size() == 1 && toShifts.size() == 1;
}
+
+ /**
+ * Returns the [start, end, start, end, ...] ranges in the 'from' range that
+ * map to the given start-end in the 'to' range. Returns null if either
+ * {@code start} or {@code end} is not a mapped 'to' range position.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInFrom2(int start, int end)
+ {
+ List<int[]> ranges = mapBetween(start, end, toShifts, fromShifts,
+ toRatio, fromRatio);
+
+ // TODO: or just return the List and adjust calling code to match
+ return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
+ }
+
+ /**
+ * Returns the [start, end, start, end, ...] ranges in the 'to' range that map
+ * to the given start-end in the 'from' range. Returns null if either
+ * {@code start} or {@code end} is not a mapped 'from' range position.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInTo2(int start, int end)
+ {
+ List<int[]> ranges = mapBetween(start, end, fromShifts, toShifts,
+ fromRatio, toRatio);
+
+ return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
+ }
+
+ /**
+ * A helper method for navigating the mapping. Returns a (possibly empty) list
+ * of [start-end] positions in {@code ranges2} that map to positions in
+ * {@code ranges1} between {@code start} and {@code end}.
+ *
+ * @param start
+ * @param end
+ * @param ranges1
+ * @param ranges2
+ * @param wordLength1
+ * @param wordLength2
+ * @return
+ */
+ final static List<int[]> mapBetween(int start, int end,
+ List<int[]> ranges1, List<int[]> ranges2, int wordLength1,
+ int wordLength2)
+ {
+ /*
+ * first traverse ranges1 and record count of mapped positions
+ * to any that overlap start-end
+ */
+ List<int[]> overlaps = findOverlapPositions(ranges1, start, end);
+ if (overlaps.isEmpty())
+ {
+ return overlaps;
+ }
+
+ /*
+ * convert positions to equivalent 'word' positions in ranges
+ */
+ mapWords(overlaps, wordLength1, wordLength2);
+
+ /*
+ * walk ranges2 and record the values found at
+ * the offsets in 'overlaps'
+ */
+ List<int[]> mapped = new ArrayList<>();
+ final int s1 = overlaps.size();
+ final int s2 = ranges2.size();
+ int rangeIndex = 0;
+ int rangeOffset = 0;
+ int mappedCount = 0;
+
+ for (int i = 0 ; i < s1 ; i++)
+ {
+ /*
+ * for each range in overlaps, walk ranges2 and record the values
+ * at the offsets, advancing rangeIndex / Offset
+ */
+ int [] mappedRange = ranges2.get(rangeIndex);
+ int [] overlap = overlaps.get(s1);
+ while (mappedCount < overlap[1])
+ {
+
+ }
+ }
+
+ return mapped;
+ }
+
+ /**
+ * Converts the start-end positions (counted from zero) in the {@code ranges}
+ * list from one word length to another. Start-end positions are expanded if
+ * necessary to cover a whole word of length {@code wordLength1}. Positions
+ * are then divided by {@code wordLength1} and multiplied by
+ * {@code wordLength2} to give equivalent mapped words.
+ * <p>
+ * Put simply, this converts peptide residue positions to the corresponding
+ * codon ranges, and codons - including partial codons - to the corresponding
+ * peptide positions; for example
+ *
+ * <pre>
+ * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [1, 4]
+ * </pre>
+ *
+ * @param ranges
+ * @param wordLength1
+ * @param wordLength2
+ * @return
+ */
+ final static void mapWords(List<int[]> ranges, int wordLength1,
+ int wordLength2)
+ {
+ if (wordLength1 == 1 && wordLength2 == 1)
+ {
+ return; // nothing to do here
+ }
+ int s = ranges.size();
+ for (int i = 0; i < s; i++)
+ {
+ int[] range = ranges.get(i);
+
+ /*
+ * expand range start to the start of a word,
+ * and convert to wordLength2
+ */
+ range[0] -= range[0] % wordLength1;
+ range[0] = range[0] / wordLength1 * wordLength2;
+
+ /*
+ * similar calculation for range end, adding
+ * (wordLength2 - 1) for end of mapped word
+ */
+ range[1] -= range[1] % wordLength1;
+ range[1] = range[1] / wordLength1 * wordLength2;
+ range[1] += wordLength2 - 1;
+ }
+ }
+
+ /**
+ * Helper method that returns a (possibly empty) list of offsets in
+ * {@code ranges} to subranges that overlap {@code start-end}. The list
+ * returned holds counts of the number of positions traversed (inclusive) to
+ * reach the overlapping positions, not the overlapping values. Returns null
+ * if there are no overlaps.
+ *
+ * @param ranges
+ * @param start
+ * @param end
+ * @return
+ */
+ final static List<int[]> findOverlapPositions(List<int[]> ranges,
+ int start, int end)
+ {
+ List<int[]> positions = new ArrayList<>();
+ int pos = 0;
+ int s = ranges.size();
+ for (int i = 0; i < s; i++)
+ {
+ int[] range = ranges.get(i);
+ addOverlap(positions, pos, range, start, end);
+ pos += 1 + Math.abs(range[1] - range[0]);
+ }
+ return positions;
+ }
+
+ /**
+ * A helper method that checks whether {@code range} overlaps
+ * {@code start-end}, and if so adds the positional offset of the overlap to
+ * {@code positions}.
+ *
+ * @param positions
+ * a list of map offsets to add to
+ * @param pos
+ * the number of mapped positions already visited
+ * @param range
+ * a from-to range (may be forward or reverse)
+ * @param start
+ * position to test for overlap in range
+ * @param end
+ * position to test for overlap in range
+ * @return
+ */
+ final static void addOverlap(List<int[]> positions, int pos, int[] range,
+ int start, int end)
+ {
+ if (range[1] >= range[0])
+ {
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ int overlapEnd = Math.min(end, range[1]);
+ positions
+ .add(new int[]
+ { 1 + overlapStart - range[0], 1 + overlapEnd - range[0] });
+ }
+ }
+ else
+ {
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[1]);
+ int overlapEnd = Math.min(end, range[0]);
+ positions
+ .add(new int[]
+ { 1 + range[0] - overlapStart, 1 + range[0] - overlapEnd });
+ }
+ }
+ }
}
assertEquals("[4, 12]", Arrays.toString(ml.locateInFrom(2, 4)));
assertEquals("[7, 12]", Arrays.toString(ml.locateInFrom(3, 4)));
assertEquals("[10, 12]", Arrays.toString(ml.locateInFrom(4, 4)));
+ // reversing the range reverses the result:
+ assertEquals("[12, 7]", Arrays.toString(ml.locateInFrom(4, 3))); // fails with [10, 9] !
assertNull(ml.locateInFrom(0, 0));
assertNull(ml.locateInFrom(1, 5));
assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4)));
}
- /**
+ @Test(groups = { "Functional" })
+ public void testLocateInFrom_reverseStrand()
+ {
+ int[] codons = new int[] { 12, 1 };
+ int[] protein = new int[] { 1, 4 };
+ MapList ml = new MapList(codons, protein, 3, 1);
+ assertEquals("[12, 10]", Arrays.toString(ml.locateInFrom(1, 1)));
+ assertEquals("[9, 4]", Arrays.toString(ml.locateInFrom(2, 3)));
+ }
+
+ /**
* Tests for method that locates ranges in the 'to' map for given range in the
* 'from' map.
*/
assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 12)));
assertEquals("[2, 2]", Arrays.toString(ml.locateInTo(4, 6)));
assertEquals("[2, 4]", Arrays.toString(ml.locateInTo(4, 12)));
+ // reversing the 'from' range reverses the result
+ assertEquals("[4, 2]", Arrays.toString(ml.locateInTo(12, 4)));
/*
* A part codon is treated as if a whole one.
* Can't map from an unmapped position
*/
assertNull(ml.locateInTo(1, 2));
+ assertNull(ml.locateInTo(1, 4));
assertNull(ml.locateInTo(2, 4));
assertNull(ml.locateInTo(4, 4));
/*
- * Valid range or subrange of codon1 maps to protein1.
+ * Valid range or subrange of codon1 maps to protein1
*/
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 2)));
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(3, 3)));
// codon positions 7 to 17 (part) cover proteins 2/3/4 at positions 3/4/6
assertEquals("[3, 4, 6, 6]", Arrays.toString(ml.locateInTo(7, 17)));
-
}
/**
@Test(groups = { "Functional" })
public void testLocateInFrom_withOverlap()
{
+ /*
+ * gene to protein...
+ */
int[] codons = new int[] { 1, 12, 12, 17 };
int[] protein = new int[] { 1, 6 };
MapList ml = new MapList(codons, protein, 3, 1);
assertNull(ml.locateInFrom(0, 0));
assertNull(ml.locateInFrom(1, 7));
assertNull(ml.locateInFrom(-1, 1));
+
+ /*
+ * gene to CDS...from EMBL:MN908947
+ */
+ int [] gene = new int[] { 266, 13468, 13468, 21555 };
+ int [] cds = new int[] { 1, 21291 };
+ ml = new MapList(gene, cds, 1, 1);
+ assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13203, 13203)));
+ assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13204, 13204)));
+ assertEquals("[13468, 13468]", Arrays.toString(ml.locateInFrom(13203, 13204)));
}
/**
@Test(groups = { "Functional" })
public void testLocateInTo_withOverlap()
{
+ /*
+ * gene to protein...
+ */
int[] codons = new int[] { 1, 12, 12, 17 };
int[] protein = new int[] { 1, 6 };
MapList ml = new MapList(codons, protein, 3, 1);
assertNull(ml.locateInTo(0, 0));
assertNull(ml.locateInTo(1, 18));
assertNull(ml.locateInTo(-1, 1));
+
+ /*
+ * gene to CDS...from EMBL:MN908947
+ */
+ int [] gene = new int[] { 266, 13468, 13468, 21555 };
+ int [] cds = new int[] { 1, 21291 };
+ ml = new MapList(gene, cds, 1, 1);
+ assertEquals("[13203, 13204]", Arrays.toString(ml.locateInTo(13468, 13468)));
}
@Test(groups = { "Functional" })
}
@Test(groups = { "Functional" })
- public void testCountPos()
+ public void testCountPositions()
{
try
{
- MapList.countPos(null, 1);
+ MapList.countPositions(null, 1);
fail("expected exception");
} catch (NullPointerException e)
{
}
List<int[]> intervals = new ArrayList<>();
- assertNull(MapList.countPos(intervals, 1));
+ assertNull(MapList.countPositions(intervals, 1));
/*
* forward strand
*/
intervals.add(new int[] {10, 20});
- assertNull(MapList.countPos(intervals, 9));
- assertNull(MapList.countPos(intervals, 21));
- assertArrayEquals(new int[] {1, 1}, MapList.countPos(intervals, 10));
- assertArrayEquals(new int[] {6, 1}, MapList.countPos(intervals, 15));
- assertArrayEquals(new int[] {11, 1}, MapList.countPos(intervals, 20));
+ assertNull(MapList.countPositions(intervals, 9));
+ assertNull(MapList.countPositions(intervals, 21));
+ assertArrayEquals(new int[] {1, 1}, MapList.countPositions(intervals, 10));
+ assertArrayEquals(new int[] {6, 1}, MapList.countPositions(intervals, 15));
+ assertArrayEquals(new int[] {11, 1}, MapList.countPositions(intervals, 20));
intervals.add(new int[] {25, 25});
- assertArrayEquals(new int[] {12, 1}, MapList.countPos(intervals, 25));
+ assertArrayEquals(new int[] {12, 1}, MapList.countPositions(intervals, 25));
// next interval repeats position 25 - which should be counted twice if traversed
intervals.add(new int[] {25, 26});
- assertArrayEquals(new int[] {12, 1}, MapList.countPos(intervals, 25));
- assertArrayEquals(new int[] {14, 1}, MapList.countPos(intervals, 26));
+ assertArrayEquals(new int[] {12, 1}, MapList.countPositions(intervals, 25));
+ assertArrayEquals(new int[] {14, 1}, MapList.countPositions(intervals, 26));
/*
* reverse strand
*/
intervals.clear();
intervals.add(new int[] {5, -5});
- assertNull(MapList.countPos(intervals, 6));
- assertNull(MapList.countPos(intervals, -6));
- assertArrayEquals(new int[] {1, -1}, MapList.countPos(intervals, 5));
- assertArrayEquals(new int[] {7, -1}, MapList.countPos(intervals, -1));
- assertArrayEquals(new int[] {11, -1}, MapList.countPos(intervals, -5));
+ assertNull(MapList.countPositions(intervals, 6));
+ assertNull(MapList.countPositions(intervals, -6));
+ assertArrayEquals(new int[] {1, -1}, MapList.countPositions(intervals, 5));
+ assertArrayEquals(new int[] {7, -1}, MapList.countPositions(intervals, -1));
+ assertArrayEquals(new int[] {11, -1}, MapList.countPositions(intervals, -5));
/*
* reverse then forward
*/
intervals.add(new int[] {5, 10});
- assertArrayEquals(new int[] {13, 1}, MapList.countPos(intervals, 6));
+ assertArrayEquals(new int[] {13, 1}, MapList.countPositions(intervals, 6));
/*
* reverse then forward then reverse
*/
intervals.add(new int[] {-10, -20});
- assertArrayEquals(new int[] {20, -1}, MapList.countPos(intervals, -12));
+ assertArrayEquals(new int[] {20, -1}, MapList.countPositions(intervals, -12));
/*
* an interval [x, x] is treated as forward
*/
intervals.add(new int[] {30, 30});
- assertArrayEquals(new int[] {29, 1}, MapList.countPos(intervals, 30));
+ assertArrayEquals(new int[] {29, 1}, MapList.countPositions(intervals, 30));
+
+ /*
+ * it is the first matched occurrence that is returned
+ */
+ intervals.clear();
+ intervals.add(new int[] {1, 2});
+ intervals.add(new int[] {2, 3});
+ assertArrayEquals(new int[] {2, 1}, MapList.countPositions(intervals, 2));
+ intervals.add(new int[] {-1, -2});
+ intervals.add(new int[] {-2, -3});
+ assertArrayEquals(new int[] {6, -1}, MapList.countPositions(intervals, -2));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFindOverlapPositions()
+ {
+ List<int[]> ranges = new ArrayList<>();
+ List<int[]> overlaps = MapList.findOverlapPositions(ranges, 20, 30);
+ assertTrue(overlaps.isEmpty());
+ ranges.add(new int[] {15, 25});
+ overlaps = MapList.findOverlapPositions(ranges, 20, 30);
+ assertEquals(1, overlaps.size());
+ assertArrayEquals(new int[] {6, 11}, overlaps.get(0));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testMapWords()
+ {
+ List<int[]> ranges = new ArrayList<>();
+
+ /*
+ * 1:1 (trivial) case
+ */
+ ranges.add(new int[] {2, 4});
+ ranges.add(new int[] {6, 9});
+ MapList.mapWords(ranges, 1, 1);
+ assertEquals(ranges.size(), 2);
+ assertArrayEquals(new int[] {2, 4}, ranges.get(0));
+ assertArrayEquals(new int[] {6, 9}, ranges.get(1));
+
+ /*
+ * 1:3 case (peptide to codon ranges)
+ */
+ MapList.mapWords(ranges, 1, 3);
+ assertEquals(ranges.size(), 2);
+ assertArrayEquals(new int[] {6, 14}, ranges.get(0));
+ assertArrayEquals(new int[] {18, 29}, ranges.get(1));
+
+ /*
+ * 3:1 case (codon or part codon to peptide)
+ */
+ ranges.clear();
+ ranges.add(new int[] {0, 5}); // 2 whole codons
+ ranges.add(new int[] {7, 11}); // part + whole codon
+ ranges.add(new int[] {15, 19}); // whole + part codon
+ ranges.add(new int[] {23, 27}); // part + part codon
+ ranges.add(new int[] {30, 30}); // first base of codon
+ ranges.add(new int[] {31, 31}); // second base of codon
+ ranges.add(new int[] {32, 32}); // third base of codon
+ MapList.mapWords(ranges, 3, 1);
+ assertEquals(ranges.size(), 7);
+ assertArrayEquals(new int[] {0, 1}, ranges.get(0));
+ assertArrayEquals(new int[] {2, 3}, ranges.get(1));
+ assertArrayEquals(new int[] {5, 6}, ranges.get(2));
+ assertArrayEquals(new int[] {7, 9}, ranges.get(3));
+ assertArrayEquals(new int[] {10, 10}, ranges.get(4));
+ assertArrayEquals(new int[] {10, 10}, ranges.get(5));
+ assertArrayEquals(new int[] {10, 10}, ranges.get(6));
}
}