import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.List;
/**
*
* Use at your own risk!
*
- * TODO: efficient implementation of private posMap method
- *
* TODO: test/ensure that sense of from and to ratio start position is conserved
* (codon start position recovery)
*/
*/
protected int[][] makeFromMap()
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
return posMap(fromShifts, fromRatio, toShifts, toRatio); // 'from' shifts and ratio first, then 'to'
}
*/
protected int[][] makeToMap()
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
return posMap(toShifts, toRatio, fromShifts, fromRatio); // 'to' shifts and ratio first, then 'from'
}
private int[][] posMap(List<int[]> shiftTo, int ratio,
List<int[]> shiftFrom, int toRatio)
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
int iv = 0, ivSize = shiftTo.size();
if (iv >= ivSize)
{
List<int[]> shiftFrom, int toRatio)
{
// TODO: javadoc; tests
- int[] fromCount = countPos(shiftTo, pos);
+ int[] fromCount = countPositions(shiftTo, pos);
if (fromCount == null)
{
return null;
int[] toPos = traverseToPosition(shiftFrom, toCount);
if (toPos == null)
{
- return null; // throw new Error("Bad Mapping!");
+ return null;
}
- // System.out.println(fromCount[0]+" "+fromCount[1]+" "+toCount);
return new int[] { toPos[0], fromRemainder, toPos[1] };
}
/**
- * count how many positions pos is along the series of intervals.
+ * Counts how many positions pos is along the series of intervals. Returns an
+ * array of two values:
+ * <ul>
+ * <li>the number of positions traversed (inclusive) to reach {@code pos}</li>
+ * <li>+1 if the last interval traversed is forward, -1 if in a negative
+ * direction</li>
+ * </ul>
+ * Returns null if {@code pos} does not lie in any of the given intervals.
*
- * @param shiftTo
+ * @param intervals
+ * a list of start-end intervals
* @param pos
- * @return number of positions or null if pos is not within intervals
+ * a position that may lie in one (or more) of the intervals
+ * @return [count, direction] as described above, or null if {@code pos} is
+ * not in any of the intervals
*/
- protected static int[] countPos(List<int[]> shiftTo, int pos)
+ protected static int[] countPositions(List<int[]> intervals, int pos)
{
- int count = 0, intv[], iv = 0, ivSize = shiftTo.size();
+ int count = 0;
+ int iv = 0;
+ int ivSize = intervals.size();
+
while (iv < ivSize)
{
- intv = shiftTo.get(iv++);
+ int[] intv = intervals.get(iv++);
if (intv[0] <= intv[1])
{
+ /*
+ * forwards interval
+ */
if (pos >= intv[0] && pos <= intv[1])
{
return new int[] { count + pos - intv[0] + 1, +1 };
}
else
{
+ /*
+ * reverse interval
+ */
if (pos >= intv[1] && pos <= intv[0])
{
return new int[] { count + intv[0] - pos + 1, -1 };
}
/**
- * find series of intervals mapping from start-end in the From map.
- *
- * @param start
- * position mapped 'to'
- * @param end
- * position mapped 'to'
- * @return series of [start, end] ranges in sequence mapped 'from'
- */
- public int[] locateInFrom(int start, int end)
- {
- // inefficient implementation
- int fromStart[] = shiftTo(start);
- // needs to be inclusive of end of symbol position
- int fromEnd[] = shiftTo(end);
-
- return getIntervals(fromShifts, fromStart, fromEnd, fromRatio);
- }
-
- /**
- * find series of intervals mapping from start-end in the to map.
- *
- * @param start
- * position mapped 'from'
- * @param end
- * position mapped 'from'
- * @return series of [start, end] ranges in sequence mapped 'to'
- */
- public int[] locateInTo(int start, int end)
- {
- int toStart[] = shiftFrom(start);
- int toEnd[] = shiftFrom(end);
- return getIntervals(toShifts, toStart, toEnd, toRatio);
- }
-
- /**
* like shift - except returns the intervals in the given vector of shifts
* which were spanned in traversing fromStart to fromEnd
*
*/
public int getToPosition(int mpos)
{
- // TODO not used - remove??
int[] mp = shiftTo(mpos);
if (mp != null)
{
}
/**
- * get range of positions in To frame for the mpos word in From
- *
- * @param mpos
- * position in From
- * @return null or int[] first position in To for mpos, last position in to
- * for Mpos
- */
- public int[] getToWord(int mpos)
- {
- int[] mp = shiftTo(mpos);
- if (mp != null)
- {
- return new int[] { mp[0], mp[0] + mp[2] * (getFromRatio() - 1) };
- }
- return null;
- }
-
- /**
- * get From position in the associated reference frame for position pos in the
- * associated sequence.
- *
- * @param pos
- * @return
- */
- public int getMappedPosition(int pos)
- {
- // TODO not used - remove??
- int[] mp = shiftFrom(pos);
- if (mp != null)
- {
- return mp[0];
- }
- return pos;
- }
-
- public int[] getMappedWord(int pos)
- {
- // TODO not used - remove??
- int[] mp = shiftFrom(pos);
- if (mp != null)
- {
- return new int[] { mp[0], mp[0] + mp[2] * (getToRatio() - 1) };
- }
- return null;
- }
-
- /**
*
* @return a MapList whose From range is this maplist's To Range, and vice
* versa
}
/**
- * test for containment rather than equivalence to another mapping
- *
- * @param map
- * to be tested for containment
- * @return true if local or mapped range map contains or is contained by this
- * mapping
- */
- public boolean containsEither(boolean local, MapList map)
- {
- // TODO not used - remove?
- if (local)
- {
- return ((getFromLowest() >= map.getFromLowest()
- && getFromHighest() <= map.getFromHighest())
- || (getFromLowest() <= map.getFromLowest()
- && getFromHighest() >= map.getFromHighest()));
- }
- else
- {
- return ((getToLowest() >= map.getToLowest()
- && getToHighest() <= map.getToHighest())
- || (getToLowest() <= map.getToLowest()
- && getToHighest() >= map.getToHighest()));
- }
- }
-
- /**
* String representation - for debugging, not guaranteed not to change
*/
@Override
List<int[]> toRanges = new ArrayList<>();
for (int[] range : getToRanges())
{
+ int fromLength = Math.abs(range[1] - range[0]) + 1;
int[] transferred = map.locateInTo(range[0], range[1]);
if (transferred == null || transferred.length % 2 != 0)
{
* convert [start1, end1, start2, end2, ...]
* to [[start1, end1], [start2, end2], ...]
*/
+ int toLength = 0;
for (int i = 0; i < transferred.length;)
{
toRanges.add(new int[] { transferred[i], transferred[i + 1] });
+ toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1;
i += 2;
}
+
+ /*
+ * check we mapped the full range - if not, abort
+ */
+ if (fromLength * map.getToRatio() != toLength * map.getFromRatio())
+ {
+ return null;
+ }
}
return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio);
{
return fromShifts.size() == 1 && toShifts.size() == 1;
}
+
+ /**
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'from' range
+ * that map to positions between {@code start} and {@code end} in the 'to'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * {@code end < start}. Returns null if no mapped positions are found in
+ * start-end.
+ * <p>
+ * Delegates to {@code mapPositions}, passing the 'to' shifts and ratio as the
+ * source and the 'from' shifts and ratio as the target.
+ *
+ * @param start
+ * first position of the query interval in the 'to' range; may be
+ * greater than {@code end}, in which case {@code mapPositions} swaps
+ * the two
+ * @param end
+ * last position (inclusive) of the query interval in the 'to' range
+ * @return start-end pairs in the 'from' range, or null if nothing in
+ * start-end is mapped
+ */
+ public int[] locateInFrom(int start, int end)
+ {
+ return mapPositions(start, end, toShifts, fromShifts,
+ toRatio, fromRatio);
+ }
+
+ /**
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range
+ * that map to positions between {@code start} and {@code end} in the 'from'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * {@code end < start}. Returns null if no mapped positions are found in
+ * start-end.
+ * <p>
+ * Delegates to {@code mapPositions}, passing the 'from' shifts and ratio as
+ * the source and the 'to' shifts and ratio as the target.
+ *
+ * @param start
+ * first position of the query interval in the 'from' range; may be
+ * greater than {@code end}, in which case {@code mapPositions} swaps
+ * the two
+ * @param end
+ * last position (inclusive) of the query interval in the 'from'
+ * range
+ * @return start-end pairs in the 'to' range, or null if nothing in start-end
+ * is mapped
+ */
+ public int[] locateInTo(int start, int end)
+ {
+ return mapPositions(start, end, fromShifts, toShifts,
+ fromRatio, toRatio);
+ }
+
+ /**
+ * Helper method that returns the [start1, end1, start2, end2, ...] positions
+ * in {@code targetRange} that map to positions between {@code start} and
+ * {@code end} in {@code sourceRange}. Note that for a reverse strand mapping
+ * this will return ranges with {@code end < start}. Returns null if no mapped
+ * positions are found in start-end.
+ * <p>
+ * Works in two passes: the offsets of overlapping source positions are marked
+ * in a BitSet (rescaled from source to target word length), then the
+ * positions found at those offsets are read back out of {@code targetRange}.
+ *
+ * @param start
+ * one bound of the query interval; swapped with {@code end} if it is
+ * the larger of the two
+ * @param end
+ * the other bound of the query interval (inclusive)
+ * @param sourceRange
+ * list of [start, end] intervals that the query is matched against
+ * @param targetRange
+ * list of [start, end] intervals that mapped positions are taken from
+ * @param sourceWordLength
+ * word length applied to offsets in {@code sourceRange}
+ * @param targetWordLength
+ * word length applied to offsets in {@code targetRange}
+ * @return null if no positions were mapped, otherwise the array built by
+ * {@code MappingUtils.rangeListToArray} from the mapped [start, end]
+ * pairs (presumably the pairs flattened in order - that helper is
+ * defined elsewhere, confirm there)
+ */
+ final static int[] mapPositions(int start, int end,
+ List<int[]> sourceRange, List<int[]> targetRange,
+ int sourceWordLength, int targetWordLength)
+ {
+ if (end < start) // accept the bounds in either order
+ {
+ int tmp = end;
+ end = start;
+ start = tmp;
+ }
+
+ /*
+ * traverse sourceRange and mark offsets in targetRange
+ * of any positions that lie in [start, end]
+ */
+ BitSet offsets = getMappedOffsetsForPositions(start, end, sourceRange,
+ sourceWordLength, targetWordLength);
+
+ /*
+ * traverse targetRange and collect positions at the marked offsets
+ */
+ List<int[]> mapped = getPositionsForOffsets(targetRange, offsets);
+
+ // TODO: or just return the List and adjust calling code to match
+ return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped);
+ }
+
+ /**
+ * Scans the list of {@code ranges} for any values (positions) that lie
+ * between start and end (inclusive), and records the <em>offsets</em> from
+ * the start of the list as a BitSet. If {@code wordLengthFrom} differs from
+ * {@code wordLengthTo}, each overlap is first widened to whole words of
+ * {@code wordLengthFrom} and then rescaled to the corresponding offsets in
+ * words of {@code wordLengthTo}.
+ *
+ * <pre>
+ * For example:
+ * 1:1 (e.g. gene to CDS):
+ * ranges { [10-20], [31-40] }, wordLengthFrom = wordLengthTo = 1
+ * for start = 1, end = 9, returns a BitSet with no bits set
+ * for start = 1, end = 11, returns a BitSet with bits 0-1 set
+ * for start = 15, end = 35, returns a BitSet with bits 5-15 set
+ * 1:3 (peptide to codon):
+ * ranges { [1-200] }, wordLengthFrom = 1, wordLengthTo = 3
+ * for start = 9, end = 9, returns a BitSet with bits 24-26 set
+ * 3:1 (codon to peptide):
+ * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLengthTo = 1
+ * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+ * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+ * 3:1 (circular DNA to peptide):
+ * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLengthTo = 1
+ * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
+ * </pre>
+ *
+ * @param start
+ * lowest position of interest (the comparisons below assume
+ * {@code start <= end})
+ * @param end
+ * highest position of interest (inclusive)
+ * @param ranges
+ * list of [start, end] intervals; an interval with end less than
+ * start is traversed in the reverse direction
+ * @param wordLengthFrom
+ * word length of the positions in {@code ranges}
+ * @param wordLengthTo
+ * word length that the returned offsets are expressed in
+ * @return a BitSet with a bit set at each mapped offset; no bits are set if
+ * nothing in {@code ranges} overlaps start-end
+ */
+ protected final static BitSet getMappedOffsetsForPositions(int start,
+ int end, List<int[]> ranges, int wordLengthFrom, int wordLengthTo)
+ {
+ BitSet overlaps = new BitSet();
+ int offset = 0; // count of positions in the ranges traversed so far
+ final int s1 = ranges.size();
+ for (int i = 0; i < s1; i++)
+ {
+ int[] range = ranges.get(i);
+ final int offset1 = offset; // offset of range[0] from the start of the list
+ int overlapStartOffset = -1;
+ int overlapEndOffset = -1;
+
+ if (range[1] >= range[0])
+ {
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ overlapStartOffset = offset1 + overlapStart - range[0];
+ int overlapEnd = Math.min(end, range[1]);
+ overlapEndOffset = offset1 + overlapEnd - range[0];
+ }
+ }
+ else
+ {
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
+ {
+ /*
+ * overlap; offsets are counted from range[0] (the higher value),
+ * so the higher position gives the lower offset
+ */
+ int overlapStart = Math.max(start, range[1]);
+ int overlapEnd = Math.min(end, range[0]);
+ overlapStartOffset = offset1 + range[0] - overlapEnd;
+ overlapEndOffset = offset1 + range[0] - overlapStart;
+ }
+ }
+
+ if (overlapStartOffset > -1)
+ {
+ /*
+ * found an overlap
+ */
+ if (wordLengthFrom != wordLengthTo)
+ {
+ /*
+ * convert any overlap found to whole words in the target range
+ * (e.g. treat any partial codon overlap as if the whole codon)
+ */
+ overlapStartOffset -= overlapStartOffset % wordLengthFrom;
+ overlapStartOffset = overlapStartOffset / wordLengthFrom
+ * wordLengthTo;
+
+ /*
+ * similar calculation for range end, adding
+ * (wordLengthTo - 1) for end of mapped word
+ */
+ overlapEndOffset -= overlapEndOffset % wordLengthFrom;
+ overlapEndOffset = overlapEndOffset / wordLengthFrom
+ * wordLengthTo;
+ overlapEndOffset += wordLengthTo - 1;
+ }
+ overlaps.set(overlapStartOffset, overlapEndOffset + 1); // toIndex is exclusive
+ }
+ offset += 1 + Math.abs(range[1] - range[0]); // length of this range, either direction
+ }
+ return overlaps;
+ }
+
+ /**
+ * Returns a (possibly empty) list of the [start-end] values (positions) at
+ * offsets in the {@code ranges} list that are marked by 'on' bits in the
+ * {@code offsets} bitset.
+ * <p>
+ * Offsets count positions cumulatively along {@code ranges} in list order,
+ * with offset 0 at the first position of the first range. Traversal stops as
+ * soon as the number of positions added reaches the number of set bits; any
+ * set bits beyond the total length of {@code ranges} are not reported.
+ *
+ * @param ranges
+ * list of [start, end] intervals; end may be less than start
+ * @param offsets
+ * the marked offsets; only read here
+ * @return list of [start, end] pairs, one for each run of marked offsets
+ * within a single range
+ */
+ protected final static List<int[]> getPositionsForOffsets(
+ List<int[]> ranges, BitSet offsets)
+ {
+ List<int[]> mapped = new ArrayList<>();
+ if (offsets.isEmpty())
+ {
+ return mapped;
+ }
+
+ /*
+ * count of positions preceding ranges[i]
+ */
+ int traversed = 0;
+
+ /*
+ * for each [from-to] range in ranges:
+ * - find subranges (if any) at marked offsets
+ * - add the start-end values at the marked positions
+ */
+ final int toAdd = offsets.cardinality(); // total number of marked offsets
+ int added = 0;
+ final int s2 = ranges.size();
+ for (int i = 0; added < toAdd && i < s2; i++)
+ {
+ int[] range = ranges.get(i);
+ added += addOffsetPositions(mapped, traversed, range, offsets);
+ traversed += Math.abs(range[1] - range[0]) + 1; // length of this range, either direction
+ }
+ return mapped;
+ }
+
+ /**
+ * Helper method that adds any start-end subranges of {@code range} that are
+ * at offsets in {@code range} marked by set bits in overlaps.
+ * {@code mapOffset} is added to {@code range} offset positions. Returns the
+ * count of positions added.
+ * <p>
+ * A run of set bits that extends past the end of {@code range} is cut off at
+ * the last position of {@code range}.
+ *
+ * @param mapped
+ * list that the [start, end] subranges found are appended to
+ * @param mapOffset
+ * offset within {@code overlaps} of the first position of
+ * {@code range}
+ * @param range
+ * a [start, end] interval; traversed in the reverse direction if end
+ * is less than start
+ * @param overlaps
+ * the marked offsets; only read here
+ * @return the total number of positions in the subranges added
+ */
+ final static int addOffsetPositions(List<int[]> mapped,
+ final int mapOffset, final int[] range, final BitSet overlaps)
+ {
+ final int rangeLength = 1 + Math.abs(range[1] - range[0]);
+ final int step = range[1] < range[0] ? -1 : 1;
+ int offsetStart = 0; // offset into range
+ int added = 0;
+
+ while (offsetStart < rangeLength)
+ {
+ /*
+ * find the start of the next marked overlap offset;
+ * if there is none, or it is beyond range, then finished
+ */
+ int overlapStart = overlaps.nextSetBit(mapOffset + offsetStart);
+ if (overlapStart == -1 || overlapStart - mapOffset >= rangeLength)
+ {
+ /*
+ * no more overlaps, or no more within range[]
+ */
+ return added;
+ }
+ overlapStart -= mapOffset; // now relative to the start of range
+
+ /*
+ * end of the overlap range is just before the next clear bit;
+ * restrict it to end of range if necessary;
+ * note we may add a reverse strand range here (end < start)
+ *
+ * NOTE(review): unlike nextSetBit, BitSet.nextClearBit is not documented
+ * to return -1, so the -1 guard below looks unreachable - confirm
+ */
+ int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1);
+ overlapEnd = (overlapEnd == -1) ? rangeLength - 1
+ : Math.min(rangeLength - 1, overlapEnd - mapOffset - 1);
+ int startPosition = range[0] + step * overlapStart;
+ int endPosition = range[0] + step * overlapEnd;
+ mapped.add(new int[] { startPosition, endPosition });
+ offsetStart = overlapEnd + 1; // resume the search after this subrange
+ added += Math.abs(endPosition - startPosition) + 1;
+ }
+
+ return added;
+ }
}