import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.List;
/**
*
* Use at your own risk!
*
- * TODO: efficient implementation of private posMap method
- *
* TODO: test/ensure that sense of from and to ratio start position is conserved
* (codon start position recovery)
*/
*/
protected int[][] makeFromMap()
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
return posMap(fromShifts, fromRatio, toShifts, toRatio);
}
*/
protected int[][] makeToMap()
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
return posMap(toShifts, toRatio, fromShifts, fromRatio);
}
private int[][] posMap(List<int[]> shiftTo, int ratio,
List<int[]> shiftFrom, int toRatio)
{
- // TODO not used - remove??
+ // TODO only used for test - remove??
int iv = 0, ivSize = shiftTo.size();
if (iv >= ivSize)
{
}
/**
- * find series of intervals mapping from start-end in the From map.
- *
- * @param start
- * position mapped 'to'
- * @param end
- * position mapped 'to'
- * @return series of [start, end] ranges in sequence mapped 'from'
- */
- public int[] locateInFrom(int start, int end)
- {
- return locateInFrom2(start, end);
-
- // inefficient implementation
- // int fromStart[] = shiftTo(start);
- // needs to be inclusive of end of symbol position
- // int fromEnd[] = shiftTo(end);
- // return getIntervals(fromShifts, fromStart, fromEnd, fromRatio);
- }
-
- /**
- * find series of intervals mapping from start-end in the to map.
- *
- * @param start
- * position mapped 'from'
- * @param end
- * position mapped 'from'
- * @return series of [start, end] ranges in sequence mapped 'to'
- */
- public int[] locateInTo(int start, int end)
- {
- return locateInTo2(start, end);
-
- // int toStart[] = shiftFrom(start);
- // int toEnd[] = shiftFrom(end);
- // return getIntervals(toShifts, toStart, toEnd, toRatio);
- }
-
- /**
* like shift - except returns the intervals in the given vector of shifts
* which were spanned in traversing fromStart to fromEnd
*
*/
public int getToPosition(int mpos)
{
- // TODO not used - remove??
int[] mp = shiftTo(mpos);
if (mp != null)
{
}
/**
- * get range of positions in To frame for the mpos word in From
- *
- * @param mpos
- * position in From
- * @return null or int[] first position in To for mpos, last position in to
- * for Mpos
- */
- public int[] getToWord(int mpos)
- {
- int[] mp = shiftTo(mpos);
- if (mp != null)
- {
- return new int[] { mp[0], mp[0] + mp[2] * (getFromRatio() - 1) };
- }
- return null;
- }
-
- /**
- * get From position in the associated reference frame for position pos in the
- * associated sequence.
- *
- * @param pos
- * @return
- */
- public int getMappedPosition(int pos)
- {
- // TODO not used - remove??
- int[] mp = shiftFrom(pos);
- if (mp != null)
- {
- return mp[0];
- }
- return pos;
- }
-
- public int[] getMappedWord(int pos)
- {
- // TODO not used - remove??
- int[] mp = shiftFrom(pos);
- if (mp != null)
- {
- return new int[] { mp[0], mp[0] + mp[2] * (getToRatio() - 1) };
- }
- return null;
- }
-
- /**
*
* @return a MapList whose From range is this maplist's To Range, and vice
* versa
}
/**
- * test for containment rather than equivalence to another mapping
- *
- * @param map
- * to be tested for containment
- * @return true if local or mapped range map contains or is contained by this
- * mapping
- */
- public boolean containsEither(boolean local, MapList map)
- {
- // TODO not used - remove?
- if (local)
- {
- return ((getFromLowest() >= map.getFromLowest()
- && getFromHighest() <= map.getFromHighest())
- || (getFromLowest() <= map.getFromLowest()
- && getFromHighest() >= map.getFromHighest()));
- }
- else
- {
- return ((getToLowest() >= map.getToLowest()
- && getToHighest() <= map.getToHighest())
- || (getToLowest() <= map.getToLowest()
- && getToHighest() >= map.getToHighest()));
- }
- }
-
- /**
* String representation - for debugging, not guaranteed not to change
*/
@Override
toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1;
i += 2;
}
-
+
/*
* check we mapped the full range - if not, abort
*/
}
/**
- * Returns the [start, end, start, end, ...] ranges in the 'from' range that
- * map to positions between {@code start} and {@code end} in the 'to' range.
- * Returns null if no mapped positions are found in start-end.
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'from' range
+ * that map to positions between {@code start} and {@code end} in the 'to'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * end < start. Returns null if no mapped positions are found in start-end.
*
* @param start
* @param end
* @return
*/
- public int[] locateInFrom2(int start, int end)
+ public int[] locateInFrom(int start, int end)
{
- List<int[]> ranges = mapBetween(start, end, toShifts, fromShifts,
- toRatio, fromRatio);
+ if (end < start)
+ {
+ int tmp = end;
+ end = start;
+ start = tmp;
+ }
- // TODO: or just return the List and adjust calling code to match
- return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
- }
+ /*
+ * traverse toShifts and mark offsets in fromShifts
+ * of any positions that lie in [start, end]
+ */
+ BitSet offsets = getMappedOffsetsForPositions(start, end, toShifts,
+ toRatio, fromRatio);
- /**
- * Returns the [start, end, start, end, ...] ranges in the 'to' range that map
- * to the given start-end in the 'from' range. Returns null if either
- * {@code start} or {@code end} is not a mapped 'from' range position.
- *
- * @param start
- * @param end
- * @return
- */
- public int[] locateInTo2(int start, int end)
- {
- List<int[]> ranges = mapBetween(start, end, fromShifts, toShifts,
- fromRatio, toRatio);
+ /*
+ * traverse fromShifts and collect positions at the marked offsets
+ */
+ List<int[]> mapped = getPositionsForOffsets(fromShifts, offsets);
- return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
+ // TODO: or just return the List and adjust calling code to match
+ return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped);
}
/**
- * A helper method for navigating the mapping. Returns a (possibly empty) list
- * of [start-end] positions in {@code ranges2} that map to positions in
- * {@code ranges1} between {@code start} and {@code end}.
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range
+ * that map to positions between {@code start} and {@code end} in the 'from'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * end < start. Returns null if no mapped positions are found in start-end.
*
* @param start
* @param end
- * @param ranges1
- * @param ranges2
- * @param wordLength1
- * @param wordLength2
* @return
*/
- final static List<int[]> mapBetween(int start, int end,
- List<int[]> ranges1, List<int[]> ranges2, int wordLength1,
- int wordLength2)
+ public int[] locateInTo(int start, int end)
{
if (end < start)
{
}
/*
- * first traverse ranges1 and record count of mapped positions
- * to any that overlap start-end
+ * traverse fromShifts and mark offsets in toShifts
+ * of any positions that lie in [start, end]
*/
- List<int[]> overlaps = findOverlapPositions(ranges1, start, end);
- if (overlaps.isEmpty())
- {
- return overlaps;
- }
-
- /*
- * convert positions to equivalent 'word' positions in ranges2
- */
- mapWords(overlaps, wordLength1, wordLength2);
+ BitSet offsets = getMappedOffsetsForPositions(start, end, fromShifts,
+ fromRatio, toRatio);
/*
- * walk ranges2 and record the values found at
- * the offsets in 'overlaps'
- */
- List<int[]> mapped = new ArrayList<>();
- final int s1 = overlaps.size();
- final int s2 = ranges2.size();
- int ranges2Index = 0;
-
- /*
- * count of mapped positions preceding ranges2[ranges2Index]
+ * traverse toShifts and collect positions at the marked offsets
*/
- int traversed = 0;
+ List<int[]> mapped = getPositionsForOffsets(toShifts, offsets);
- /*
- * for each [from-to] range in overlaps:
- * - walk (what remains of) ranges2
- * - record the values at offsets [from-to]
- * - stop when past 'to' positions (or at end of ranges2)
- */
- for (int i = 0; i < s1; i++)
- {
- int[] overlap = overlaps.get(i);
- final int toAdd = overlap[1] - overlap[0] + 1;
- int added = 0; // how much of overlap has been 'found'
- for (; added < toAdd && ranges2Index < s2; ranges2Index++)
- {
- int[] range2 = ranges2.get(ranges2Index);
- int rangeStart = range2[0];
- int rangeEnd = range2[1];
- boolean reverseStrand = range2[1] < range2[0];
- int rangeLength = Math.abs(rangeEnd - rangeStart) + 1;
- if (traversed + rangeLength <= overlap[0])
- {
- /*
- * precedes overlap - keep looking
- */
- traversed += rangeLength;
- continue;
- }
- int overlapStart = overlap[0] - traversed;
- int overlapEnd = Math.min(overlapStart + toAdd - added - 1,
- rangeLength - 1);
- int mappedFrom = range2[0] + (reverseStrand ? - overlapStart : overlapStart);
- int mappedTo = range2[0] + (reverseStrand ? - overlapEnd : overlapEnd);
- mapped.add(new int[] { mappedFrom, mappedTo });
- int found = overlapEnd - overlapStart + 1;
- added += found;
- overlap[0] += found;
- traversed += rangeLength;
- }
- }
-
- return mapped;
+ return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped);
}
/**
- * Converts the start-end positions (counted from zero) in the {@code ranges}
- * list from one word length to another. Start-end positions are expanded if
- * necessary to cover a whole word of length {@code wordLength1}. Positions
- * are then divided by {@code wordLength1} and multiplied by
- * {@code wordLength2} to give equivalent mapped words.
- * <p>
- * Put simply, this converts peptide residue positions to the corresponding
- * codon ranges, and codons - including partial codons - to the corresponding
- * peptide positions; for example
+ * Scans the list of {@code ranges} for any values (positions) that lie
+ * between start and end (inclusive), and records the <em>offsets</em> from
+ * the start of the list as a BitSet. The offset positions are converted to
+ * corresponding words in blocks of {@code wordLengthTo}.
*
* <pre>
- * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [1, 4]
+ * For example:
+ * 1:1 (e.g. gene to CDS):
+ * ranges { [10-20], [31-40] }, wordLengthFrom = wordLengthTo = 1
+ * for start = 1, end = 9, returns a BitSet with no bits set
+ * for start = 1, end = 11, returns a BitSet with bits 0-1 set
+ * for start = 15, end = 35, returns a BitSet with bits 5-15 set
+ * 1:3 (peptide to codon):
+ * ranges { [1-200] }, wordLengthFrom = 1, wordLengthTo = 3
+ * for start = 9, end = 9, returns a BitSet with bits 24-26 set
+ * 3:1 (codon to peptide):
+ * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLengthTo = 1
+ * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+ * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+ * 3:1 (circular DNA to peptide):
+ * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLengthTo = 1
+ * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
* </pre>
*
+ * @param start
+ * @param end
* @param ranges
- * @param wordLength1
- * @param wordLength2
+ * @param wordLengthFrom
+ * @param wordLengthTo
* @return
*/
- final static void mapWords(List<int[]> ranges, int wordLength1,
- int wordLength2)
+ protected final static BitSet getMappedOffsetsForPositions(int start,
+ int end, List<int[]> ranges, int wordLengthFrom, int wordLengthTo)
{
- if (wordLength1 == 1 && wordLength2 == 1)
- {
- return; // nothing to do here
- }
- int s = ranges.size();
- for (int i = 0; i < s; i++)
+ BitSet overlaps = new BitSet();
+ int offset = 0;
+ final int s1 = ranges.size();
+ for (int i = 0; i < s1; i++)
{
int[] range = ranges.get(i);
+ final int offset1 = offset;
+ int overlapStartOffset = -1;
+ int overlapEndOffset = -1;
- /*
- * expand range start to the start of a word,
- * and convert to wordLength2
- */
- range[0] -= range[0] % wordLength1;
- range[0] = range[0] / wordLength1 * wordLength2;
+ if (range[1] >= range[0])
+ {
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ overlapStartOffset = offset1 + overlapStart - range[0];
+ int overlapEnd = Math.min(end, range[1]);
+ overlapEndOffset = offset1 + overlapEnd - range[0];
+ }
+ }
+ else
+ {
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[1]);
+ int overlapEnd = Math.min(end, range[0]);
+ overlapStartOffset = offset1 + range[0] - overlapEnd;
+ overlapEndOffset = offset1 + range[0] - overlapStart;
+ }
+ }
- /*
- * similar calculation for range end, adding
- * (wordLength2 - 1) for end of mapped word
- */
- range[1] -= range[1] % wordLength1;
- range[1] = range[1] / wordLength1 * wordLength2;
- range[1] += wordLength2 - 1;
+ if (overlapStartOffset > -1)
+ {
+ /*
+ * found an overlap
+ */
+ if (wordLengthFrom != wordLengthTo)
+ {
+ /*
+ * convert any overlap found to whole words in the target range
+ * (e.g. treat any partial codon overlap as if the whole codon)
+ */
+ overlapStartOffset -= overlapStartOffset % wordLengthFrom;
+ overlapStartOffset = overlapStartOffset / wordLengthFrom
+ * wordLengthTo;
+
+ /*
+ * similar calculation for the overlap end, adding
+ * (wordLengthTo - 1) to reach the end of the mapped word
+ */
+ overlapEndOffset -= overlapEndOffset % wordLengthFrom;
+ overlapEndOffset = overlapEndOffset / wordLengthFrom
+ * wordLengthTo;
+ overlapEndOffset += wordLengthTo - 1;
+ }
+ overlaps.set(overlapStartOffset, overlapEndOffset + 1);
+ }
+ offset += 1 + Math.abs(range[1] - range[0]);
}
+ return overlaps;
}
/**
- * Helper method that returns a (possibly empty) list of offsets in
- * {@code ranges} to subranges that overlap {@code start-end} (where start <=
- * end}. The list returned holds counts of the number of positions traversed
- * (exclusive) to reach the overlapping positions, not the overlapping values.
- * Returns null if there are no overlaps.
+ * Returns a (possibly empty) list of the [start-end] values (positions) at
+ * offsets in the {@code ranges} list that are marked by 'on' bits in the
+ * {@code offsets} bitset.
*
* @param ranges
- * @param start
- * @param end
+ * @param offsets
* @return
*/
- final static List<int[]> findOverlapPositions(List<int[]> ranges,
- int start, int end)
+ protected final static List<int[]> getPositionsForOffsets(
+ List<int[]> ranges, BitSet offsets)
{
- List<int[]> positions = new ArrayList<>();
- int pos = 0;
- int s = ranges.size();
- for (int i = 0; i < s; i++)
+ List<int[]> mapped = new ArrayList<>();
+ if (offsets.isEmpty())
+ {
+ return mapped;
+ }
+
+ /*
+ * count of positions preceding ranges[i]
+ */
+ int traversed = 0;
+
+ /*
+ * for each [from-to] range in ranges:
+ * - find subranges (if any) at marked offsets
+ * - add the start-end values at the marked positions
+ */
+ final int toAdd = offsets.cardinality();
+ int added = 0;
+ final int s2 = ranges.size();
+ for (int i = 0; added < toAdd && i < s2; i++)
{
int[] range = ranges.get(i);
- addOverlap(positions, pos, range, start, end);
- pos += 1 + Math.abs(range[1] - range[0]);
+ added += addOffsetPositions(mapped, traversed, range, offsets);
+ traversed += Math.abs(range[1] - range[0]) + 1;
}
- return positions;
+ return mapped;
}
/**
- * A helper method that checks whether {@code range} overlaps
- * {@code start-end}, and if so adds the offset of the overlap in
- * {@code range}, plus {@code pos}, to {@code positions}.
+ * Helper method that adds any start-end subranges of {@code range} that are
+ * at offsets in {@code range} marked by set bits in overlaps.
+ * {@code mapOffset} is added to {@code range} offset positions. Returns the
+ * count of positions added.
*
- * @param positions
- * a list of map offsets to add to
- * @param pos
- * the number of mapped positions already visited
+ * @param mapped
+ * @param mapOffset
* @param range
- * a from-to range (may be forward or reverse)
- * @param start
- * position to test for overlap in range
- * @param end
- * position to test for overlap in range
+ * @param overlaps
* @return
*/
- final static void addOverlap(List<int[]> positions, int pos, int[] range,
- int start, int end)
+ final static int addOffsetPositions(List<int[]> mapped,
+ final int mapOffset, final int[] range, final BitSet overlaps)
{
- if (range[1] >= range[0])
+ final int rangeLength = 1 + Math.abs(range[1] - range[0]);
+ final int step = range[1] < range[0] ? -1 : 1;
+ int offsetStart = 0; // offset into range
+ int added = 0;
+
+ while (offsetStart < rangeLength)
{
/*
- * forward direction range
+ * find the start of the next marked overlap offset;
+ * if there is none, or it is beyond range, then finished
*/
- if (start <= range[1] && end >= range[0])
+ int overlapStart = overlaps.nextSetBit(mapOffset + offsetStart);
+ if (overlapStart == -1 || overlapStart - mapOffset >= rangeLength)
{
/*
- * overlap
+ * no more overlaps, or no more within range[]
*/
- int overlapStart = Math.max(start, range[0]);
- int overlapStartOffset = pos + overlapStart - range[0];
- int overlapEnd = Math.min(end, range[1]);
- int overlapEndOffset = pos + overlapEnd - range[0];
- int[] lastOverlap = positions.isEmpty() ? null
- : positions.get(positions.size() - 1);
- if (lastOverlap != null && overlapStartOffset == lastOverlap[1] + 1)
- {
- /*
- * just extending the last overlap range
- */
- lastOverlap[1] = overlapEndOffset;
- }
- else
- {
- /*
- * add a new (discontiguous) overlap range
- */
- positions.add(new int[] { overlapStartOffset, overlapEndOffset });
- }
+ return added;
}
- }
- else
- {
+ overlapStart -= mapOffset;
+
/*
- * reverse direction range
+ * end of the overlap range is just before the next clear bit;
+ * restrict it to end of range if necessary;
+ * note we may add a reverse strand range here (end < start)
*/
- if (start <= range[0] && end >= range[1])
- {
- /*
- * overlap
- */
- int overlapStart = Math.max(start, range[1]);
- int overlapEnd = Math.min(end, range[0]);
- positions
- .add(new int[]
- { pos + range[0] - overlapEnd,
- pos + range[0] - overlapStart });
- }
+ // BitSet.nextClearBit never returns -1 (a BitSet is conceptually
+ // unbounded, so a clear bit always exists); only clamping to the
+ // end of this range is needed
+ int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1);
+ overlapEnd = Math.min(rangeLength - 1, overlapEnd - mapOffset - 1);
+ int startPosition = range[0] + step * overlapStart;
+ int endPosition = range[0] + step * overlapEnd;
+ mapped.add(new int[] { startPosition, endPosition });
+ offsetStart = overlapEnd + 1;
+ added += Math.abs(endPosition - startPosition) + 1;
}
+
+ return added;
}
}
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.List;
import org.testng.annotations.BeforeClass;
* no overlap
*/
assertNull(ml.locateInFrom(0, 0));
+
}
/**
assertEquals("[10, 10, 12, 12, 14, 14]",
Arrays.toString(ml.locateInFrom(3, 3)));
assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4)));
+
+ /*
+ * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ */
+ ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
+ new int[]
+ { 1, 1, 3, 4, 6, 8 }, 3, 1);
+ assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom(1, 1));
+ assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom(1, 3));
+ assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom(1, 4));
+ assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom(3, 4));
+
}
@Test(groups = { "Functional" })
toRanges = compound.getToRanges();
assertEquals(2, toRanges.size());
assertArrayEquals(new int[] { 931, 901 }, toRanges.get(0));
- assertArrayEquals(new int[] { 600, 582 }, toRanges.get(1));
+ assertArrayEquals(new int[] { 600, 582}, toRanges.get(1));
/*
* 1:1 plus 1:3 should result in 1:3
}
/**
- * Tests for helper method that adds any overlap (plus offset) to a list of
+ * Tests for helper method that adds any overlap (plus offset) to a set of
* overlaps
*/
@Test(groups = { "Functional" })
- public void testAddOverlap()
+ public void testAddOffsetPositions()
{
- List<int[]> overlaps = new ArrayList<>();
- int[] candidate = new int[] { 10, 19 };
- MapList.addOverlap(overlaps, 5, candidate, 20, 30); // doesn't overlap
- assertTrue(overlaps.isEmpty());
- MapList.addOverlap(overlaps, 5, candidate, 31, 40); // doesn't overlap
- assertTrue(overlaps.isEmpty());
+ List<int[]> mapped = new ArrayList<>();
+ int[] range = new int[] {10, 20};
+ BitSet offsets = new BitSet();
- /*
- * 10-19 overlaps 15-25 at 15-19, which is offset 5-9 in 10-19
- * + 5 initial offset
- */
- MapList.addOverlap(overlaps, 5, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 10, 14 }, overlaps.get(0));
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+
+ offsets.set(4, 6); // this sets bits 4 and 5
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+
+ mapped.clear();
+ offsets.set(10);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(1));
/*
- * reverse range overlap:
- * 300-20 overlaps 15-25 at 25-20, which is offset 275-280 in 300-20
- * + 8 initial offset
+ * reverse range
*/
- overlaps.clear();
- candidate = new int[] { 300, 20 };
- MapList.addOverlap(overlaps, 8, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 283, 288 }, overlaps.get(0));
+ range = new int[] { 20, 10 };
+ mapped.clear();
+ offsets.clear();
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+ offsets.set(0);
+ offsets.set(10);
+ offsets.set(6, 8); // sets bits 6 and 7
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(3, mapped.size());
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(0));
+ assertArrayEquals(new int[] { 14, 13 }, mapped.get(1));
+ assertArrayEquals(new int[] { 10, 10 }, mapped.get(2));
}
-
+
@Test(groups = { "Functional" })
- public void testFindOverlapPositions()
+ public void testGetPositionsForOffsets()
{
List<int[]> ranges = new ArrayList<>();
- List<int[]> overlaps = MapList.findOverlapPositions(ranges, 20, 30);
- assertTrue(overlaps.isEmpty()); // nothing to overlap
-
- ranges.add(new int[] { 15, 25 });
- overlaps = MapList.findOverlapPositions(ranges, 5, 10);
- assertTrue(overlaps.isEmpty()); // no overlap
-
- overlaps = MapList.findOverlapPositions(ranges, 20, 20);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 5, 5 }, overlaps.get(0));
-
- overlaps = MapList.findOverlapPositions(ranges, 5, 19);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 4 }, overlaps.get(0));
-
- ranges.add(new int[] { 35, 45 });
- overlaps = MapList.findOverlapPositions(ranges, 26, 34);
- assertTrue(overlaps.isEmpty());
-
+ BitSet offsets = new BitSet();
+ List<int[]> mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges and no offsets!
+
+ offsets.set(5, 1000);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges
+
/*
- * 24-37 overlaps the end of 15-25 and the start of 35-45
- * - offset positions are contiguous in the map so merged
+ * one range with overlap of offsets
*/
- overlaps = MapList.findOverlapPositions(ranges, 24, 37);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 9, 13 }, overlaps.get(0));
-
+ ranges.add(new int[] {15, 25});
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] {20, 25}, mapped.get(0));
+
/*
- * EMBL:MN908947 https://www.ebi.ac.uk/ena/browser/api/embl/MN908947
- * (Covid-SARS-2) CDS mapping with 'slippage'
- * (base 13468 is used twice in transcription)
+ * two ranges
*/
- ranges.clear();
- ranges.add(new int[] { 266, 13468 });
- ranges.add(new int[] { 13468, 21555 });
-
- // 13468 occupies two offsets in the range list
- overlaps = MapList.findOverlapPositions(ranges, 13468, 13468);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13202, 13203 }, overlaps.get(0));
- overlaps = MapList.findOverlapPositions(ranges, 13469, 13470);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13204, 13205 }, overlaps.get(0));
-
+ ranges.add(new int[] {300, 320});
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {20, 25}, mapped.get(0));
+ assertArrayEquals(new int[] {300, 320}, mapped.get(1));
+
/*
- * EMBL:J03321 https://www.ebi.ac.uk/ena/browser/api/embl/J03321
- * circular dna: CDS at [7022-7502, 1-437]
- * = 481 + 437 = 918 bases = 305 aa's + stop codon
+ * boundary case - right end of first range overlaps
*/
- ranges.clear();
- ranges.add(new int[] { 7022, 7502 });
- ranges.add(new int[] { 1, 437 });
-
- overlaps = MapList.findOverlapPositions(ranges, 438, 7021);
- assertTrue(overlaps.isEmpty());
-
- // overlap first exon only:
- overlaps = MapList.findOverlapPositions(ranges, 7000, 7100);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 78 }, overlaps.get(0));
-
- // overlap second exon only: offset to mapping includes first exon
- overlaps = MapList.findOverlapPositions(ranges, 400, 500);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 880, 917 }, overlaps.get(0));
-
- // overlap both exons: first exon overlap precedes second exon overlap
- // offsets of overlaps are not contiguous
- overlaps = MapList.findOverlapPositions(ranges, 200, 7500);
- assertEquals(2, overlaps.size());
- // first overlap is offsets of 7022-7500 in exon 1 (7022-7502):
- assertArrayEquals(new int[] { 0, 478 }, overlaps.get(0));
- // second overlap offsets is (first exon length = 481) + (200-437)
- assertArrayEquals(new int[] { 680, 917 }, overlaps.get(1));
- }
-
- @Test(groups = { "Functional" })
- public void testMapWords()
- {
- List<int[]> ranges = new ArrayList<>();
-
+ offsets.clear();
+ offsets.set(10);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] {25, 25}, mapped.get(0));
+
/*
- * 1:1 (trivial) case
+ * boundary case - left end of second range overlaps
*/
- ranges.add(new int[] { 2, 4 });
- ranges.add(new int[] { 6, 9 });
- MapList.mapWords(ranges, 1, 1);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 2, 4 }, ranges.get(0));
- assertArrayEquals(new int[] { 6, 9 }, ranges.get(1));
-
+ offsets.set(11);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {25, 25}, mapped.get(0));
+ assertArrayEquals(new int[] {300, 300}, mapped.get(1));
+
/*
- * 1:3 case (peptide to codon ranges)
+ * offsets into a circular range are reported in
+ * the order in which they are traversed
*/
- MapList.mapWords(ranges, 1, 3);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 6, 14 }, ranges.get(0));
- assertArrayEquals(new int[] { 18, 29 }, ranges.get(1));
+ ranges.clear();
+ ranges.add(new int[] {100, 150});
+ ranges.add(new int[] {60, 80});
+ offsets.clear();
+ offsets.set(45, 55); // sets bits 45 to 54
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {145, 150}, mapped.get(0)); // offsets 45-50
+ assertArrayEquals(new int[] {60, 63}, mapped.get(1)); // offsets 51-54
/*
- * 3:1 case (codon or part codon to peptide)
+ * reverse range overlap is reported with end < start
*/
ranges.clear();
- ranges.add(new int[] { 0, 5 }); // 2 whole codons
- ranges.add(new int[] { 7, 11 }); // part + whole codon
- ranges.add(new int[] { 15, 19 }); // whole + part codon
- ranges.add(new int[] { 23, 27 }); // part + part codon
- ranges.add(new int[] { 30, 30 }); // first base of codon
- ranges.add(new int[] { 31, 31 }); // second base of codon
- ranges.add(new int[] { 32, 32 }); // third base of codon
- MapList.mapWords(ranges, 3, 1);
- assertEquals(ranges.size(), 7);
- assertArrayEquals(new int[] { 0, 1 }, ranges.get(0));
- assertArrayEquals(new int[] { 2, 3 }, ranges.get(1));
- assertArrayEquals(new int[] { 5, 6 }, ranges.get(2));
- assertArrayEquals(new int[] { 7, 9 }, ranges.get(3));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(4));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(5));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(6));
+ ranges.add(new int[] {4321, 4000});
+ offsets.clear();
+ offsets.set(20, 22); // sets bits 20 and 21
+ offsets.set(30);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {4301, 4300}, mapped.get(0));
+ assertArrayEquals(new int[] {4291, 4291}, mapped.get(1));
}
-
+
@Test(groups = { "Functional" })
- public void testLocateInFrom2()
+ public void testGetMappedOffsetsForPositions()
{
/*
- * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ * start by verifying the examples in the method's Javadoc!
*/
- MapList ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
- new int[]
- { 1, 1, 3, 4, 6, 8 }, 3, 1);
- assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom2(1, 1));
- assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom2(1, 3));
- assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom2(1, 4));
- assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom2(3, 4));
+ List<int[]> ranges = new ArrayList<>();
+ ranges.add(new int[] {10, 20});
+ ranges.add(new int[] {31, 40});
+ BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, 1);
+ assertTrue(overlaps.isEmpty());
+ overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ assertTrue(overlaps.get(1));
+ overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1);
+ assertEquals(11, overlaps.cardinality());
+ // offsets 5-10 are positions 15-20 of [10-20]; offsets 11-15 are
+ // positions 31-35 of [31-40] - verify all 11 marked bits
+ for (int i = 5 ; i <= 15 ; i++)
+ {
+ assertTrue(overlaps.get(i));
+ }
+
+ ranges.clear();
+ ranges.add(new int[] {1, 200});
+ overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(24));
+ assertTrue(overlaps.get(25));
+ assertTrue(overlaps.get(26));
+
+ ranges.clear();
+ ranges.add(new int[] {101, 150});
+ ranges.add(new int[] {171, 180});
+ overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(16));
+
+ ranges.clear();
+ ranges.add(new int[] {101, 150});
+ ranges.add(new int[] {21, 30});
+ overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(17));
+ assertTrue(overlaps.get(18));
+ assertTrue(overlaps.get(19));
+
+ /*
+ * reverse range 1:1 (e.g. reverse strand gene to transcript)
+ */
+ ranges.clear();
+ ranges.add(new int[] {20, 10});
+ overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
+
+ /*
+ * reverse range 3:1 (e.g. reverse strand gene to peptide)
+ * from EMBL:J03321 to P0CE20
+ */
+ ranges.clear();
+ ranges.add(new int[] {1480, 488});
+ overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, 1);
+ // 1460 is the end of the 7th codon
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ // add one base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add second base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add third base (whole codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add one more base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
}
}