From: gmungoc Date: Thu, 26 Nov 2020 14:17:41 +0000 (+0000) Subject: JAL-3761 locateInFrom/To revised with tests; unused methods removed X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=473def501c996493b373b8c94cc7c737e16feebf;p=jalview.git JAL-3761 locateInFrom/To revised with tests; unused methods removed --- diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index b5184fb..4d90e3e 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -20,13 +20,13 @@ */ package jalview.datamodel; -import jalview.util.Comparison; -import jalview.util.MapList; - import java.util.Iterator; import java.util.NoSuchElementException; import java.util.Vector; +import jalview.util.Comparison; +import jalview.util.MapList; + public class Mapping { /** @@ -433,23 +433,6 @@ public class Mapping } /** - * gets boundary in direction of mapping - * - * @param position - * in mapped reference frame - * @return int{start, end} positions in associated sequence (in direction of - * mapped word) - */ - public int[] getWord(int mpos) - { - if (map != null) - { - return map.getToWord(mpos); - } - return null; - } - - /** * width of mapped unit in associated sequence * */ diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 0d71bb4..198066d 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -22,6 +22,7 @@ package jalview.util; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.List; /** @@ -30,8 +31,6 @@ import java.util.List; * * Use at your own risk! * - * TODO: efficient implementation of private posMap method - * * TODO: test/ensure that sense of from and to ratio start position is conserved * (codon start position recovery) */ @@ -406,7 +405,7 @@ public class MapList */ protected int[][] makeFromMap() { - // TODO not used - remove?? + // TODO only used for test - remove?? 
return posMap(fromShifts, fromRatio, toShifts, toRatio); } @@ -417,7 +416,7 @@ public class MapList */ protected int[][] makeToMap() { - // TODO not used - remove?? + // TODO only used for test - remove?? return posMap(toShifts, toRatio, fromShifts, fromRatio); } @@ -431,7 +430,7 @@ public class MapList private int[][] posMap(List shiftTo, int ratio, List shiftFrom, int toRatio) { - // TODO not used - remove?? + // TODO only used for test - remove?? int iv = 0, ivSize = shiftTo.size(); if (iv >= ivSize) { @@ -693,44 +692,6 @@ public class MapList } /** - * find series of intervals mapping from start-end in the From map. - * - * @param start - * position mapped 'to' - * @param end - * position mapped 'to' - * @return series of [start, end] ranges in sequence mapped 'from' - */ - public int[] locateInFrom(int start, int end) - { - return locateInFrom2(start, end); - - // inefficient implementation - // int fromStart[] = shiftTo(start); - // needs to be inclusive of end of symbol position - // int fromEnd[] = shiftTo(end); - // return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); - } - - /** - * find series of intervals mapping from start-end in the to map. - * - * @param start - * position mapped 'from' - * @param end - * position mapped 'from' - * @return series of [start, end] ranges in sequence mapped 'to' - */ - public int[] locateInTo(int start, int end) - { - return locateInTo2(start, end); - - // int toStart[] = shiftFrom(start); - // int toEnd[] = shiftFrom(end); - // return getIntervals(toShifts, toStart, toEnd, toRatio); - } - - /** * like shift - except returns the intervals in the given vector of shifts * which were spanned in traversing fromStart to fromEnd * @@ -906,7 +867,6 @@ public class MapList */ public int getToPosition(int mpos) { - // TODO not used - remove?? 
int[] mp = shiftTo(mpos); if (mp != null) { @@ -916,53 +876,6 @@ public class MapList } /** - * get range of positions in To frame for the mpos word in From - * - * @param mpos - * position in From - * @return null or int[] first position in To for mpos, last position in to - * for Mpos - */ - public int[] getToWord(int mpos) - { - int[] mp = shiftTo(mpos); - if (mp != null) - { - return new int[] { mp[0], mp[0] + mp[2] * (getFromRatio() - 1) }; - } - return null; - } - - /** - * get From position in the associated reference frame for position pos in the - * associated sequence. - * - * @param pos - * @return - */ - public int getMappedPosition(int pos) - { - // TODO not used - remove?? - int[] mp = shiftFrom(pos); - if (mp != null) - { - return mp[0]; - } - return pos; - } - - public int[] getMappedWord(int pos) - { - // TODO not used - remove?? - int[] mp = shiftFrom(pos); - if (mp != null) - { - return new int[] { mp[0], mp[0] + mp[2] * (getToRatio() - 1) }; - } - return null; - } - - /** * * @return a MapList whose From range is this maplist's To Range, and vice * versa @@ -974,33 +887,6 @@ public class MapList } /** - * test for containment rather than equivalence to another mapping - * - * @param map - * to be tested for containment - * @return true if local or mapped range map contains or is contained by this - * mapping - */ - public boolean containsEither(boolean local, MapList map) - { - // TODO not used - remove? 
- if (local) - { - return ((getFromLowest() >= map.getFromLowest() - && getFromHighest() <= map.getFromHighest()) - || (getFromLowest() <= map.getFromLowest() - && getFromHighest() >= map.getFromHighest())); - } - else - { - return ((getToLowest() >= map.getToLowest() - && getToHighest() <= map.getToHighest()) - || (getToLowest() <= map.getToLowest() - && getToHighest() >= map.getToHighest())); - } - } - - /** * String representation - for debugging, not guaranteed not to change */ @Override @@ -1233,7 +1119,7 @@ public class MapList toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1; i += 2; } - + /* * check we mapped the full range - if not, abort */ @@ -1258,56 +1144,51 @@ public class MapList } /** - * Returns the [start, end, start, end, ...] ranges in the 'from' range that - * map to positions between {@code start} and {@code end} in the 'to' range. - * Returns null if no mapped positions are found in start-end. + * Returns the [start1, end1, start2, end2, ...] positions in the 'from' range + * that map to positions between {@code start} and {@code end} in the 'to' + * range. Note that for a reverse strand mapping this will return ranges with + * end < start. Returns null if no mapped positions are found in start-end. * * @param start * @param end * @return */ - public int[] locateInFrom2(int start, int end) + public int[] locateInFrom(int start, int end) { - List ranges = mapBetween(start, end, toShifts, fromShifts, - toRatio, fromRatio); + if (end < start) + { + int tmp = end; + end = start; + start = tmp; + } - // TODO: or just return the List and adjust calling code to match - return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); - } + /* + * traverse toShifts and mark offsets in fromShifts + * of any positions that lie in [start, end] + */ + BitSet offsets = getMappedOffsetsForPositions(start, end, toShifts, + toRatio, fromRatio); - /** - * Returns the [start, end, start, end, ...] 
ranges in the 'to' range that map - * to the given start-end in the 'from' range. Returns null if either - * {@code start} or {@code end} is not a mapped 'from' range position. - * - * @param start - * @param end - * @return - */ - public int[] locateInTo2(int start, int end) - { - List ranges = mapBetween(start, end, fromShifts, toShifts, - fromRatio, toRatio); + /* + * traverse fromShifts and collect positions at the marked offsets + */ + List mapped = getPositionsForOffsets(fromShifts, offsets); - return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); + // TODO: or just return the List and adjust calling code to match + return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped); } /** - * A helper method for navigating the mapping. Returns a (possibly empty) list - * of [start-end] positions in {@code ranges2} that map to positions in - * {@code ranges1} between {@code start} and {@code end}. + * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range + * that map to positions between {@code start} and {@code end} in the 'from' + * range. Note that for a reverse strand mapping this will return ranges with + * end < start. Returns null if no mapped positions are found in start-end. 
* * @param start * @param end - * @param ranges1 - * @param ranges2 - * @param wordLength1 - * @param wordLength2 * @return */ - final static List mapBetween(int start, int end, - List ranges1, List ranges2, int wordLength1, - int wordLength2) + public int[] locateInTo(int start, int end) { if (end < start) { @@ -1317,221 +1198,220 @@ public class MapList } /* - * first traverse ranges1 and record count of mapped positions - * to any that overlap start-end + * traverse fromShifts and mark offsets in toShifts + * of any positions that lie in [start, end] */ - List overlaps = findOverlapPositions(ranges1, start, end); - if (overlaps.isEmpty()) - { - return overlaps; - } - - /* - * convert positions to equivalent 'word' positions in ranges2 - */ - mapWords(overlaps, wordLength1, wordLength2); + BitSet offsets = getMappedOffsetsForPositions(start, end, fromShifts, + fromRatio, toRatio); /* - * walk ranges2 and record the values found at - * the offsets in 'overlaps' - */ - List mapped = new ArrayList<>(); - final int s1 = overlaps.size(); - final int s2 = ranges2.size(); - int ranges2Index = 0; - - /* - * count of mapped positions preceding ranges2[ranges2Index] + * traverse toShifts and collect positions at the marked offsets */ - int traversed = 0; + List mapped = getPositionsForOffsets(toShifts, offsets); - /* - * for each [from-to] range in overlaps: - * - walk (what remains of) ranges2 - * - record the values at offsets [from-to] - * - stop when past 'to' positions (or at end of ranges2) - */ - for (int i = 0; i < s1; i++) - { - int[] overlap = overlaps.get(i); - final int toAdd = overlap[1] - overlap[0] + 1; - int added = 0; // how much of overlap has been 'found' - for (; added < toAdd && ranges2Index < s2; ranges2Index++) - { - int[] range2 = ranges2.get(ranges2Index); - int rangeStart = range2[0]; - int rangeEnd = range2[1]; - boolean reverseStrand = range2[1] < range2[0]; - int rangeLength = Math.abs(rangeEnd - rangeStart) + 1; - if (traversed + rangeLength 
<= overlap[0]) - { - /* - * precedes overlap - keep looking - */ - traversed += rangeLength; - continue; - } - int overlapStart = overlap[0] - traversed; - int overlapEnd = Math.min(overlapStart + toAdd - added - 1, - rangeLength - 1); - int mappedFrom = range2[0] + (reverseStrand ? - overlapStart : overlapStart); - int mappedTo = range2[0] + (reverseStrand ? - overlapEnd : overlapEnd); - mapped.add(new int[] { mappedFrom, mappedTo }); - int found = overlapEnd - overlapStart + 1; - added += found; - overlap[0] += found; - traversed += rangeLength; - } - } - - return mapped; + return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped); } /** - * Converts the start-end positions (counted from zero) in the {@code ranges} - * list from one word length to another. Start-end positions are expanded if - * necessary to cover a whole word of length {@code wordLength1}. Positions - * are then divided by {@code wordLength1} and multiplied by - * {@code wordLength2} to give equivalent mapped words. - *

- * Put simply, this converts peptide residue positions to the corresponding - * codon ranges, and codons - including partial codons - to the corresponding - * peptide positions; for example + * Scans the list of {@code ranges} for any values (positions) that lie + * between start and end (inclusive), and records the offsets from + * the start of the list as a BitSet. The offset positions are converted to + * corresponding words in blocks of {@code wordLength2}. * *

-   * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [1, 4]
+   * For example:
+   * 1:1 (e.g. gene to CDS):
+   * ranges { [10-20], [31-40] }, wordLengthFrom = wordLengthTo = 1
+   *   for start = 1, end = 9, returns a BitSet with no bits set
+   *   for start = 1, end = 11, returns a BitSet with bits 0-1 set
+   *   for start = 15, end = 35, returns a BitSet with bits 5-15 set
+   * 1:3 (peptide to codon):
+   * ranges { [1-200] }, wordLengthFrom = 1, wordLengthTo = 3
+   *   for start = 9, end = 9, returns a BitSet with bits 24-26 set
+   * 3:1 (codon to peptide):
+   * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLengthTo = 1
+   *   for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+   *   for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+   * 3:1 (circular DNA to peptide):
+   * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLengthTo = 1
+   *   for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
    * 
* + * @param start + * @param end * @param ranges - * @param wordLength1 - * @param wordLength2 + * @param wordLengthFrom + * @param wordLengthTo * @return */ - final static void mapWords(List ranges, int wordLength1, - int wordLength2) + protected final static BitSet getMappedOffsetsForPositions(int start, + int end, List ranges, int wordLengthFrom, int wordLengthTo) { - if (wordLength1 == 1 && wordLength2 == 1) - { - return; // nothing to do here - } - int s = ranges.size(); - for (int i = 0; i < s; i++) + BitSet overlaps = new BitSet(); + int offset = 0; + final int s1 = ranges.size(); + for (int i = 0; i < s1; i++) { int[] range = ranges.get(i); + final int offset1 = offset; + int overlapStartOffset = -1; + int overlapEndOffset = -1; - /* - * expand range start to the start of a word, - * and convert to wordLength2 - */ - range[0] -= range[0] % wordLength1; - range[0] = range[0] / wordLength1 * wordLength2; + if (range[1] >= range[0]) + { + /* + * forward direction range + */ + if (start <= range[1] && end >= range[0]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[0]); + overlapStartOffset = offset1 + overlapStart - range[0]; + int overlapEnd = Math.min(end, range[1]); + overlapEndOffset = offset1 + overlapEnd - range[0]; + } + } + else + { + /* + * reverse direction range + */ + if (start <= range[0] && end >= range[1]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[1]); + int overlapEnd = Math.min(end, range[0]); + overlapStartOffset = offset1 + range[0] - overlapEnd; + overlapEndOffset = offset1 + range[0] - overlapStart; + } + } - /* - * similar calculation for range end, adding - * (wordLength2 - 1) for end of mapped word - */ - range[1] -= range[1] % wordLength1; - range[1] = range[1] / wordLength1 * wordLength2; - range[1] += wordLength2 - 1; + if (overlapStartOffset > -1) + { + /* + * found an overlap + */ + if (wordLengthFrom != wordLengthTo) + { + /* + * convert any overlap found to whole words in the 
target range + * (e.g. treat any partial codon overlap as if the whole codon) + */ + overlapStartOffset -= overlapStartOffset % wordLengthFrom; + overlapStartOffset = overlapStartOffset / wordLengthFrom + * wordLengthTo; + + /* + * similar calculation for range end, adding + * (wordLength2 - 1) for end of mapped word + */ + overlapEndOffset -= overlapEndOffset % wordLengthFrom; + overlapEndOffset = overlapEndOffset / wordLengthFrom + * wordLengthTo; + overlapEndOffset += wordLengthTo - 1; + } + overlaps.set(overlapStartOffset, overlapEndOffset + 1); + } + offset += 1 + Math.abs(range[1] - range[0]); } + return overlaps; } /** - * Helper method that returns a (possibly empty) list of offsets in - * {@code ranges} to subranges that overlap {@code start-end} (where start <= - * end}. The list returned holds counts of the number of positions traversed - * (exclusive) to reach the overlapping positions, not the overlapping values. - * Returns null if there are no overlaps. + * Returns a (possibly empty) list of the [start-end] values (positions) at + * offsets in the {@code ranges} list that are marked by 'on' bits in the + * {@code offsets} bitset. 
* * @param ranges - * @param start - * @param end + * @param offsets * @return */ - final static List findOverlapPositions(List ranges, - int start, int end) + protected final static List getPositionsForOffsets( + List ranges, BitSet offsets) { - List positions = new ArrayList<>(); - int pos = 0; - int s = ranges.size(); - for (int i = 0; i < s; i++) + List mapped = new ArrayList<>(); + if (offsets.isEmpty()) + { + return mapped; + } + + /* + * count of positions preceding ranges[i] + */ + int traversed = 0; + + /* + * for each [from-to] range in ranges: + * - find subranges (if any) at marked offsets + * - add the start-end values at the marked positions + */ + final int toAdd = offsets.cardinality(); + int added = 0; + final int s2 = ranges.size(); + for (int i = 0; added < toAdd && i < s2; i++) { int[] range = ranges.get(i); - addOverlap(positions, pos, range, start, end); - pos += 1 + Math.abs(range[1] - range[0]); + added += addOffsetPositions(mapped, traversed, range, offsets); + traversed += Math.abs(range[1] - range[0]) + 1; } - return positions; + return mapped; } /** - * A helper method that checks whether {@code range} overlaps - * {@code start-end}, and if so adds the offset of the overlap in - * {@code range}, plus {@code pos}, to {@code positions}. + * Helper method that adds any start-end subranges of {@code range} that are + * at offsets in {@code range} marked by set bits in overlaps. + * {@code mapOffset} is added to {@code range} offset positions. Returns the + * count of positions added. 
* - * @param positions - * a list of map offsets to add to - * @param pos - * the number of mapped positions already visited + * @param mapped + * @param mapOffset * @param range - * a from-to range (may be forward or reverse) - * @param start - * position to test for overlap in range - * @param end - * position to test for overlap in range + * @param overlaps * @return */ - final static void addOverlap(List positions, int pos, int[] range, - int start, int end) + final static int addOffsetPositions(List mapped, + final int mapOffset, final int[] range, final BitSet overlaps) { - if (range[1] >= range[0]) + final int rangeLength = 1 + Math.abs(range[1] - range[0]); + final int step = range[1] < range[0] ? -1 : 1; + int offsetStart = 0; // offset into range + int added = 0; + + while (offsetStart < rangeLength) { /* - * forward direction range + * find the start of the next marked overlap offset; + * if there is none, or it is beyond range, then finished */ - if (start <= range[1] && end >= range[0]) + int overlapStart = overlaps.nextSetBit(mapOffset + offsetStart); + if (overlapStart == -1 || overlapStart - mapOffset >= rangeLength) { /* - * overlap + * no more overlaps, or no more within range[] */ - int overlapStart = Math.max(start, range[0]); - int overlapStartOffset = pos + overlapStart - range[0]; - int overlapEnd = Math.min(end, range[1]); - int overlapEndOffset = pos + overlapEnd - range[0]; - int[] lastOverlap = positions.isEmpty() ? 
null - : positions.get(positions.size() - 1); - if (lastOverlap != null && overlapStartOffset == lastOverlap[1] + 1) - { - /* - * just extending the last overlap range - */ - lastOverlap[1] = overlapEndOffset; - } - else - { - /* - * add a new (discontiguous) overlap range - */ - positions.add(new int[] { overlapStartOffset, overlapEndOffset }); - } + return added; } - } - else - { + overlapStart -= mapOffset; + /* - * reverse direction range + * end of the overlap range is just before the next clear bit; + * restrict it to end of range if necessary; + * note we may add a reverse strand range here (end < start) */ - if (start <= range[0] && end >= range[1]) - { - /* - * overlap - */ - int overlapStart = Math.max(start, range[1]); - int overlapEnd = Math.min(end, range[0]); - positions - .add(new int[] - { pos + range[0] - overlapEnd, - pos + range[0] - overlapStart }); - } + int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1); + overlapEnd = (overlapEnd == -1) ? rangeLength - 1 + : Math.min(rangeLength - 1, overlapEnd - mapOffset - 1); + int startPosition = range[0] + step * overlapStart; + int endPosition = range[0] + step * overlapEnd; + mapped.add(new int[] { startPosition, endPosition }); + offsetStart = overlapEnd + 1; + added += Math.abs(endPosition - startPosition) + 1; } + + return added; } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index fd34902..cf10aba 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -30,6 +30,7 @@ import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.List; import org.testng.annotations.BeforeClass; @@ -289,6 +290,7 @@ public class MapListTest * no overlap */ assertNull(ml.locateInFrom(0, 0)); + } /** @@ -310,6 +312,18 @@ public class MapListTest assertEquals("[10, 10, 12, 12, 14, 14]", Arrays.toString(ml.locateInFrom(3, 
3))); assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4))); + + /* + * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8 + */ + ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 }, + new int[] + { 1, 1, 3, 4, 6, 8 }, 3, 1); + assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom(1, 1)); + assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom(1, 3)); + assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom(1, 4)); + assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom(3, 4)); + } @Test(groups = { "Functional" }) @@ -880,7 +894,7 @@ public class MapListTest toRanges = compound.getToRanges(); assertEquals(2, toRanges.size()); assertArrayEquals(new int[] { 931, 901 }, toRanges.get(0)); - assertArrayEquals(new int[] { 600, 582 }, toRanges.get(1)); + assertArrayEquals(new int[] { 600, 582}, toRanges.get(1)); /* * 1:1 plus 1:3 should result in 1:3 @@ -1198,176 +1212,220 @@ public class MapListTest } /** - * Tests for helper method that adds any overlap (plus offset) to a list of + * Tests for helper method that adds any overlap (plus offset) to a set of * overlaps */ @Test(groups = { "Functional" }) - public void testAddOverlap() + public void testAddOffsetPositions() { - List overlaps = new ArrayList<>(); - int[] candidate = new int[] { 10, 19 }; - MapList.addOverlap(overlaps, 5, candidate, 20, 30); // doesn't overlap - assertTrue(overlaps.isEmpty()); - MapList.addOverlap(overlaps, 5, candidate, 31, 40); // doesn't overlap - assertTrue(overlaps.isEmpty()); + List mapped = new ArrayList<>(); + int[] range = new int[] {10, 20}; + BitSet offsets = new BitSet(); - /* - * 10-19 overlaps 15-25 at 15-19, which is offset 5-9 in 10-19 - * + 5 initial offset - */ - MapList.addOverlap(overlaps, 5, candidate, 15, 25); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 10, 14 }, overlaps.get(0)); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // nothing marked for 
overlap + + offsets.set(11); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // no offset 11 in range + + offsets.set(4, 6); // this sets bits 4 and 5 + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] { 14, 15 }, mapped.get(0)); + + mapped.clear(); + offsets.set(10); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 14, 15 }, mapped.get(0)); + assertArrayEquals(new int[] { 20, 20 }, mapped.get(1)); /* - * reverse range overlap: - * 300-20 overlaps 15-25 at 25-20, which is offset 275-280 in 300-20 - * + 8 initial offset + * reverse range */ - overlaps.clear(); - candidate = new int[] { 300, 20 }; - MapList.addOverlap(overlaps, 8, candidate, 15, 25); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 283, 288 }, overlaps.get(0)); + range = new int[] { 20, 10 }; + mapped.clear(); + offsets.clear(); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // nothing marked for overlap + offsets.set(11); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // no offset 11 in range + offsets.set(0); + offsets.set(10); + offsets.set(6, 8); // sets bits 6 and 7 + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(3, mapped.size()); + assertArrayEquals(new int[] { 20, 20 }, mapped.get(0)); + assertArrayEquals(new int[] { 14, 13 }, mapped.get(1)); + assertArrayEquals(new int[] { 10, 10 }, mapped.get(2)); } - + @Test(groups = { "Functional" }) - public void testFindOverlapPositions() + public void testGetPositionsForOffsets() { List ranges = new ArrayList<>(); - List overlaps = MapList.findOverlapPositions(ranges, 20, 30); - assertTrue(overlaps.isEmpty()); // nothing to overlap - - ranges.add(new int[] { 15, 25 }); - overlaps = MapList.findOverlapPositions(ranges, 5, 10); - 
assertTrue(overlaps.isEmpty()); // no overlap - - overlaps = MapList.findOverlapPositions(ranges, 20, 20); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 5, 5 }, overlaps.get(0)); - - overlaps = MapList.findOverlapPositions(ranges, 5, 19); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 0, 4 }, overlaps.get(0)); - - ranges.add(new int[] { 35, 45 }); - overlaps = MapList.findOverlapPositions(ranges, 26, 34); - assertTrue(overlaps.isEmpty()); - + BitSet offsets = new BitSet(); + List mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertTrue(mapped.isEmpty()); // no ranges and no offsets! + + offsets.set(5, 1000); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertTrue(mapped.isEmpty()); // no ranges + /* - * 24-37 overlaps the end of 15-25 and the start of 35-45 - * - offset positions are contiguous in the map so merged + * one range with overlap of offsets */ - overlaps = MapList.findOverlapPositions(ranges, 24, 37); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 9, 13 }, overlaps.get(0)); - + ranges.add(new int[] {15, 25}); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] {20, 25}, mapped.get(0)); + /* - * EMBL:MN908947 https://www.ebi.ac.uk/ena/browser/api/embl/MN908947 - * (Covid-SARS-2) CDS mapping with 'slippage' - * (base 13468 is used twice in transcription) + * two ranges */ - ranges.clear(); - ranges.add(new int[] { 266, 13468 }); - ranges.add(new int[] { 13468, 21555 }); - - // 13468 occupies two offsets in the range list - overlaps = MapList.findOverlapPositions(ranges, 13468, 13468); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 13202, 13203 }, overlaps.get(0)); - overlaps = MapList.findOverlapPositions(ranges, 13469, 13470); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 13204, 13205 }, overlaps.get(0)); - + ranges.add(new int[] {300, 320}); + 
mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] {20, 25}, mapped.get(0)); + assertArrayEquals(new int[] {300, 320}, mapped.get(1)); + /* - * EMBL:J03321 https://www.ebi.ac.uk/ena/browser/api/embl/J03321 - * circular dna: CDS at [7022-7502, 1-437] - * = 481 + 437 = 918 bases = 305 aa's + stop codon + * boundary case - right end of first range overlaps */ - ranges.clear(); - ranges.add(new int[] { 7022, 7502 }); - ranges.add(new int[] { 1, 437 }); - - overlaps = MapList.findOverlapPositions(ranges, 438, 7021); - assertTrue(overlaps.isEmpty()); - - // overlap first exon only: - overlaps = MapList.findOverlapPositions(ranges, 7000, 7100); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 0, 78 }, overlaps.get(0)); - - // overlap second exon only: offset to mapping includes first exon - overlaps = MapList.findOverlapPositions(ranges, 400, 500); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 880, 917 }, overlaps.get(0)); - - // overlap both exons: first exon overlap precedes second exon overlap - // offsets of overlaps are not contiguous - overlaps = MapList.findOverlapPositions(ranges, 200, 7500); - assertEquals(2, overlaps.size()); - // first overlap is offsets of 7022-7500 in exon 1 (7022-7502): - assertArrayEquals(new int[] { 0, 478 }, overlaps.get(0)); - // second overlap offsets is (first exon length = 481) + (200-437) - assertArrayEquals(new int[] { 680, 917 }, overlaps.get(1)); - } - - @Test(groups = { "Functional" }) - public void testMapWords() - { - List ranges = new ArrayList<>(); - + offsets.clear(); + offsets.set(10); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] {25, 25}, mapped.get(0)); + /* - * 1:1 (trivial) case + * boundary case - left end of second range overlaps */ - ranges.add(new int[] { 2, 4 }); - ranges.add(new int[] { 6, 9 }); - MapList.mapWords(ranges, 1, 
1); - assertEquals(ranges.size(), 2); - assertArrayEquals(new int[] { 2, 4 }, ranges.get(0)); - assertArrayEquals(new int[] { 6, 9 }, ranges.get(1)); - + offsets.set(11); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] {25, 25}, mapped.get(0)); + assertArrayEquals(new int[] {300, 300}, mapped.get(1)); + /* - * 1:3 case (peptide to codon ranges) + * offsets into a circular range are reported in + * the order in which they are traversed */ - MapList.mapWords(ranges, 1, 3); - assertEquals(ranges.size(), 2); - assertArrayEquals(new int[] { 6, 14 }, ranges.get(0)); - assertArrayEquals(new int[] { 18, 29 }, ranges.get(1)); + ranges.clear(); + ranges.add(new int[] {100, 150}); + ranges.add(new int[] {60, 80}); + offsets.clear(); + offsets.set(45, 55); // sets bits 45 to 54 + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] {145, 150}, mapped.get(0)); // offsets 45-50 + assertArrayEquals(new int[] {60, 63}, mapped.get(1)); // offsets 51-54 /* - * 3:1 case (codon or part codon to peptide) + * reverse range overlap is reported with start < end */ ranges.clear(); - ranges.add(new int[] { 0, 5 }); // 2 whole codons - ranges.add(new int[] { 7, 11 }); // part + whole codon - ranges.add(new int[] { 15, 19 }); // whole + part codon - ranges.add(new int[] { 23, 27 }); // part + part codon - ranges.add(new int[] { 30, 30 }); // first base of codon - ranges.add(new int[] { 31, 31 }); // second base of codon - ranges.add(new int[] { 32, 32 }); // third base of codon - MapList.mapWords(ranges, 3, 1); - assertEquals(ranges.size(), 7); - assertArrayEquals(new int[] { 0, 1 }, ranges.get(0)); - assertArrayEquals(new int[] { 2, 3 }, ranges.get(1)); - assertArrayEquals(new int[] { 5, 6 }, ranges.get(2)); - assertArrayEquals(new int[] { 7, 9 }, ranges.get(3)); - assertArrayEquals(new int[] { 10, 10 }, ranges.get(4)); - assertArrayEquals(new int[] 
{ 10, 10 }, ranges.get(5)); - assertArrayEquals(new int[] { 10, 10 }, ranges.get(6)); + ranges.add(new int[] {4321, 4000}); + offsets.clear(); + offsets.set(20, 22); // sets bits 20 and 21 + offsets.set(30); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] {4301, 4300}, mapped.get(0)); + assertArrayEquals(new int[] {4291, 4291}, mapped.get(1)); } - + @Test(groups = { "Functional" }) - public void testLocateInFrom2() + public void testGetMappedOffsetsForPositions() { /* - * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8 + * start by verifying the examples in the method's Javadoc! */ - MapList ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 }, - new int[] - { 1, 1, 3, 4, 6, 8 }, 3, 1); - assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom2(1, 1)); - assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom2(1, 3)); - assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom2(1, 4)); - assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom2(3, 4)); + List ranges = new ArrayList<>(); + ranges.add(new int[] {10, 20}); + ranges.add(new int[] {31, 40}); + BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, 1); + assertTrue(overlaps.isEmpty()); + overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(0)); + assertTrue(overlaps.get(1)); + overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1); + assertEquals(11, overlaps.cardinality()); + for (int i = 5 ; i <= 11 ; i++) + { + assertTrue(overlaps.get(i)); + } + + ranges.clear(); + ranges.add(new int[] {1, 200}); + overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(24)); + assertTrue(overlaps.get(25)); + assertTrue(overlaps.get(26)); + + ranges.clear(); + ranges.add(new int[] {101, 150}); + ranges.add(new 
int[] {171, 180}); + overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1); + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(0)); + overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1); + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(16)); + + ranges.clear(); + ranges.add(new int[] {101, 150}); + ranges.add(new int[] {21, 30}); + overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(17)); + assertTrue(overlaps.get(18)); + assertTrue(overlaps.get(19)); + + /* + * reverse range 1:1 (e.g. reverse strand gene to transcript) + */ + ranges.clear(); + ranges.add(new int[] {20, 10}); + overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(7)); + assertTrue(overlaps.get(8)); + + /* + * reverse range 3:1 (e.g. reverse strand gene to peptide) + * from EMBL:J03321 to P0CE20 + */ + ranges.clear(); + ranges.add(new int[] {1480, 488}); + overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, 1); + // 1460 is the end of the 7th codon + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + // add one base (part codon) + overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add second base (part codon) + overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add third base (whole codon) + overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add one more base (part codon) + overlaps = 
MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, 1); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + assertTrue(overlaps.get(8)); } }