X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMapList.java;h=0f3fc2e4cfea5d718281c4988fee2c9871253f43;hb=a1984b1c8c273ed33c7ce9283039f4027dcae2de;hp=e095106e95cc197f2059233a6f959b52a7218043;hpb=468df74e8febbe5066f9544bf7e0aa07311586ed;p=jalview.git diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index e095106..0f3fc2e 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -22,16 +22,17 @@ package jalview.util; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.List; +import jalview.bin.Cache; + /** * A simple way of bijectively mapping a non-contiguous linear range to another * non-contiguous linear range. * * Use at your own risk! * - * TODO: efficient implementation of private posMap method - * * TODO: test/ensure that sense of from and to ratio start position is conserved * (codon start position recovery) */ @@ -308,7 +309,7 @@ public class MapList if (range.length != 2) { // throw new IllegalArgumentException(range); - System.err.println("Invalid format for fromRange " + Cache.error("Invalid format for fromRange " + Arrays.toString(range) + " may cause errors"); } fromLowest = Math.min(fromLowest, Math.min(range[0], range[1])); @@ -322,7 +323,7 @@ public class MapList if (range.length != 2) { // throw new IllegalArgumentException(range); - System.err.println("Invalid format for toRange " + Cache.error("Invalid format for toRange " + Arrays.toString(range) + " may cause errors"); } toLowest = Math.min(toLowest, Math.min(range[0], range[1])); @@ -406,7 +407,7 @@ public class MapList */ protected int[][] makeFromMap() { - // TODO not used - remove?? + // TODO only used for test - remove?? return posMap(fromShifts, fromRatio, toShifts, toRatio); } @@ -417,7 +418,7 @@ public class MapList */ protected int[][] makeToMap() { - // TODO not used - remove?? + // TODO only used for test - remove?? 
return posMap(toShifts, toRatio, fromShifts, fromRatio); } @@ -428,10 +429,10 @@ public class MapList * @return int[] { from, to pos in range }, int[range.to-range.from+1] * returning mapped position */ - private int[][] posMap(List shiftTo, int ratio, - List shiftFrom, int toRatio) + private int[][] posMap(List shiftTo, int sourceRatio, + List shiftFrom, int targetRatio) { - // TODO not used - remove?? + // TODO only used for test - remove?? int iv = 0, ivSize = shiftTo.size(); if (iv >= ivSize) { @@ -468,7 +469,8 @@ public class MapList int mp[][] = new int[to - from + 2][]; for (int i = 0; i < mp.length; i++) { - int[] m = shift(i + from, shiftTo, ratio, shiftFrom, toRatio); + int[] m = shift(i + from, shiftTo, sourceRatio, shiftFrom, + targetRatio); if (m != null) { if (i == 0) @@ -562,7 +564,7 @@ public class MapList List shiftFrom, int toRatio) { // TODO: javadoc; tests - int[] fromCount = countPos(shiftTo, pos); + int[] fromCount = countPositions(shiftTo, pos); if (fromCount == null) { return null; @@ -572,27 +574,41 @@ public class MapList int[] toPos = traverseToPosition(shiftFrom, toCount); if (toPos == null) { - return null; // throw new Error("Bad Mapping!"); + return null; } - // System.out.println(fromCount[0]+" "+fromCount[1]+" "+toCount); return new int[] { toPos[0], fromRemainder, toPos[1] }; } /** - * count how many positions pos is along the series of intervals. + * Counts how many positions pos is along the series of intervals. Returns an + * array of two values: + *
<ul>
+ * <li>the number of positions traversed (inclusive) to reach {@code pos}</li>
+ * <li>+1 if the last interval traversed is forward, -1 if in a negative + * direction</li>
+ * </ul>
+ * Returns null if {@code pos} does not lie in any of the given intervals. * - * @param shiftTo + * @param intervals + * a list of start-end intervals * @param pos - * @return number of positions or null if pos is not within intervals + * a position that may lie in one (or more) of the intervals + * @return */ - protected static int[] countPos(List shiftTo, int pos) + protected static int[] countPositions(List intervals, int pos) { - int count = 0, intv[], iv = 0, ivSize = shiftTo.size(); + int count = 0; + int iv = 0; + int ivSize = intervals.size(); + while (iv < ivSize) { - intv = shiftTo.get(iv++); + int[] intv = intervals.get(iv++); if (intv[0] <= intv[1]) { + /* + * forwards interval + */ if (pos >= intv[0] && pos <= intv[1]) { return new int[] { count + pos - intv[0] + 1, +1 }; @@ -604,6 +620,9 @@ public class MapList } else { + /* + * reverse interval + */ if (pos >= intv[1] && pos <= intv[0]) { return new int[] { count + intv[0] - pos + 1, -1 }; @@ -676,41 +695,6 @@ public class MapList } /** - * find series of intervals mapping from start-end in the From map. - * - * @param start - * position mapped 'to' - * @param end - * position mapped 'to' - * @return series of [start, end] ranges in sequence mapped 'from' - */ - public int[] locateInFrom(int start, int end) - { - // inefficient implementation - int fromStart[] = shiftTo(start); - // needs to be inclusive of end of symbol position - int fromEnd[] = shiftTo(end); - - return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); - } - - /** - * find series of intervals mapping from start-end in the to map. 
- * - * @param start - * position mapped 'from' - * @param end - * position mapped 'from' - * @return series of [start, end] ranges in sequence mapped 'to' - */ - public int[] locateInTo(int start, int end) - { - int toStart[] = shiftFrom(start); - int toEnd[] = shiftFrom(end); - return getIntervals(toShifts, toStart, toEnd, toRatio); - } - - /** * like shift - except returns the intervals in the given vector of shifts * which were spanned in traversing fromStart to fromEnd * @@ -886,7 +870,6 @@ public class MapList */ public int getToPosition(int mpos) { - // TODO not used - remove?? int[] mp = shiftTo(mpos); if (mp != null) { @@ -896,53 +879,6 @@ public class MapList } /** - * get range of positions in To frame for the mpos word in From - * - * @param mpos - * position in From - * @return null or int[] first position in To for mpos, last position in to - * for Mpos - */ - public int[] getToWord(int mpos) - { - int[] mp = shiftTo(mpos); - if (mp != null) - { - return new int[] { mp[0], mp[0] + mp[2] * (getFromRatio() - 1) }; - } - return null; - } - - /** - * get From position in the associated reference frame for position pos in the - * associated sequence. - * - * @param pos - * @return - */ - public int getMappedPosition(int pos) - { - // TODO not used - remove?? - int[] mp = shiftFrom(pos); - if (mp != null) - { - return mp[0]; - } - return pos; - } - - public int[] getMappedWord(int pos) - { - // TODO not used - remove?? 
- int[] mp = shiftFrom(pos); - if (mp != null) - { - return new int[] { mp[0], mp[0] + mp[2] * (getToRatio() - 1) }; - } - return null; - } - - /** * * @return a MapList whose From range is this maplist's To Range, and vice * versa @@ -954,33 +890,6 @@ public class MapList } /** - * test for containment rather than equivalence to another mapping - * - * @param map - * to be tested for containment - * @return true if local or mapped range map contains or is contained by this - * mapping - */ - public boolean containsEither(boolean local, MapList map) - { - // TODO not used - remove? - if (local) - { - return ((getFromLowest() >= map.getFromLowest() - && getFromHighest() <= map.getFromHighest()) - || (getFromLowest() <= map.getFromLowest() - && getFromHighest() >= map.getFromHighest())); - } - else - { - return ((getToLowest() >= map.getToLowest() - && getToHighest() <= map.getToHighest()) - || (getToLowest() <= map.getToLowest() - && getToHighest() >= map.getToHighest())); - } - } - - /** * String representation - for debugging, not guaranteed not to change */ @Override @@ -1195,6 +1104,7 @@ public class MapList List toRanges = new ArrayList<>(); for (int[] range : getToRanges()) { + int fromLength = Math.abs(range[1] - range[0]) + 1; int[] transferred = map.locateInTo(range[0], range[1]); if (transferred == null || transferred.length % 2 != 0) { @@ -1205,11 +1115,21 @@ public class MapList * convert [start1, end1, start2, end2, ...] * to [[start1, end1], [start2, end2], ...] 
*/ + int toLength = 0; for (int i = 0; i < transferred.length;) { toRanges.add(new int[] { transferred[i], transferred[i + 1] }); + toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1; i += 2; } + + /* + * check we mapped the full range - if not, abort + */ + if (fromLength * map.getToRatio() != toLength * map.getFromRatio()) + { + return null; + } } return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); @@ -1225,4 +1145,315 @@ public class MapList { return fromShifts.size() == 1 && toShifts.size() == 1; } + + /** + * Returns the [start1, end1, start2, end2, ...] positions in the + * 'from' range that map to positions between {@code start} and {@code end} in + * the 'to' range. Note that for a reverse strand mapping this will return + * ranges with end < start. Returns null if no mapped positions are found in + * start-end. + * + * @param start + * @param end + * @return [start1, end1, start2, end2, ...] in the 'from' range, or null if + * no positions are mapped + */ + public int[] locateInFrom(int start, int end) + { + return mapPositions(start, end, toShifts, fromShifts, toRatio, + fromRatio); + } + + /** + * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range + * that map to positions between {@code start} and {@code end} in the 'from' + * range. Note that for a reverse strand mapping this will return ranges with + * end < start. Returns null if no mapped positions are found in start-end. + * + * @param start + * @param end + * @return [start1, end1, start2, end2, ...] in the 'to' range, or null if + * no positions are mapped + */ + public int[] locateInTo(int start, int end) + { + return mapPositions(start, end, fromShifts, toShifts, fromRatio, + toRatio); + } + + /** + * Helper method that returns the [start1, end1, start2, end2, ...] positions + * in {@code targetRange} that map to positions between {@code start} and + * {@code end} in {@code sourceRange}. Note that for a reverse strand mapping + * this will return ranges with end < start. Returns null if no mapped + * positions are found in start-end. 
+ * + * @param start + * @param end + * @param sourceRange + * @param targetRange + * @param sourceWordLength + * @param targetWordLength + * @return the mapped [start1, end1, ...] positions, or null if none + */ + final static int[] mapPositions(int start, int end, + List sourceRange, List targetRange, + int sourceWordLength, int targetWordLength) + { + if (end < start) + { + int tmp = end; + end = start; + start = tmp; + } + + /* + * traverse sourceRange and mark offsets in targetRange + * of any positions that lie in [start, end] + */ + BitSet offsets = getMappedOffsetsForPositions(start, end, sourceRange, + sourceWordLength, targetWordLength); + + /* + * traverse targetRange and collect positions at the marked offsets + */ + List mapped = getPositionsForOffsets(targetRange, offsets); + + // TODO: or just return the List and adjust calling code to match + return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped); + } + + /** + * Scans the list of {@code sourceRange} intervals for any values (positions) that lie + * between start and end (inclusive), and records the offsets from + * the start of the list as a BitSet. The offset positions are converted to + * corresponding words in blocks of {@code targetWordLength}. + * + *
<pre>
+   * For example:
+   * 1:1 (e.g. gene to CDS):
+   * ranges { [10-20], [31-40] }, sourceWordLength = targetWordLength = 1
+   *   for start = 1, end = 9, returns a BitSet with no bits set
+   *   for start = 1, end = 11, returns a BitSet with bits 0-1 set
+   *   for start = 15, end = 35, returns a BitSet with bits 5-15 set
+   * 1:3 (peptide to codon):
+   * ranges { [1-200] }, sourceWordLength = 1, targetWordLength = 3
+   *   for start = 9, end = 9, returns a BitSet with bits 24-26 set
+   * 3:1 (codon to peptide):
+   * ranges { [101-150], [171-180] }, sourceWordLength = 3, targetWordLength = 1
+   *   for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+   *   for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+   * 3:1 (circular DNA to peptide):
+   * ranges { [101-150], [21-30] }, sourceWordLength = 3, targetWordLength = 1
+   *   for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
+   * </pre>
+ * + * @param start + * @param end + * @param sourceRange + * @param sourceWordLength + * @param targetWordLength + * @return a BitSet with a bit set at each marked offset (no bits set if + * start-end overlaps none of the ranges) + */ + protected final static BitSet getMappedOffsetsForPositions(int start, + int end, List sourceRange, int sourceWordLength, + int targetWordLength) + { + BitSet overlaps = new BitSet(); + int offset = 0; + final int s1 = sourceRange.size(); + for (int i = 0; i < s1; i++) + { + int[] range = sourceRange.get(i); + final int offset1 = offset; + int overlapStartOffset = -1; + int overlapEndOffset = -1; + + if (range[1] >= range[0]) + { + /* + * forward direction range + */ + if (start <= range[1] && end >= range[0]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[0]); + overlapStartOffset = offset1 + overlapStart - range[0]; + int overlapEnd = Math.min(end, range[1]); + overlapEndOffset = offset1 + overlapEnd - range[0]; + } + } + else + { + /* + * reverse direction range + */ + if (start <= range[0] && end >= range[1]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[1]); + int overlapEnd = Math.min(end, range[0]); + overlapStartOffset = offset1 + range[0] - overlapEnd; + overlapEndOffset = offset1 + range[0] - overlapStart; + } + } + + if (overlapStartOffset > -1) + { + /* + * found an overlap + */ + if (sourceWordLength != targetWordLength) + { + /* + * convert any overlap found to whole words in the target range + * (e.g. 
treat any partial codon overlap as if the whole codon) + */ + overlapStartOffset -= overlapStartOffset % sourceWordLength; + overlapStartOffset = overlapStartOffset / sourceWordLength + * targetWordLength; + + /* + * similar calculation for range end, adding + * (targetWordLength - 1) for end of mapped word + */ + overlapEndOffset -= overlapEndOffset % sourceWordLength; + overlapEndOffset = overlapEndOffset / sourceWordLength + * targetWordLength; + overlapEndOffset += targetWordLength - 1; + } + overlaps.set(overlapStartOffset, overlapEndOffset + 1); + } + offset += 1 + Math.abs(range[1] - range[0]); + } + return overlaps; + } + + /** + * Returns a (possibly empty) list of the [start-end] values (positions) at + * offsets in the {@code targetRange} list that are marked by 'on' bits in the + * {@code offsets} bitset. + * + * @param targetRange + * @param offsets + * @return list of [start, end] pairs; empty if no offsets are marked + */ + protected final static List getPositionsForOffsets( + List targetRange, BitSet offsets) + { + List mapped = new ArrayList<>(); + if (offsets.isEmpty()) + { + return mapped; + } + + /* + * count of positions preceding targetRange[i] + */ + int traversed = 0; + + /* + * for each [from-to] range in targetRange: + * - find subranges (if any) at marked offsets + * - add the start-end values at the marked positions + */ + final int toAdd = offsets.cardinality(); + int added = 0; + final int s2 = targetRange.size(); + for (int i = 0; added < toAdd && i < s2; i++) + { + int[] range = targetRange.get(i); + added += addOffsetPositions(mapped, traversed, range, offsets); + traversed += Math.abs(range[1] - range[0]) + 1; + } + return mapped; + } + + /** + * Helper method that adds any start-end subranges of {@code range} that are + * at offsets in {@code range} marked by set bits in overlaps. + * {@code mapOffset} is added to {@code range} offset positions. Returns the + * count of positions added. 
+ * + * @param mapped + * @param mapOffset + * @param range + * @param overlaps + * @return the count of positions added + */ + final static int addOffsetPositions(List mapped, + final int mapOffset, final int[] range, final BitSet overlaps) + { + final int rangeLength = 1 + Math.abs(range[1] - range[0]); + final int step = range[1] < range[0] ? -1 : 1; + int offsetStart = 0; // offset into range + int added = 0; + + while (offsetStart < rangeLength) + { + /* + * find the start of the next marked overlap offset; + * if there is none, or it is beyond range, then finished + */ + int overlapStart = overlaps.nextSetBit(mapOffset + offsetStart); + if (overlapStart == -1 || overlapStart - mapOffset >= rangeLength) + { + /* + * no more overlaps, or no more within range[] + */ + return added; + } + overlapStart -= mapOffset; + + /* + * end of the overlap range is just before the next clear bit; + * restrict it to end of range if necessary; + * note we may add a reverse strand range here (end < start) + * (BitSet.nextClearBit never returns -1, so the -1 test below never fires) + */ + int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1); + overlapEnd = (overlapEnd == -1) ? rangeLength - 1 + : Math.min(rangeLength - 1, overlapEnd - mapOffset - 1); + int startPosition = range[0] + step * overlapStart; + int endPosition = range[0] + step * overlapEnd; + mapped.add(new int[] { startPosition, endPosition }); + offsetStart = overlapEnd + 1; + added += Math.abs(endPosition - startPosition) + 1; + } + + return added; + } + + /** + * Returns the [start, end...] positions in the range mapped from, that are + * mapped to by part or all of the given begin-end of the range mapped to. + * Returns null if begin-end does not overlap any position mapped to. + * + * @param begin + * @param end + * @return [start, end...] positions in the range mapped from, or null if + * there is no overlap + */ + public int[] getOverlapsInFrom(final int begin, final int end) + { + int[] overlaps = MappingUtils.findOverlap(toShifts, begin, end); + + return overlaps == null ? null : locateInFrom(overlaps[0], overlaps[1]); + } + + /** + * Returns the [start, end...] 
positions in the range mapped to, that are + * mapped to by part or all of the given begin-end of the range mapped from. + * Returns null if begin-end does not overlap any position mapped from. + * + * @param begin + * @param end + * @return [start, end...] positions in the range mapped to, or null if + * there is no overlap + */ + public int[] getOverlapsInTo(final int begin, final int end) + { + int[] overlaps = MappingUtils.findOverlap(fromShifts, begin, end); + + return overlaps == null ? null : locateInTo(overlaps[0], overlaps[1]); + } }