X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMapList.java;h=0d71bb49b360993afd3ddc21a0ab58791c693ee3;hb=d89bd711a41eb3c1cd66d01c0067e1ddb5261ef9;hp=3ce0bb3047b15540264f4130fbe60d9b7957338e;hpb=a738a37240b13fa22aa7a3333c0a58c71759ab22;p=jalview.git diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 3ce0bb3..0d71bb4 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -77,8 +77,8 @@ public class MapList */ public MapList() { - fromShifts = new ArrayList(); - toShifts = new ArrayList(); + fromShifts = new ArrayList<>(); + toShifts = new ArrayList<>(); } /** @@ -116,8 +116,17 @@ public class MapList { int hashCode = 31 * fromRatio; hashCode = 31 * hashCode + toRatio; - hashCode = 31 * hashCode + fromShifts.toArray().hashCode(); - hashCode = 31 * hashCode + toShifts.toArray().hashCode(); + for (int[] shift : fromShifts) + { + hashCode = 31 * hashCode + shift[0]; + hashCode = 31 * hashCode + shift[1]; + } + for (int[] shift : toShifts) + { + hashCode = 31 * hashCode + shift[0]; + hashCode = 31 * hashCode + shift[1]; + } + return hashCode; } @@ -200,8 +209,7 @@ public class MapList /** * Constructor given from and to ranges as [start1, end1, start2, end2,...]. - * If any end is equal to the next start, the ranges will be merged. There is - * no validation check that the ranges do not overlap each other. + * There is no validation check that the ranges do not overlap each other. * * @param from * contiguous regions as [start1, end1, start2, end2, ...] 
@@ -219,7 +227,6 @@ public class MapList this.toRatio = toRatio; fromLowest = Integer.MAX_VALUE; fromHighest = Integer.MIN_VALUE; - int added = 0; for (int i = 0; i < from.length; i += 2) { @@ -229,36 +236,16 @@ public class MapList */ fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1])); fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1])); - if (added > 0 && from[i] == fromShifts.get(added - 1)[1]) - { - /* - * this range starts where the last ended - just extend it - */ - fromShifts.get(added - 1)[1] = from[i + 1]; - } - else - { - fromShifts.add(new int[] { from[i], from[i + 1] }); - added++; - } + fromShifts.add(new int[] { from[i], from[i + 1] }); } toLowest = Integer.MAX_VALUE; toHighest = Integer.MIN_VALUE; - added = 0; for (int i = 0; i < to.length; i += 2) { toLowest = Math.min(toLowest, Math.min(to[i], to[i + 1])); toHighest = Math.max(toHighest, Math.max(to[i], to[i + 1])); - if (added > 0 && to[i] == toShifts.get(added - 1)[1]) - { - toShifts.get(added - 1)[1] = to[i + 1]; - } - else - { - toShifts.add(new int[] { to[i], to[i + 1] }); - added++; - } + toShifts.add(new int[] { to[i], to[i + 1] }); } } @@ -318,6 +305,12 @@ public class MapList fromHighest = Integer.MIN_VALUE; for (int[] range : fromRange) { + if (range.length != 2) + { + // throw new IllegalArgumentException(range); + System.err.println("Invalid format for fromRange " + + Arrays.toString(range) + " may cause errors"); + } fromLowest = Math.min(fromLowest, Math.min(range[0], range[1])); fromHighest = Math.max(fromHighest, Math.max(range[0], range[1])); } @@ -326,6 +319,12 @@ public class MapList toHighest = Integer.MIN_VALUE; for (int[] range : toRange) { + if (range.length != 2) + { + // throw new IllegalArgumentException(range); + System.err.println("Invalid format for toRange " + + Arrays.toString(range) + " may cause errors"); + } toLowest = Math.min(toLowest, Math.min(range[0], range[1])); toHighest = Math.max(toHighest, Math.max(range[0], range[1])); } @@ 
-334,6 +333,17 @@ public class MapList /** * Consolidates a list of ranges so that any contiguous ranges are merged. * This assumes the ranges are already in start order (does not sort them). + *

+ * The main use case for this method is when mapping cDNA sequence to its + * protein product, based on CDS feature ranges which derive from spliced + * exons, but are contiguous on the cDNA sequence. For example + * + *

+   *   CDS 1-20  // from exon1
+   *   CDS 21-35 // from exon2
+   *   CDS 36-71 // from exon3
+   * 'coalesce' to range 1-71
+   * 
* * @param ranges * @return the same list (if unchanged), else a new merged list, leaving the @@ -347,7 +357,7 @@ public class MapList } boolean changed = false; - List merged = new ArrayList(); + List merged = new ArrayList<>(); int[] lastRange = ranges.get(0); int lastDirection = lastRange[1] >= lastRange[0] ? 1 : -1; lastRange = new int[] { lastRange[0], lastRange[1] }; @@ -361,27 +371,6 @@ public class MapList first = false; continue; } - if (range[0] == lastRange[0] && range[1] == lastRange[1]) - { - // drop duplicate range - changed = true; - continue; - } - - /* - * drop this range if it lies within the last range - */ - if ((lastDirection == 1 && range[0] >= lastRange[0] - && range[0] <= lastRange[1] && range[1] >= lastRange[0] - && range[1] <= lastRange[1]) - || (lastDirection == -1 && range[0] <= lastRange[0] - && range[0] >= lastRange[1] - && range[1] <= lastRange[0] - && range[1] >= lastRange[1])) - { - changed = true; - continue; - } int direction = range[1] >= range[0] ? 1 : -1; @@ -392,11 +381,7 @@ public class MapList boolean sameDirection = range[1] == range[0] || direction == lastDirection; boolean extending = range[0] == lastRange[1] + lastDirection; - boolean overlapping = (lastDirection == 1 && range[0] >= lastRange[0] - && range[0] <= lastRange[1]) - || (lastDirection == -1 && range[0] <= lastRange[0] - && range[0] >= lastRange[1]); - if (sameDirection && (overlapping || extending)) + if (sameDirection && extending) { lastRange[1] = range[1]; changed = true; @@ -577,37 +562,51 @@ public class MapList List shiftFrom, int toRatio) { // TODO: javadoc; tests - int[] fromCount = countPos(shiftTo, pos); + int[] fromCount = countPositions(shiftTo, pos); if (fromCount == null) { return null; } int fromRemainder = (fromCount[0] - 1) % fromRatio; int toCount = 1 + (((fromCount[0] - 1) / fromRatio) * toRatio); - int[] toPos = countToPos(shiftFrom, toCount); + int[] toPos = traverseToPosition(shiftFrom, toCount); if (toPos == null) { - return null; // 
throw new Error("Bad Mapping!"); + return null; } - // System.out.println(fromCount[0]+" "+fromCount[1]+" "+toCount); return new int[] { toPos[0], fromRemainder, toPos[1] }; } /** - * count how many positions pos is along the series of intervals. + * Counts how many positions pos is along the series of intervals. Returns an + * array of two values: + *
    + *
  • the number of positions traversed (inclusive) to reach {@code pos}
  • + *
  • +1 if the last interval traversed is forward, -1 if in a negative + * direction
  • + *
+ * Returns null if {@code pos} does not lie in any of the given intervals. * - * @param shiftTo + * @param intervals + * a list of start-end intervals * @param pos - * @return number of positions or null if pos is not within intervals + * a position that may lie in one (or more) of the intervals + * @return */ - protected static int[] countPos(List shiftTo, int pos) + protected static int[] countPositions(List intervals, int pos) { - int count = 0, intv[], iv = 0, ivSize = shiftTo.size(); + int count = 0; + int iv = 0; + int ivSize = intervals.size(); + while (iv < ivSize) { - intv = shiftTo.get(iv++); + int[] intv = intervals.get(iv++); if (intv[0] <= intv[1]) { + /* + * forwards interval + */ if (pos >= intv[0] && pos <= intv[1]) { return new int[] { count + pos - intv[0] + 1, +1 }; @@ -619,6 +618,9 @@ public class MapList } else { + /* + * reverse interval + */ if (pos >= intv[1] && pos <= intv[0]) { return new int[] { count + intv[0] - pos + 1, -1 }; @@ -633,44 +635,61 @@ public class MapList } /** - * count out pos positions into a series of intervals and return the position + * Reads through the given intervals until {@code count} positions have been + * traversed, and returns an array consisting of two values: + *
    + *
  • the value at the {@code count'th} position
  • + *
  • +1 if the last interval read is forwards, -1 if reverse direction
  • + *
+ * Returns null if the ranges include less than {@code count} positions, or if + * {@code count < 1}. * - * @param shiftFrom - * @param pos - * @return position pos in interval set + * @param intervals + * a list of [start, end] ranges + * @param count + * the number of positions to traverse + * @return */ - protected static int[] countToPos(List shiftFrom, int pos) + protected static int[] traverseToPosition(List intervals, + final int count) { - int count = 0, diff = 0, iv = 0, ivSize = shiftFrom.size(); - int[] intv = { 0, 0 }; + int traversed = 0; + int ivSize = intervals.size(); + int iv = 0; + + if (count < 1) + { + return null; + } + while (iv < ivSize) { - intv = shiftFrom.get(iv++); - diff = intv[1] - intv[0]; + int[] intv = intervals.get(iv++); + int diff = intv[1] - intv[0]; if (diff >= 0) { - if (pos <= count + 1 + diff) + if (count <= traversed + 1 + diff) { - return new int[] { pos - count - 1 + intv[0], +1 }; + return new int[] { intv[0] + (count - traversed - 1), +1 }; } else { - count += 1 + diff; + traversed += 1 + diff; } } else { - if (pos <= count + 1 - diff) + if (count <= traversed + 1 - diff) { - return new int[] { intv[0] - (pos - count - 1), -1 }; + return new int[] { intv[0] - (count - traversed - 1), -1 }; } else { - count += 1 - diff; + traversed += 1 - diff; } } } - return null;// (diff<0) ? 
(intv[1]-1) : (intv[0]+1); + return null; } /** @@ -684,12 +703,13 @@ public class MapList */ public int[] locateInFrom(int start, int end) { + return locateInFrom2(start, end); + // inefficient implementation - int fromStart[] = shiftTo(start); + // int fromStart[] = shiftTo(start); // needs to be inclusive of end of symbol position - int fromEnd[] = shiftTo(end); - - return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); + // int fromEnd[] = shiftTo(end); + // return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); } /** @@ -703,9 +723,11 @@ public class MapList */ public int[] locateInTo(int start, int end) { - int toStart[] = shiftFrom(start); - int toEnd[] = shiftFrom(end); - return getIntervals(toShifts, toStart, toEnd, toRatio); + return locateInTo2(start, end); + + // int toStart[] = shiftFrom(start); + // int toEnd[] = shiftFrom(end); + // return getIntervals(toShifts, toStart, toEnd, toRatio); } /** @@ -803,7 +825,7 @@ public class MapList { return null; } - List ranges = new ArrayList(); + List ranges = new ArrayList<>(); if (fs <= fe) { intv = fs; @@ -1094,8 +1116,33 @@ public class MapList */ public boolean isFromForwardStrand() { + return isForwardStrand(getFromRanges()); + } + + /** + * Returns true if mapping is to forward strand, false if to reverse strand. + * Result is just based on the first 'to' range that is not a single position. + * Default is true unless proven to be false. Behaviour is not well defined if + * the mapping has a mixture of forward and reverse ranges. + * + * @return + */ + public boolean isToForwardStrand() + { + return isForwardStrand(getToRanges()); + } + + /** + * A helper method that returns true unless at least one range has start > + * end. Behaviour is undefined for a mixture of forward and reverse ranges. 
+ * + * @param ranges + * @return + */ + private boolean isForwardStrand(List ranges) + { boolean forwardStrand = true; - for (int[] range : getFromRanges()) + for (int[] range : ranges) { if (range[1] > range[0]) { @@ -1168,15 +1215,323 @@ public class MapList List toRanges = new ArrayList<>(); for (int[] range : getToRanges()) { + int fromLength = Math.abs(range[1] - range[0]) + 1; int[] transferred = map.locateInTo(range[0], range[1]); - if (transferred == null) + if (transferred == null || transferred.length % 2 != 0) + { + return null; + } + + /* + * convert [start1, end1, start2, end2, ...] + * to [[start1, end1], [start2, end2], ...] + */ + int toLength = 0; + for (int i = 0; i < transferred.length;) + { + toRanges.add(new int[] { transferred[i], transferred[i + 1] }); + toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1; + i += 2; + } + + /* + * check we mapped the full range - if not, abort + */ + if (fromLength * map.getToRatio() != toLength * map.getFromRatio()) { return null; } - toRanges.add(transferred); } return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); } + /** + * Answers true if the mapping is from one contiguous range to another, else + * false + * + * @return + */ + public boolean isContiguous() + { + return fromShifts.size() == 1 && toShifts.size() == 1; + } + + /** + * Returns the [start, end, start, end, ...] ranges in the 'from' range that + * map to positions between {@code start} and {@code end} in the 'to' range. + * Returns null if no mapped positions are found in start-end. + * + * @param start + * @param end + * @return + */ + public int[] locateInFrom2(int start, int end) + { + List ranges = mapBetween(start, end, toShifts, fromShifts, + toRatio, fromRatio); + + // TODO: or just return the List and adjust calling code to match + return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); + } + + /** + * Returns the [start, end, start, end, ...] 
ranges in the 'to' range that map + * to the given start-end in the 'from' range. Returns null if either + * {@code start} or {@code end} is not a mapped 'from' range position. + * + * @param start + * @param end + * @return + */ + public int[] locateInTo2(int start, int end) + { + List ranges = mapBetween(start, end, fromShifts, toShifts, + fromRatio, toRatio); + + return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges); + } + + /** + * A helper method for navigating the mapping. Returns a (possibly empty) list + * of [start-end] positions in {@code ranges2} that map to positions in + * {@code ranges1} between {@code start} and {@code end}. + * + * @param start + * @param end + * @param ranges1 + * @param ranges2 + * @param wordLength1 + * @param wordLength2 + * @return + */ + final static List mapBetween(int start, int end, + List ranges1, List ranges2, int wordLength1, + int wordLength2) + { + if (end < start) + { + int tmp = end; + end = start; + start = tmp; + } + + /* + * first traverse ranges1 and record count of mapped positions + * to any that overlap start-end + */ + List overlaps = findOverlapPositions(ranges1, start, end); + if (overlaps.isEmpty()) + { + return overlaps; + } + + /* + * convert positions to equivalent 'word' positions in ranges2 + */ + mapWords(overlaps, wordLength1, wordLength2); + + /* + * walk ranges2 and record the values found at + * the offsets in 'overlaps' + */ + List mapped = new ArrayList<>(); + final int s1 = overlaps.size(); + final int s2 = ranges2.size(); + int ranges2Index = 0; + + /* + * count of mapped positions preceding ranges2[ranges2Index] + */ + int traversed = 0; + + /* + * for each [from-to] range in overlaps: + * - walk (what remains of) ranges2 + * - record the values at offsets [from-to] + * - stop when past 'to' positions (or at end of ranges2) + */ + for (int i = 0; i < s1; i++) + { + int[] overlap = overlaps.get(i); + final int toAdd = overlap[1] - overlap[0] + 1; + int added = 0; // how much 
of overlap has been 'found' + for (; added < toAdd && ranges2Index < s2; ranges2Index++) + { + int[] range2 = ranges2.get(ranges2Index); + int rangeStart = range2[0]; + int rangeEnd = range2[1]; + boolean reverseStrand = range2[1] < range2[0]; + int rangeLength = Math.abs(rangeEnd - rangeStart) + 1; + if (traversed + rangeLength <= overlap[0]) + { + /* + * precedes overlap - keep looking + */ + traversed += rangeLength; + continue; + } + int overlapStart = overlap[0] - traversed; + int overlapEnd = Math.min(overlapStart + toAdd - added - 1, + rangeLength - 1); + int mappedFrom = range2[0] + (reverseStrand ? - overlapStart : overlapStart); + int mappedTo = range2[0] + (reverseStrand ? - overlapEnd : overlapEnd); + mapped.add(new int[] { mappedFrom, mappedTo }); + int found = overlapEnd - overlapStart + 1; + added += found; + overlap[0] += found; + traversed += rangeLength; + } + } + + return mapped; + } + + /** + * Converts the start-end positions (counted from zero) in the {@code ranges} + * list from one word length to another. Start-end positions are expanded if + * necessary to cover a whole word of length {@code wordLength1}. Positions + * are then divided by {@code wordLength1} and multiplied by + * {@code wordLength2} to give equivalent mapped words. + *

+ * Put simply, this converts peptide residue positions to the corresponding + * codon ranges, and codons - including partial codons - to the corresponding + * peptide positions; for example + * + *

+   * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [0, 3]
+   * 
+ * + * @param ranges + * @param wordLength1 + * @param wordLength2 + * @return + */ + final static void mapWords(List ranges, int wordLength1, + int wordLength2) + { + if (wordLength1 == 1 && wordLength2 == 1) + { + return; // nothing to do here + } + int s = ranges.size(); + for (int i = 0; i < s; i++) + { + int[] range = ranges.get(i); + + /* + * expand range start to the start of a word, + * and convert to wordLength2 + */ + range[0] -= range[0] % wordLength1; + range[0] = range[0] / wordLength1 * wordLength2; + + /* + * similar calculation for range end, adding + * (wordLength2 - 1) for end of mapped word + */ + range[1] -= range[1] % wordLength1; + range[1] = range[1] / wordLength1 * wordLength2; + range[1] += wordLength2 - 1; + } + } + + /** + * Helper method that returns a (possibly empty) list of offsets in + * {@code ranges} to subranges that overlap {@code start-end} (where start <= + * end). The list returned holds counts of the number of positions traversed + * (exclusive) to reach the overlapping positions, not the overlapping values. + * Returns an empty list if there are no overlaps. + * + * @param ranges + * @param start + * @param end + * @return + */ + final static List findOverlapPositions(List ranges, + int start, int end) + { + List positions = new ArrayList<>(); + int pos = 0; + int s = ranges.size(); + for (int i = 0; i < s; i++) + { + int[] range = ranges.get(i); + addOverlap(positions, pos, range, start, end); + pos += 1 + Math.abs(range[1] - range[0]); + } + return positions; + } + + /** + * A helper method that checks whether {@code range} overlaps + * {@code start-end}, and if so adds the offset of the overlap in + * {@code range}, plus {@code pos}, to {@code positions}. 
+ * + * @param positions + * a list of map offsets to add to + * @param pos + * the number of mapped positions already visited + * @param range + * a from-to range (may be forward or reverse) + * @param start + * position to test for overlap in range + * @param end + * position to test for overlap in range + * @return + */ + final static void addOverlap(List positions, int pos, int[] range, + int start, int end) + { + if (range[1] >= range[0]) + { + /* + * forward direction range + */ + if (start <= range[1] && end >= range[0]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[0]); + int overlapStartOffset = pos + overlapStart - range[0]; + int overlapEnd = Math.min(end, range[1]); + int overlapEndOffset = pos + overlapEnd - range[0]; + int[] lastOverlap = positions.isEmpty() ? null + : positions.get(positions.size() - 1); + if (lastOverlap != null && overlapStartOffset == lastOverlap[1] + 1) + { + /* + * just extending the last overlap range + */ + lastOverlap[1] = overlapEndOffset; + } + else + { + /* + * add a new (discontiguous) overlap range + */ + positions.add(new int[] { overlapStartOffset, overlapEndOffset }); + } + } + } + else + { + /* + * reverse direction range + */ + if (start <= range[0] && end >= range[1]) + { + /* + * overlap + */ + int overlapStart = Math.max(start, range[1]); + int overlapEnd = Math.min(end, range[0]); + positions + .add(new int[] + { pos + range[0] - overlapEnd, + pos + range[0] - overlapStart }); + } + } + } }