+ }
+ }
+ i++;
+ }
+ if (fs == fe && fe == -1)
+ {
+ return null;
+ }
+ List<int[]> ranges = new ArrayList<>();
+ if (fs <= fe)
+ {
+ intv = fs;
+ i = fs;
+ // truncate initial interval
+ iv = shiftFrom.get(intv++);
+ iv = new int[] { iv[0], iv[1] };// clone
+ if (i == fs)
+ {
+ iv[0] = startpos;
+ }
+ while (i != fe)
+ {
+ ranges.add(iv); // add initial range
+ iv = shiftFrom.get(intv++); // get next interval
+ iv = new int[] { iv[0], iv[1] };// clone
+ i++;
+ }
+ if (i == fe)
+ {
+ iv[1] = endpos;
+ }
+ ranges.add(iv); // add only - or final range
+ }
+ else
+ {
+ // walk from end of interval.
+ i = shiftFrom.size() - 1;
+ while (i > fs)
+ {
+ i--;
+ }
+ iv = shiftFrom.get(i);
+ iv = new int[] { iv[1], iv[0] };// reverse and clone
+ // truncate initial interval
+ if (i == fs)
+ {
+ iv[0] = startpos;
+ }
+ while (--i != fe)
+ { // fix apparent logic bug when fe==-1
+ ranges.add(iv); // add (truncated) reversed interval
+ iv = shiftFrom.get(i);
+ iv = new int[] { iv[1], iv[0] }; // reverse and clone
+ }
+ if (i == fe)
+ {
+ // interval is already reversed
+ iv[1] = endpos;
+ }
+ ranges.add(iv); // add only - or final range
+ }
+ // create array of start end intervals.
+ int[] range = null;
+ if (ranges != null && ranges.size() > 0)
+ {
+ range = new int[ranges.size() * 2];
+ intv = 0;
+ intvSize = ranges.size();
+ i = 0;
+ while (intv < intvSize)
+ {
+ iv = ranges.get(intv);
+ range[i++] = iv[0];
+ range[i++] = iv[1];
+ ranges.set(intv++, null); // remove
+ }
+ }
+ return range;
+ }
+
+ /**
+ * get the 'initial' position of mpos in To
+ *
+ * @param mpos
+ * position in from
+ * @return position of first word in to reference frame
+ */
+ public int getToPosition(int mpos)
+ {
+ int[] mp = shiftTo(mpos);
+ if (mp != null)
+ {
+ return mp[0];
+ }
+ return mpos;
+ }
+
+ /**
+ *
+ * @return a MapList whose From range is this maplist's To Range, and vice
+ * versa
+ */
+ public MapList getInverse()
+ {
+ return new MapList(getToRanges(), getFromRanges(), getToRatio(),
+ getFromRatio());
+ }
+
+ /**
+ * String representation - for debugging, not guaranteed not to change
+ */
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder(64);
+ sb.append("[");
+ for (int[] shift : fromShifts)
+ {
+ sb.append(" ").append(Arrays.toString(shift));
+ }
+ sb.append(" ] ");
+ sb.append(fromRatio).append(":").append(toRatio);
+ sb.append(" to [");
+ for (int[] shift : toShifts)
+ {
+ sb.append(" ").append(Arrays.toString(shift));
+ }
+ sb.append(" ]");
+ return sb.toString();
+ }
+
+ /**
+ * Extend this map list by adding the given map's ranges. There is no
+ * validation check that the ranges do not overlap existing ranges (or each
+ * other), but contiguous ranges are merged.
+ *
+ * @param map
+ */
+ public void addMapList(MapList map)
+ {
+ if (this.equals(map))
+ {
+ return;
+ }
+ this.fromLowest = Math.min(fromLowest, map.fromLowest);
+ this.toLowest = Math.min(toLowest, map.toLowest);
+ this.fromHighest = Math.max(fromHighest, map.fromHighest);
+ this.toHighest = Math.max(toHighest, map.toHighest);
+
+ for (int[] range : map.getFromRanges())
+ {
+ MappingUtils.addRange(range, fromShifts);
+ }
+ for (int[] range : map.getToRanges())
+ {
+ MappingUtils.addRange(range, toShifts);
+ }
+ }
+
+ /**
+ * Returns true if mapping is from forward strand, false if from reverse
+ * strand. Result is just based on the first 'from' range that is not a single
+ * position. Default is true unless proven to be false. Behaviour is not well
+ * defined if the mapping has a mixture of forward and reverse ranges.
+ *
+ * @return
+ */
+ public boolean isFromForwardStrand()
+ {
+ return isForwardStrand(getFromRanges());
+ }
+
+ /**
+ * Returns true if mapping is to forward strand, false if to reverse strand.
+ * Result is just based on the first 'to' range that is not a single position.
+ * Default is true unless proven to be false. Behaviour is not well defined if
+ * the mapping has a mixture of forward and reverse ranges.
+ *
+ * @return
+ */
+ public boolean isToForwardStrand()
+ {
+ return isForwardStrand(getToRanges());
+ }
+
+ /**
+ * A helper method that returns true unless at least one range has start >
+ * end. Behaviour is undefined for a mixture of forward and reverse ranges.
+ *
+ * @param ranges
+ * @return
+ */
+ private boolean isForwardStrand(List<int[]> ranges)
+ {
+ boolean forwardStrand = true;
+ for (int[] range : ranges)
+ {
+ if (range[1] > range[0])
+ {
+ break; // forward strand confirmed
+ }
+ else if (range[1] < range[0])
+ {
+ forwardStrand = false;
+ break; // reverse strand confirmed
+ }
+ }
+ return forwardStrand;
+ }
+
+ /**
+ *
+ * @return true if from, or to is a three to 1 mapping
+ */
+ public boolean isTripletMap()
+ {
+ return (toRatio == 3 && fromRatio == 1)
+ || (fromRatio == 3 && toRatio == 1);
+ }
+
+ /**
+ * Returns a map which is the composite of this one and the input map. That
+ * is, the output map has the fromRanges of this map, and its toRanges are the
+ * toRanges of this map as transformed by the input map.
+ * <p>
+ * Returns null if the mappings cannot be traversed (not all toRanges of this
+ * map correspond to fromRanges of the input), or if this.toRatio does not
+ * match map.fromRatio.
+ *
+ * <pre>
+ * Example 1:
+ * this: from [1-100] to [501-600]
+ * input: from [10-40] to [60-90]
+ * output: from [10-40] to [560-590]
+ * Example 2 ('reverse strand exons'):
+ * this: from [1-100] to [2000-1951], [1000-951] // transcript to loci
+ * input: from [1-50] to [41-90] // CDS to transcript
+ * output: from [10-40] to [1960-1951], [1000-971] // CDS to gene loci
+ * </pre>
+ *
+ * @param map
+ * @return
+ */
+ public MapList traverse(MapList map)
+ {
+ if (map == null)
+ {
+ return null;
+ }
+
+ /*
+ * compound the ratios by this rule:
+ * A:B with M:N gives A*M:B*N
+ * reduced by greatest common divisor
+ * so 1:3 with 3:3 is 3:9 or 1:3
+ * 1:3 with 3:1 is 3:3 or 1:1
+ * 1:3 with 1:3 is 1:9
+ * 2:5 with 3:7 is 6:35
+ */
+ int outFromRatio = getFromRatio() * map.getFromRatio();
+ int outToRatio = getToRatio() * map.getToRatio();
+ int gcd = MathUtils.gcd(outFromRatio, outToRatio);
+ outFromRatio /= gcd;
+ outToRatio /= gcd;
+
+ List<int[]> toRanges = new ArrayList<>();
+ for (int[] range : getToRanges())
+ {
+ int fromLength = Math.abs(range[1] - range[0]) + 1;
+ int[] transferred = map.locateInTo(range[0], range[1]);
+ if (transferred == null || transferred.length % 2 != 0)
+ {
+ return null;
+ }
+
+ /*
+ * convert [start1, end1, start2, end2, ...]
+ * to [[start1, end1], [start2, end2], ...]
+ */
+ int toLength = 0;
+ for (int i = 0; i < transferred.length;)
+ {
+ toRanges.add(new int[] { transferred[i], transferred[i + 1] });
+ toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1;
+ i += 2;
+ }
+
+ /*
+ * check we mapped the full range - if not, abort
+ */
+ if (fromLength * map.getToRatio() != toLength * map.getFromRatio())
+ {
+ return null;
+ }
+ }
+
+ return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio);
+ }
+
+ /**
+ * Answers true if the mapping is from one contiguous range to another, else
+ * false
+ *
+ * @return
+ */
+ public boolean isContiguous()
+ {
+ return fromShifts.size() == 1 && toShifts.size() == 1;
+ }
+
+ /**
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'from' range
+ * that map to positions between {@code start} and {@code end} in the 'to'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * end < start. Returns null if no mapped positions are found in start-end.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInFrom(int start, int end)
+ {
+ return mapPositions(start, end, toShifts, fromShifts,
+ toRatio, fromRatio);
+ }
+
+ /**
+ * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range
+ * that map to positions between {@code start} and {@code end} in the 'from'
+ * range. Note that for a reverse strand mapping this will return ranges with
+ * end < start. Returns null if no mapped positions are found in start-end.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInTo(int start, int end)
+ {
+ return mapPositions(start, end, fromShifts, toShifts,
+ fromRatio, toRatio);
+ }
+
+ /**
+ * Helper method that returns the [start1, end1, start2, end2, ...] positions
+ * in {@code targetRange} that map to positions between {@code start} and
+ * {@code end} in {@code sourceRange}. Note that for a reverse strand mapping
+ * this will return ranges with end < start. Returns null if no mapped
+ * positions are found in start-end.
+ *
+ * @param start
+ * @param end
+ * @param sourceRange
+ * @param targetRange
+ * @param sourceWordLength
+ * @param targetWordLength
+ * @return
+ */
+ final static int[] mapPositions(int start, int end,
+ List<int[]> sourceRange, List<int[]> targetRange,
+ int sourceWordLength, int targetWordLength)
+ {
+ if (end < start)
+ {
+ int tmp = end;
+ end = start;
+ start = tmp;
+ }
+
+ /*
+ * traverse sourceRange and mark offsets in targetRange
+ * of any positions that lie in [start, end]
+ */
+ BitSet offsets = getMappedOffsetsForPositions(start, end, sourceRange,
+ sourceWordLength, targetWordLength);
+
+ /*
+ * traverse targetRange and collect positions at the marked offsets
+ */
+ List<int[]> mapped = getPositionsForOffsets(targetRange, offsets);
+
+ // TODO: or just return the List and adjust calling code to match
+ return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped);
+ }
+
+ /**
+ * Scans the list of {@code ranges} for any values (positions) that lie
+ * between start and end (inclusive), and records the <em>offsets</em> from
+ * the start of the list as a BitSet. The offset positions are converted to
+ * corresponding words in blocks of {@code wordLength2}.
+ *
+ * <pre>
+ * For example:
+ * 1:1 (e.g. gene to CDS):
+ * ranges { [10-20], [31-40] }, wordLengthFrom = wordLength 2 = 1
+ * for start = 1, end = 9, returns a BitSet with no bits set
+ * for start = 1, end = 11, returns a BitSet with bits 0-1 set
+ * for start = 15, end = 35, returns a BitSet with bits 5-15 set
+ * 1:3 (peptide to codon):
+ * ranges { [1-200] }, wordLengthFrom = 1, wordLength 2 = 3
+ * for start = 9, end = 9, returns a BitSet with bits 24-26 set
+ * 3:1 (codon to peptide):
+ * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLength 2 = 1
+ * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+ * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+ * 3:1 (circular DNA to peptide):
+ * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLength 2 = 1
+ * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
+ * </pre>
+ *
+ * @param start
+ * @param end
+ * @param sourceRange
+ * @param sourceWordLength
+ * @param targetWordLength
+ * @return
+ */
+ protected final static BitSet getMappedOffsetsForPositions(int start,
+ int end, List<int[]> sourceRange, int sourceWordLength, int targetWordLength)
+ {
+ BitSet overlaps = new BitSet();
+ int offset = 0;
+ final int s1 = sourceRange.size();
+ for (int i = 0; i < s1; i++)
+ {
+ int[] range = sourceRange.get(i);
+ final int offset1 = offset;
+ int overlapStartOffset = -1;
+ int overlapEndOffset = -1;
+
+ if (range[1] >= range[0])
+ {
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ overlapStartOffset = offset1 + overlapStart - range[0];
+ int overlapEnd = Math.min(end, range[1]);
+ overlapEndOffset = offset1 + overlapEnd - range[0];
+ }
+ }
+ else
+ {
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[1]);
+ int overlapEnd = Math.min(end, range[0]);
+ overlapStartOffset = offset1 + range[0] - overlapEnd;
+ overlapEndOffset = offset1 + range[0] - overlapStart;
+ }
+ }
+
+ if (overlapStartOffset > -1)
+ {
+ /*
+ * found an overlap
+ */
+ if (sourceWordLength != targetWordLength)
+ {
+ /*
+ * convert any overlap found to whole words in the target range
+ * (e.g. treat any partial codon overlap as if the whole codon)
+ */
+ overlapStartOffset -= overlapStartOffset % sourceWordLength;
+ overlapStartOffset = overlapStartOffset / sourceWordLength
+ * targetWordLength;
+
+ /*
+ * similar calculation for range end, adding
+ * (wordLength2 - 1) for end of mapped word
+ */
+ overlapEndOffset -= overlapEndOffset % sourceWordLength;
+ overlapEndOffset = overlapEndOffset / sourceWordLength
+ * targetWordLength;
+ overlapEndOffset += targetWordLength - 1;
+ }
+ overlaps.set(overlapStartOffset, overlapEndOffset + 1);
+ }
+ offset += 1 + Math.abs(range[1] - range[0]);
+ }
+ return overlaps;
+ }
+
+ /**
+ * Returns a (possibly empty) list of the [start-end] values (positions) at
+ * offsets in the {@code targetRange} list that are marked by 'on' bits in the
+ * {@code offsets} bitset.
+ *
+ * @param targetRange
+ * @param offsets
+ * @return
+ */
+ protected final static List<int[]> getPositionsForOffsets(
+ List<int[]> targetRange, BitSet offsets)
+ {
+ List<int[]> mapped = new ArrayList<>();
+ if (offsets.isEmpty())
+ {
+ return mapped;
+ }
+
+ /*
+ * count of positions preceding ranges[i]
+ */
+ int traversed = 0;
+
+ /*
+ * for each [from-to] range in ranges:
+ * - find subranges (if any) at marked offsets
+ * - add the start-end values at the marked positions
+ */
+ final int toAdd = offsets.cardinality();
+ int added = 0;
+ final int s2 = targetRange.size();
+ for (int i = 0; added < toAdd && i < s2; i++)
+ {
+ int[] range = targetRange.get(i);
+ added += addOffsetPositions(mapped, traversed, range, offsets);
+ traversed += Math.abs(range[1] - range[0]) + 1;
+ }
+ return mapped;
+ }
+
+ /**
+ * Helper method that adds any start-end subranges of {@code range} that are
+ * at offsets in {@code range} marked by set bits in overlaps.
+ * {@code mapOffset} is added to {@code range} offset positions. Returns the
+ * count of positions added.
+ *
+ * @param mapped
+ * @param mapOffset
+ * @param range
+ * @param overlaps
+ * @return
+ */
+ final static int addOffsetPositions(List<int[]> mapped,
+ final int mapOffset, final int[] range, final BitSet overlaps)
+ {
+ final int rangeLength = 1 + Math.abs(range[1] - range[0]);
+ final int step = range[1] < range[0] ? -1 : 1;
+ int offsetStart = 0; // offset into range
+ int added = 0;
+
+ while (offsetStart < rangeLength)
+ {