mapped = getPositionsForOffsets(targetRange, offsets);
+
+ // TODO: or just return the List and adjust calling code to match
+ return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped);
+ }
+
+ /**
+ * Scans the list of {@code ranges} for any values (positions) that lie
+ * between start and end (inclusive), and records the offsets from
+ * the start of the list as a BitSet. The offset positions are converted to
+ * corresponding words in blocks of {@code wordLength2}.
+ *
+ *
+ * For example:
+ * 1:1 (e.g. gene to CDS):
+ * ranges { [10-20], [31-40] }, wordLengthFrom = wordLength 2 = 1
+ * for start = 1, end = 9, returns a BitSet with no bits set
+ * for start = 1, end = 11, returns a BitSet with bits 0-1 set
+ * for start = 15, end = 35, returns a BitSet with bits 5-15 set
+ * 1:3 (peptide to codon):
+ * ranges { [1-200] }, wordLengthFrom = 1, wordLength 2 = 3
+ * for start = 9, end = 9, returns a BitSet with bits 24-26 set
+ * 3:1 (codon to peptide):
+ * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLength 2 = 1
+ * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set
+ * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set
+ * 3:1 (circular DNA to peptide):
+ * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLength 2 = 1
+ * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set
+ *
+ *
+ * @param start
+ * @param end
+ * @param sourceRange
+ * @param sourceWordLength
+ * @param targetWordLength
+ * @return
+ */
+ protected final static BitSet getMappedOffsetsForPositions(int start,
+ int end, List sourceRange, int sourceWordLength,
+ int targetWordLength)
+ {
+ BitSet overlaps = new BitSet();
+ int offset = 0;
+ final int s1 = sourceRange.size();
+ for (int i = 0; i < s1; i++)
+ {
+ int[] range = sourceRange.get(i);
+ final int offset1 = offset;
+ int overlapStartOffset = -1;
+ int overlapEndOffset = -1;
+
+ if (range[1] >= range[0])
+ {
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
{
- System.out.println("No range!");
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ overlapStartOffset = offset1 + overlapStart - range[0];
+ int overlapEnd = Math.min(end, range[1]);
+ overlapEndOffset = offset1 + overlapEnd - range[0];
}
- System.out.print("\nReversed\n");
- rng = ml.locateInFrom(t,f);
- if (rng!=null)
+ }
+ else
+ {
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
{
- for (int i=0; i -1)
+ {
+ /*
+ * found an overlap
+ */
+ if (sourceWordLength != targetWordLength)
{
- System.out.println("No range!");
+ /*
+ * convert any overlap found to whole words in the target range
+ * (e.g. treat any partial codon overlap as if the whole codon)
+ */
+ overlapStartOffset -= overlapStartOffset % sourceWordLength;
+ overlapStartOffset = overlapStartOffset / sourceWordLength
+ * targetWordLength;
+
+ /*
+ * similar calculation for range end, adding
+ * (wordLength2 - 1) for end of mapped word
+ */
+ overlapEndOffset -= overlapEndOffset % sourceWordLength;
+ overlapEndOffset = overlapEndOffset / sourceWordLength
+ * targetWordLength;
+ overlapEndOffset += targetWordLength - 1;
}
- System.out.print("\n");
- f++;t--;
+ overlaps.set(overlapStartOffset, overlapEndOffset + 1);
}
+ offset += 1 + Math.abs(range[1] - range[0]);
}
- System.out.print("\n");
- mmap = ml.makeToMap();
- System.out.println("ToMap : (" + mmap[0][0] + " " + mmap[0][1] + " " +
- mmap[0][2] + " " + mmap[0][3] + " ");
- for (int i = 1; i <= mmap[1].length; i++)
+ return overlaps;
+ }
+
+ /**
+ * Returns a (possibly empty) list of the [start-end] values (positions) at
+ * offsets in the {@code targetRange} list that are marked by 'on' bits in the
+ * {@code offsets} bitset.
+ *
+ * @param targetRange
+ * @param offsets
+ * @return
+ */
+ protected final static List getPositionsForOffsets(
+ List targetRange, BitSet offsets)
+ {
+ List mapped = new ArrayList<>();
+ if (offsets.isEmpty())
{
- if (mmap[1][i - 1] == -1)
- {
- System.out.print(i+"=XXX");
+ return mapped;
+ }
- }
- else
- {
- System.out.print(i+"="+(mmap[0][2]+mmap[1][i-1]));
- }
- if (i % 20==0)
- {
- System.out.print("\n");
- }
- else
- {
- System.out.print(",");
- }
+ /*
+ * count of positions preceding ranges[i]
+ */
+ int traversed = 0;
+
+ /*
+ * for each [from-to] range in ranges:
+ * - find subranges (if any) at marked offsets
+ * - add the start-end values at the marked positions
+ */
+ final int toAdd = offsets.cardinality();
+ int added = 0;
+ final int s2 = targetRange.size();
+ for (int i = 0; added < toAdd && i < s2; i++)
+ {
+ int[] range = targetRange.get(i);
+ added += addOffsetPositions(mapped, traversed, range, offsets);
+ traversed += Math.abs(range[1] - range[0]) + 1;
}
- System.out.print("\n");
- //test range function
- System.out.print("\nTest locateInTo\n");
+ return mapped;
+ }
+
+ /**
+ * Helper method that adds any start-end subranges of {@code range} that are
+ * at offsets in {@code range} marked by set bits in overlaps.
+ * {@code mapOffset} is added to {@code range} offset positions. Returns the
+ * count of positions added.
+ *
+ * @param mapped
+ * @param mapOffset
+ * @param range
+ * @param overlaps
+ * @return
+ */
+ final static int addOffsetPositions(List mapped,
+ final int mapOffset, final int[] range, final BitSet overlaps)
+ {
+ final int rangeLength = 1 + Math.abs(range[1] - range[0]);
+ final int step = range[1] < range[0] ? -1 : 1;
+ int offsetStart = 0; // offset into range
+ int added = 0;
+
+ while (offsetStart < rangeLength)
{
- int f=mmap[0][2],t=mmap[0][3];
- while (f<=t) {
- System.out.println("Range "+f+" to "+t);
- int rng[] = ml.locateInTo(f,t);
- if (rng!=null) {
- for (int i=0; i= rangeLength)
+ {
+ /*
+ * no more overlaps, or no more within range[]
+ */
+ return added;
}
+ overlapStart -= mapOffset;
+
+ /*
+ * end of the overlap range is just before the next clear bit;
+ * restrict it to end of range if necessary;
+ * note we may add a reverse strand range here (end < start)
+ */
+ int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1);
+ overlapEnd = (overlapEnd == -1) ? rangeLength - 1
+ : Math.min(rangeLength - 1, overlapEnd - mapOffset - 1);
+ int startPosition = range[0] + step * overlapStart;
+ int endPosition = range[0] + step * overlapEnd;
+ mapped.add(new int[] { startPosition, endPosition });
+ offsetStart = overlapEnd + 1;
+ added += Math.abs(endPosition - startPosition) + 1;
}
+ return added;
}
- public static void main(String argv[])
+ /*
+ * Returns the [start, end...] positions in the range mapped from, that are
+ * mapped to by part or all of the given begin-end of the range mapped to.
+ * Returns null if begin-end does not overlap any position mapped to.
+ *
+ * @param begin
+ * @param end
+ * @return
+ */
+ public int[] getOverlapsInFrom(final int begin, final int end)
{
- MapList ml = new MapList(new int[]
- {1, 5, 10, 15, 25, 20},
- new int[]
- {51, 1}, 1, 3);
- MapList ml1 = new MapList(new int[]
- {1, 3, 17, 4},
- new int[]
- {51, 1}, 1, 3);
- MapList ml2 = new MapList(new int[] { 1, 60 },
- new int[] { 1, 20 }, 3, 1);
- // test internal consistency
- int to[] = new int[51];
- MapList.testMap(ml, 1, 60);
- /*
- for (int from=1; from<=51; from++) {
- int[] too=ml.shiftTo(from);
- int[] toofrom=ml.shiftFrom(too[0]);
- System.out.println("ShiftFrom("+from+")=="+too[0]+" % "+too[1]+"\t+-+\tShiftTo("+too[0]+")=="+toofrom[0]+" % "+toofrom[1]);
- }*/
- System.out.print("Success?\n"); // if we get here - something must be working!
+ int[] overlaps = MappingUtils.findOverlap(toShifts, begin, end);
+
+ return overlaps == null ? null : locateInFrom(overlaps[0], overlaps[1]);
+ }
+
+ /**
+ * Returns the [start, end...] positions in the range mapped to, that are
+ * mapped to by part or all of the given begin-end of the range mapped from.
+ * Returns null if begin-end does not overlap any position mapped from.
+ *
+ * @param begin
+ * @param end
+ * @return
+ */
+ public int[] getOverlapsInTo(final int begin, final int end)
+ {
+ int[] overlaps = MappingUtils.findOverlap(fromShifts, begin, end);
+
+ return overlaps == null ? null : locateInTo(overlaps[0], overlaps[1]);
}
}