ranges)
+ {
+ boolean forwardStrand = true;
+ for (int[] range : ranges)
+ {
+ if (range[1] > range[0])
{
- System.out.print("\n");
+ break; // forward strand confirmed
}
- else
+ else if (range[1] < range[0])
{
- System.out.print(",");
+ forwardStrand = false;
+ break; // reverse strand confirmed
}
}
- // test range function
- System.out.print("\nTest locateInFrom\n");
+ return forwardStrand;
+ }
+
+ /**
+ *
+ * @return true if from, or to is a three to 1 mapping
+ */
+ public boolean isTripletMap()
+ {
+ return (toRatio == 3 && fromRatio == 1)
+ || (fromRatio == 3 && toRatio == 1);
+ }
+
+ /**
+ * Returns a map which is the composite of this one and the input map. That
+ * is, the output map has the fromRanges of this map, and its toRanges are the
+ * toRanges of this map as transformed by the input map.
+ *
+ * Returns null if the mappings cannot be traversed (not all toRanges of this
+ * map correspond to fromRanges of the input), or if this.toRatio does not
+ * match map.fromRatio.
+ *
+ *
+ * Example 1:
+ * this: from [1-100] to [501-600]
+ * input: from [10-40] to [60-90]
+ * output: from [10-40] to [560-590]
+ * Example 2 ('reverse strand exons'):
+ * this: from [1-100] to [2000-1951], [1000-951] // transcript to loci
+ * input: from [1-50] to [41-90] // CDS to transcript
+ * output: from [10-40] to [1960-1951], [1000-971] // CDS to gene loci
+ *
+ *
+ * @param map
+ * @return
+ */
+ public MapList traverse(MapList map)
+ {
+ if (map == null)
{
- int f = mmap[0][2], t = mmap[0][3];
- while (f <= t)
- {
- System.out.println("Range " + f + " to " + t);
- int rng[] = ml.locateInFrom(f, t);
- if (rng != null)
- {
- for (int i = 0; i < rng.length; i++)
- {
- System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";"));
- }
- }
- else
- {
- System.out.println("No range!");
- }
- System.out.print("\nReversed\n");
- rng = ml.locateInFrom(t, f);
- if (rng != null)
- {
- for (int i = 0; i < rng.length; i++)
- {
- System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";"));
- }
- }
- else
- {
- System.out.println("No range!");
- }
- System.out.print("\n");
- f++;
- t--;
- }
+ return null;
}
- System.out.print("\n");
- mmap = ml.makeToMap();
- System.out.println("ToMap : (" + mmap[0][0] + " " + mmap[0][1] + " "
- + mmap[0][2] + " " + mmap[0][3] + " ");
- for (int i = 1; i <= mmap[1].length; i++)
- {
- if (mmap[1][i - 1] == -1)
- {
- System.out.print(i + "=XXX");
- }
- else
+ /*
+ * compound the ratios by this rule:
+ * A:B with M:N gives A*M:B*N
+ * reduced by greatest common divisor
+ * so 1:3 with 3:3 is 3:9 or 1:3
+ * 1:3 with 3:1 is 3:3 or 1:1
+ * 1:3 with 1:3 is 1:9
+ * 2:5 with 3:7 is 6:35
+ */
+ int outFromRatio = getFromRatio() * map.getFromRatio();
+ int outToRatio = getToRatio() * map.getToRatio();
+ int gcd = MathUtils.gcd(outFromRatio, outToRatio);
+ outFromRatio /= gcd;
+ outToRatio /= gcd;
+
+ List toRanges = new ArrayList<>();
+ for (int[] range : getToRanges())
+ {
+ int fromLength = Math.abs(range[1] - range[0]) + 1;
+ int[] transferred = map.locateInTo(range[0], range[1]);
+ if (transferred == null || transferred.length % 2 != 0)
{
- System.out.print(i + "=" + (mmap[0][2] + mmap[1][i - 1]));
+ return null;
}
- if (i % 20 == 0)
+
+ /*
+ * convert [start1, end1, start2, end2, ...]
+ * to [[start1, end1], [start2, end2], ...]
+ */
+ int toLength = 0;
+ for (int i = 0; i < transferred.length;)
{
- System.out.print("\n");
+ toRanges.add(new int[] { transferred[i], transferred[i + 1] });
+ toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1;
+ i += 2;
}
- else
+
+ /*
+ * check we mapped the full range - if not, abort
+ */
+ if (fromLength * map.getToRatio() != toLength * map.getFromRatio())
{
- System.out.print(",");
+ return null;
}
}
- System.out.print("\n");
- // test range function
- System.out.print("\nTest locateInTo\n");
+
+ return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio);
+ }
+
+ /**
+ * Answers true if the mapping is from one contiguous range to another, else
+ * false
+ *
+ * @return
+ */
+ public boolean isContiguous()
+ {
+ return fromShifts.size() == 1 && toShifts.size() == 1;
+ }
+
+ /**
+ * Returns the [start, end, start, end, ...] ranges in the 'from' range that
+ * map to positions between {@code start} and {@code end} in the 'to' range.
+ * Returns null if no mapped positions are found in start-end.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInFrom2(int start, int end)
+ {
+ List ranges = mapBetween(start, end, toShifts, fromShifts,
+ toRatio, fromRatio);
+
+ // TODO: or just return the List and adjust calling code to match
+ return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
+ }
+
+ /**
+ * Returns the [start, end, start, end, ...] ranges in the 'to' range that map
+ * to the given start-end in the 'from' range. Returns null if either
+ * {@code start} or {@code end} is not a mapped 'from' range position.
+ *
+ * @param start
+ * @param end
+ * @return
+ */
+ public int[] locateInTo2(int start, int end)
+ {
+ List ranges = mapBetween(start, end, fromShifts, toShifts,
+ fromRatio, toRatio);
+
+ return ranges.isEmpty() ? null : MappingUtils.rangeListToArray(ranges);
+ }
+
+ /**
+ * A helper method for navigating the mapping. Returns a (possibly empty) list
+ * of [start-end] positions in {@code ranges2} that map to positions in
+ * {@code ranges1} between {@code start} and {@code end}.
+ *
+ * @param start
+ * @param end
+ * @param ranges1
+ * @param ranges2
+ * @param wordLength1
+ * @param wordLength2
+ * @return
+ */
+ final static List mapBetween(int start, int end,
+ List ranges1, List ranges2, int wordLength1,
+ int wordLength2)
+ {
+ if (end < start)
+ {
+ int tmp = end;
+ end = start;
+ start = tmp;
+ }
+
+ /*
+ * first traverse ranges1 and record count of mapped positions
+ * to any that overlap start-end
+ */
+ List overlaps = findOverlapPositions(ranges1, start, end);
+ if (overlaps.isEmpty())
+ {
+ return overlaps;
+ }
+
+ /*
+ * convert positions to equivalent 'word' positions in ranges2
+ */
+ mapWords(overlaps, wordLength1, wordLength2);
+
+ /*
+ * walk ranges2 and record the values found at
+ * the offsets in 'overlaps'
+ */
+ List mapped = new ArrayList<>();
+ final int s1 = overlaps.size();
+ final int s2 = ranges2.size();
+ int ranges2Index = 0;
+
+ /*
+ * count of mapped positions preceding ranges2[ranges2Index]
+ */
+ int traversed = 0;
+
+ /*
+ * for each [from-to] range in overlaps:
+ * - walk (what remains of) ranges2
+ * - record the values at offsets [from-to]
+ * - stop when past 'to' positions (or at end of ranges2)
+ */
+ for (int i = 0; i < s1; i++)
{
- int f = mmap[0][2], t = mmap[0][3];
- while (f <= t)
+ int[] overlap = overlaps.get(i);
+ final int toAdd = overlap[1] - overlap[0] + 1;
+ int added = 0; // how much of overlap has been 'found'
+ for (; added < toAdd && ranges2Index < s2; ranges2Index++)
{
- System.out.println("Range " + f + " to " + t);
- int rng[] = ml.locateInTo(f, t);
- if (rng != null)
- {
- for (int i = 0; i < rng.length; i++)
- {
- System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";"));
- }
- }
- else
- {
- System.out.println("No range!");
- }
- System.out.print("\nReversed\n");
- rng = ml.locateInTo(t, f);
- if (rng != null)
+ int[] range2 = ranges2.get(ranges2Index);
+ int rangeStart = range2[0];
+ int rangeEnd = range2[1];
+ boolean reverseStrand = range2[1] < range2[0];
+ int rangeLength = Math.abs(rangeEnd - rangeStart) + 1;
+ if (traversed + rangeLength <= overlap[0])
{
- for (int i = 0; i < rng.length; i++)
- {
- System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";"));
- }
+ /*
+ * precedes overlap - keep looking
+ */
+ traversed += rangeLength;
+ continue;
}
- else
- {
- System.out.println("No range!");
- }
- f++;
- t--;
- System.out.print("\n");
- }
- }
-
- }
-
- public static void main(String argv[])
- {
- MapList ml = new MapList(new int[]
- { 1, 5, 10, 15, 25, 20 }, new int[]
- { 51, 1 }, 1, 3);
- MapList ml1 = new MapList(new int[]
- { 1, 3, 17, 4 }, new int[]
- { 51, 1 }, 1, 3);
- MapList ml2 = new MapList(new int[]
- { 1, 60 }, new int[]
- { 1, 20 }, 3, 1);
- // test internal consistency
- int to[] = new int[51];
- MapList.testMap(ml, 1, 60);
- MapList mldna = new MapList(new int[]
- { 2, 2, 6, 8, 12, 16 }, new int[]
- { 1, 3 }, 3, 1);
- int[] frm = mldna.locateInFrom(1, 1);
- testLocateFrom(mldna, 1, 1, new int[]
- { 2, 2, 6, 7 });
- MapList.testMap(mldna, 1, 3);
- /*
- * for (int from=1; from<=51; from++) { int[] too=ml.shiftTo(from); int[]
- * toofrom=ml.shiftFrom(too[0]);
- * System.out.println("ShiftFrom("+from+")=="+too[0]+" %
- * "+too[1]+"\t+-+\tShiftTo("+too[0]+")=="+toofrom[0]+" % "+toofrom[1]); }
- */
- System.out.print("Success?\n"); // if we get here - something must be
- // working!
+ int overlapStart = overlap[0] - traversed;
+ int overlapEnd = Math.min(overlapStart + toAdd - added - 1,
+ rangeLength - 1);
+ int mappedFrom = range2[0] + (reverseStrand ? - overlapStart : overlapStart);
+ int mappedTo = range2[0] + (reverseStrand ? - overlapEnd : overlapEnd);
+ mapped.add(new int[] { mappedFrom, mappedTo });
+ int found = overlapEnd - overlapStart + 1;
+ added += found;
+ overlap[0] += found;
+ traversed += rangeLength;
+ }
+ }
+
+ return mapped;
}
- private static void testLocateFrom(MapList mldna, int i, int j, int[] ks)
+ /**
+ * Converts the start-end positions (counted from zero) in the {@code ranges}
+ * list from one word length to another. Start-end positions are expanded if
+ * necessary to cover a whole word of length {@code wordLength1}. Positions
+ * are then divided by {@code wordLength1} and multiplied by
+ * {@code wordLength2} to give equivalent mapped words.
+ *
+ * Put simply, this converts peptide residue positions to the corresponding
+ * codon ranges, and codons - including partial codons - to the corresponding
+ * peptide positions; for example
+ *
+ *
+ * [1, 10] with word lengths 3:1 converts (as if bases [0-11]) to [1, 4]
+ *
+ *
+ * @param ranges
+ * @param wordLength1
+ * @param wordLength2
+ * @return
+ */
+ final static void mapWords(List ranges, int wordLength1,
+ int wordLength2)
{
- int[] frm = mldna.locateInFrom(i, j);
- if (frm == ks || java.util.Arrays.equals(frm, ks))
+ if (wordLength1 == 1 && wordLength2 == 1)
{
- System.out.println("Success test locate from " + i + " to " + j);
+ return; // nothing to do here
}
- else
+ int s = ranges.size();
+ for (int i = 0; i < s; i++)
{
- System.err.println("Failed test locate from " + i + " to " + j);
- for (int c = 0; c < frm.length; c++)
- {
- System.err.print(frm[c] + ((c % 2 == 0) ? "," : ";"));
- }
- System.err.println("Expected");
- for (int c = 0; c < ks.length; c++)
- {
- System.err.print(ks[c] + ((c % 2 == 0) ? "," : ";"));
- }
+ int[] range = ranges.get(i);
+
+ /*
+ * expand range start to the start of a word,
+ * and convert to wordLength2
+ */
+ range[0] -= range[0] % wordLength1;
+ range[0] = range[0] / wordLength1 * wordLength2;
+
+ /*
+ * similar calculation for range end, adding
+ * (wordLength2 - 1) for end of mapped word
+ */
+ range[1] -= range[1] % wordLength1;
+ range[1] = range[1] / wordLength1 * wordLength2;
+ range[1] += wordLength2 - 1;
}
}
/**
+ * Helper method that returns a (possibly empty) list of offsets in
+ * {@code ranges} to subranges that overlap {@code start-end} (where start <=
+ * end}. The list returned holds counts of the number of positions traversed
+ * (exclusive) to reach the overlapping positions, not the overlapping values.
+ * Returns null if there are no overlaps.
*
- * @return a MapList whose From range is this maplist's To Range, and vice
- * versa
+ * @param ranges
+ * @param start
+ * @param end
+ * @return
*/
- public MapList getInverse()
+ final static List findOverlapPositions(List ranges,
+ int start, int end)
{
- return new MapList(getToRanges(), getFromRanges(), getToRatio(),
- getFromRatio());
+ List positions = new ArrayList<>();
+ int pos = 0;
+ int s = ranges.size();
+ for (int i = 0; i < s; i++)
+ {
+ int[] range = ranges.get(i);
+ addOverlap(positions, pos, range, start, end);
+ pos += 1 + Math.abs(range[1] - range[0]);
+ }
+ return positions;
}
/**
- * test for containment rather than equivalence to another mapping
+ * A helper method that checks whether {@code range} overlaps
+ * {@code start-end}, and if so adds the offset of the overlap in
+ * {@code range}, plus {@code pos}, to {@code positions}.
*
- * @param map
- * to be tested for containment
- * @return true if local or mapped range map contains or is contained by this
- * mapping
+ * @param positions
+ * a list of map offsets to add to
+ * @param pos
+ * the number of mapped positions already visited
+ * @param range
+ * a from-to range (may be forward or reverse)
+ * @param start
+ * position to test for overlap in range
+ * @param end
+ * position to test for overlap in range
+ * @return
*/
- public boolean containsEither(boolean local, MapList map)
+ final static void addOverlap(List positions, int pos, int[] range,
+ int start, int end)
{
- if (local)
+ if (range[1] >= range[0])
{
- return ((getFromLowest() >= map.getFromLowest() && getFromHighest() <= map
- .getFromHighest()) || (getFromLowest() <= map.getFromLowest() && getFromHighest() >= map
- .getFromHighest()));
+ /*
+ * forward direction range
+ */
+ if (start <= range[1] && end >= range[0])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[0]);
+ int overlapStartOffset = pos + overlapStart - range[0];
+ int overlapEnd = Math.min(end, range[1]);
+ int overlapEndOffset = pos + overlapEnd - range[0];
+ int[] lastOverlap = positions.isEmpty() ? null
+ : positions.get(positions.size() - 1);
+ if (lastOverlap != null && overlapStartOffset == lastOverlap[1] + 1)
+ {
+ /*
+ * just extending the last overlap range
+ */
+ lastOverlap[1] = overlapEndOffset;
+ }
+ else
+ {
+ /*
+ * add a new (discontiguous) overlap range
+ */
+ positions.add(new int[] { overlapStartOffset, overlapEndOffset });
+ }
+ }
}
else
{
- return ((getToLowest() >= map.getToLowest() && getToHighest() <= map
- .getToHighest()) || (getToLowest() <= map.getToLowest() && getToHighest() >= map
- .getToHighest()));
+ /*
+ * reverse direction range
+ */
+ if (start <= range[0] && end >= range[1])
+ {
+ /*
+ * overlap
+ */
+ int overlapStart = Math.max(start, range[1]);
+ int overlapEnd = Math.min(end, range[0]);
+ positions
+ .add(new int[]
+ { pos + range[0] - overlapEnd,
+ pos + range[0] - overlapStart });
+ }
}
}
}