From: gmungoc Date: Thu, 27 Aug 2020 14:44:45 +0000 (+0100) Subject: JAL-3725 helper methods for computing mapped feature range overlap X-Git-Tag: Release_2_11_2_0~13^2~4^2~9 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d52ea4bd9e9692c73c5a427b3aa0ad2fd988a26e;p=jalview.git JAL-3725 helper methods for computing mapped feature range overlap --- diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 3555e52..8efe42b 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -25,6 +25,8 @@ import java.util.Arrays; import java.util.BitSet; import java.util.List; +import jalview.bin.Cache; + /** * A simple way of bijectively mapping a non-contiguous linear range to another * non-contiguous linear range. @@ -307,7 +309,7 @@ public class MapList if (range.length != 2) { // throw new IllegalArgumentException(range); - System.err.println("Invalid format for fromRange " + Cache.log.error("Invalid format for fromRange " + Arrays.toString(range) + " may cause errors"); } fromLowest = Math.min(fromLowest, Math.min(range[0], range[1])); @@ -321,7 +323,7 @@ public class MapList if (range.length != 2) { // throw new IllegalArgumentException(range); - System.err.println("Invalid format for toRange " + Cache.log.error("Invalid format for toRange " + Arrays.toString(range) + " may cause errors"); } toLowest = Math.min(toLowest, Math.min(range[0], range[1])); @@ -467,7 +469,8 @@ public class MapList int mp[][] = new int[to - from + 2][]; for (int i = 0; i < mp.length; i++) { - int[] m = shift(i + from, shiftTo, sourceRatio, shiftFrom, targetRatio); + int[] m = shift(i + from, shiftTo, sourceRatio, shiftFrom, + targetRatio); if (m != null) { if (i == 0) @@ -1144,10 +1147,11 @@ public class MapList } /** - * Returns the [start1, end1, start2, end2, ...] positions in the 'from' range - * that map to positions between {@code start} and {@code end} in the 'to' - * range. 
Note that for a reverse strand mapping this will return ranges with - * end < start. Returns null if no mapped positions are found in start-end. + * Returns the [start1, end1, start2, end2, ...] positions in the + * 'from' range that map to positions between {@code start} and {@code end} in + * the 'to' range. Note that for a reverse strand mapping this will return + * ranges with end < start. Returns null if no mapped positions are found in + * start-end. * * @param start * @param end @@ -1155,8 +1159,8 @@ public class MapList */ public int[] locateInFrom(int start, int end) { - return mapPositions(start, end, toShifts, fromShifts, - toRatio, fromRatio); + return mapPositions(start, end, toShifts, fromShifts, toRatio, + fromRatio); } /** @@ -1171,8 +1175,8 @@ public class MapList */ public int[] locateInTo(int start, int end) { - return mapPositions(start, end, fromShifts, toShifts, - fromRatio, toRatio); + return mapPositions(start, end, fromShifts, toShifts, fromRatio, + toRatio); } /** @@ -1250,7 +1254,8 @@ public class MapList * @return */ protected final static BitSet getMappedOffsetsForPositions(int start, - int end, List sourceRange, int sourceWordLength, int targetWordLength) + int end, List sourceRange, int sourceWordLength, + int targetWordLength) { BitSet overlaps = new BitSet(); int offset = 0; @@ -1419,4 +1424,36 @@ public class MapList return added; } + + /** + * Returns the [start, end...] positions in the range mapped from, that are + * mapped to by part or all of the given begin-end of the range mapped to. + * Returns null if begin-end does not overlap any position mapped to. + * + * @param begin + * @param end + * @return + */ + public int[] getOverlapsInFrom(final int begin, final int end) + { + int[] overlaps = MappingUtils.findOverlap(toShifts, begin, end); + + return overlaps == null ? null : locateInFrom(overlaps[0], overlaps[1]); + } + + /** + * Returns the [start, end...] 
positions in the range mapped to, that are + * mapped to by part or all of the given begin-end of the range mapped from. + * Returns null if begin-end does not overlap any position mapped from. + * + * @param begin + * @param end + * @return + */ + public int[] getOverlapsInTo(final int begin, final int end) + { + int[] overlaps = MappingUtils.findOverlap(fromShifts, begin, end); + + return overlaps == null ? null : locateInTo(overlaps[0], overlaps[1]); + } } diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 4e07a08..33decb4 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -29,6 +29,7 @@ import java.util.Map; import jalview.analysis.AlignmentSorter; import jalview.api.AlignViewportI; +import jalview.bin.Cache; import jalview.commands.CommandI; import jalview.commands.EditCommand; import jalview.commands.EditCommand.Action; @@ -78,7 +79,7 @@ public final class MappingUtils action = action.getUndoAction(); } // TODO write this - System.err.println("MappingUtils.mapCutOrPaste not yet implemented"); + Cache.log.error("MappingUtils.mapCutOrPaste not yet implemented"); } /** @@ -835,7 +836,7 @@ public final class MappingUtils { if (range.length % 2 != 0) { - System.err.println( + Cache.log.error( "Error unbalance start/end ranges: " + ranges.toString()); return 0; } @@ -991,7 +992,7 @@ public final class MappingUtils /* * not coded for [start1, end1, start2, end2, ...] 
*/ - System.err.println( + Cache.log.error( "MappingUtils.removeEndPositions doesn't handle multiple ranges"); return; } @@ -1002,7 +1003,7 @@ public final class MappingUtils /* * not coded for a reverse strand range (end < start) */ - System.err.println( + Cache.log.error( "MappingUtils.removeEndPositions doesn't handle reverse strand"); return; } @@ -1039,4 +1040,66 @@ public final class MappingUtils } return result; } + + /** + * Returns the maximal start-end positions in the given (ordered) list of + * ranges which is overlapped by the given begin-end range, or null if there + * is no overlap. + * + * <pre>
+   * Examples:
+   *   if ranges is {[4, 8], [10, 12], [16, 19]}
+   * then
+   *   findOverlap(ranges, 1, 20) == [4, 19]
+   *   findOverlap(ranges, 6, 11) == [6, 11]
+   *   findOverlap(ranges, 9, 15) == [10, 12]
+   *   findOverlap(ranges, 13, 15) == null
+   * </pre>
+ * + * @param ranges + * @param begin + * @param end + * @return + */ + protected static int[] findOverlap(List ranges, final int begin, + final int end) + { + boolean foundStart = false; + int from = 0; + int to = 0; + + /* + * traverse the ranges to find the first position (if any) >= begin, + * and the last position (if any) <= end + */ + for (int[] range : ranges) + { + if (!foundStart) + { + if (range[0] >= begin) + { + /* + * first range that starts with, or follows, begin + */ + foundStart = true; + from = Math.max(range[0], begin); + } + else if (range[1] >= begin) + { + /* + * first range that contains begin + */ + foundStart = true; + from = begin; + } + } + + if (range[0] <= end) + { + to = Math.min(end, range[1]); + } + } + + return foundStart && to >= from ? new int[] { from, to } : null; + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index cf10aba..fb0cdae 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -290,7 +290,7 @@ public class MapListTest * no overlap */ assertNull(ml.locateInFrom(0, 0)); - + } /** @@ -312,7 +312,7 @@ public class MapListTest assertEquals("[10, 10, 12, 12, 14, 14]", Arrays.toString(ml.locateInFrom(3, 3))); assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4))); - + /* * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8 */ @@ -337,6 +337,86 @@ public class MapListTest } /** + * Tests for method that locates the overlap of the ranges in the 'from' map + * for given range in the 'to' map + */ + @Test(groups = { "Functional" }) + public void testGetOverlapsInFrom_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [16, 17, 18] i.e. 
+ * 2-3, 5-7, 9-10, 12-12, 14-14, 16-18 + */ + int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = { 11, 14 }; + MapList ml = new MapList(codons, protein, 3, 1); + + assertEquals("[2, 3, 5, 5]", + Arrays.toString(ml.getOverlapsInFrom(11, 11))); + assertEquals("[2, 3, 5, 7, 9, 9]", + Arrays.toString(ml.getOverlapsInFrom(11, 12))); + // out of range 5' : + assertEquals("[2, 3, 5, 7, 9, 9]", + Arrays.toString(ml.getOverlapsInFrom(8, 12))); + // out of range 3' : + assertEquals("[10, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml.getOverlapsInFrom(13, 16))); + // out of range both : + assertEquals("[2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml.getOverlapsInFrom(1, 16))); + // no overlap: + assertNull(ml.getOverlapsInFrom(20, 25)); + } + + /** + * Tests for method that locates the overlap of the ranges in the 'to' map for + * given range in the 'from' map + */ + @Test(groups = { "Functional" }) + public void testGetOverlapsInTo_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [17, 18, 19] i.e. 
+ * 2-3, 5-7, 9-10, 12-12, 14-14, 17-19 + */ + int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 17, 19 }; + /* + * Mapped proteins at positions 1, 3, 4, 6 in the sequence + */ + int[] protein = { 1, 1, 3, 4, 6, 6 }; + MapList ml = new MapList(codons, protein, 3, 1); + + /* + * Can't map from an unmapped position + */ + assertNull(ml.getOverlapsInTo(1, 1)); + assertNull(ml.getOverlapsInTo(4, 4)); + assertNull(ml.getOverlapsInTo(15, 16)); + + /* + * nor from a range that includes no mapped position (exon) + */ + assertNull(ml.getOverlapsInTo(15, 16)); + + // end of codon 1 maps to first peptide + assertEquals("[1, 1]", Arrays.toString(ml.getOverlapsInTo(2, 2))); + // end of codon 1 and start of codon 2 maps to first 2 peptides + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(3, 7))); + + // range overlaps 5' end of dna: + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 6))); + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 8))); + + // range overlaps 3' end of dna: + assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(17, 24))); + assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(16, 24))); + + // dna positions 8, 11 are intron but include end of exon 2 and start of + // exon 3 + assertEquals("[3, 4]", Arrays.toString(ml.getOverlapsInTo(8, 11))); + } + + /** * Tests for method that locates ranges in the 'to' map for given range in the * 'from' map. 
*/ @@ -376,7 +456,7 @@ public class MapListTest */ assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 13))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(-1, 2))); - + /* * no overlap */ @@ -422,7 +502,7 @@ public class MapListTest assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 2))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 4))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 4))); - + /* * no overlap */ @@ -894,7 +974,7 @@ public class MapListTest toRanges = compound.getToRanges(); assertEquals(2, toRanges.size()); assertArrayEquals(new int[] { 931, 901 }, toRanges.get(0)); - assertArrayEquals(new int[] { 600, 582}, toRanges.get(1)); + assertArrayEquals(new int[] { 600, 582 }, toRanges.get(1)); /* * 1:1 plus 1:3 should result in 1:3 @@ -1084,7 +1164,7 @@ public class MapListTest * no overlap */ assertNull(ml.locateInTo(0, 0)); - + /* * partial overlap */ @@ -1100,7 +1180,7 @@ public class MapListTest ml = new MapList(gene, cds, 1, 1); assertEquals("[13203, 13204]", Arrays.toString(ml.locateInTo(13468, 13468))); - + /* * gene to protein * the base at 13468 is in the codon for 4401N and also 4402R @@ -1219,7 +1299,7 @@ public class MapListTest public void testAddOffsetPositions() { List mapped = new ArrayList<>(); - int[] range = new int[] {10, 20}; + int[] range = new int[] { 10, 20 }; BitSet offsets = new BitSet(); MapList.addOffsetPositions(mapped, 0, range, offsets); @@ -1261,7 +1341,7 @@ public class MapListTest assertArrayEquals(new int[] { 14, 13 }, mapped.get(1)); assertArrayEquals(new int[] { 10, 10 }, mapped.get(2)); } - + @Test(groups = { "Functional" }) public void testGetPositionsForOffsets() { @@ -1269,28 +1349,28 @@ public class MapListTest BitSet offsets = new BitSet(); List mapped = MapList.getPositionsForOffsets(ranges, offsets); assertTrue(mapped.isEmpty()); // no ranges and no offsets! 
- + offsets.set(5, 1000); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertTrue(mapped.isEmpty()); // no ranges - + /* * one range with overlap of offsets */ - ranges.add(new int[] {15, 25}); + ranges.add(new int[] { 15, 25 }); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(1, mapped.size()); - assertArrayEquals(new int[] {20, 25}, mapped.get(0)); - + assertArrayEquals(new int[] { 20, 25 }, mapped.get(0)); + /* * two ranges */ - ranges.add(new int[] {300, 320}); + ranges.add(new int[] { 300, 320 }); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(2, mapped.size()); - assertArrayEquals(new int[] {20, 25}, mapped.get(0)); - assertArrayEquals(new int[] {300, 320}, mapped.get(1)); - + assertArrayEquals(new int[] { 20, 25 }, mapped.get(0)); + assertArrayEquals(new int[] { 300, 320 }, mapped.get(1)); + /* * boundary case - right end of first range overlaps */ @@ -1298,45 +1378,45 @@ public class MapListTest offsets.set(10); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(1, mapped.size()); - assertArrayEquals(new int[] {25, 25}, mapped.get(0)); - + assertArrayEquals(new int[] { 25, 25 }, mapped.get(0)); + /* * boundary case - left end of second range overlaps */ offsets.set(11); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(2, mapped.size()); - assertArrayEquals(new int[] {25, 25}, mapped.get(0)); - assertArrayEquals(new int[] {300, 300}, mapped.get(1)); - + assertArrayEquals(new int[] { 25, 25 }, mapped.get(0)); + assertArrayEquals(new int[] { 300, 300 }, mapped.get(1)); + /* * offsets into a circular range are reported in * the order in which they are traversed */ ranges.clear(); - ranges.add(new int[] {100, 150}); - ranges.add(new int[] {60, 80}); + ranges.add(new int[] { 100, 150 }); + ranges.add(new int[] { 60, 80 }); offsets.clear(); offsets.set(45, 55); // sets bits 45 to 54 mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(2, 
mapped.size()); - assertArrayEquals(new int[] {145, 150}, mapped.get(0)); // offsets 45-50 - assertArrayEquals(new int[] {60, 63}, mapped.get(1)); // offsets 51-54 + assertArrayEquals(new int[] { 145, 150 }, mapped.get(0)); // offsets 45-50 + assertArrayEquals(new int[] { 60, 63 }, mapped.get(1)); // offsets 51-54 /* * reverse range overlap is reported with start < end */ ranges.clear(); - ranges.add(new int[] {4321, 4000}); + ranges.add(new int[] { 4321, 4000 }); offsets.clear(); offsets.set(20, 22); // sets bits 20 and 21 offsets.set(30); mapped = MapList.getPositionsForOffsets(ranges, offsets); assertEquals(2, mapped.size()); - assertArrayEquals(new int[] {4301, 4300}, mapped.get(0)); - assertArrayEquals(new int[] {4291, 4291}, mapped.get(1)); + assertArrayEquals(new int[] { 4301, 4300 }, mapped.get(0)); + assertArrayEquals(new int[] { 4291, 4291 }, mapped.get(1)); } - + @Test(groups = { "Functional" }) public void testGetMappedOffsetsForPositions() { @@ -1344,9 +1424,10 @@ public class MapListTest * start by verifying the examples in the method's Javadoc! 
*/ List ranges = new ArrayList<>(); - ranges.add(new int[] {10, 20}); - ranges.add(new int[] {31, 40}); - BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, 1); + ranges.add(new int[] { 10, 20 }); + ranges.add(new int[] { 31, 40 }); + BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, + 1); assertTrue(overlaps.isEmpty()); overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1); assertEquals(2, overlaps.cardinality()); @@ -1354,75 +1435,80 @@ public class MapListTest assertTrue(overlaps.get(1)); overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1); assertEquals(11, overlaps.cardinality()); - for (int i = 5 ; i <= 11 ; i++) + for (int i = 5; i <= 11; i++) { assertTrue(overlaps.get(i)); } - + ranges.clear(); - ranges.add(new int[] {1, 200}); + ranges.add(new int[] { 1, 200 }); overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3); assertEquals(3, overlaps.cardinality()); assertTrue(overlaps.get(24)); assertTrue(overlaps.get(25)); assertTrue(overlaps.get(26)); - + ranges.clear(); - ranges.add(new int[] {101, 150}); - ranges.add(new int[] {171, 180}); + ranges.add(new int[] { 101, 150 }); + ranges.add(new int[] { 171, 180 }); overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1); assertEquals(1, overlaps.cardinality()); assertTrue(overlaps.get(0)); overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1); assertEquals(1, overlaps.cardinality()); assertTrue(overlaps.get(16)); - + ranges.clear(); - ranges.add(new int[] {101, 150}); - ranges.add(new int[] {21, 30}); + ranges.add(new int[] { 101, 150 }); + ranges.add(new int[] { 21, 30 }); overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1); assertEquals(3, overlaps.cardinality()); assertTrue(overlaps.get(17)); assertTrue(overlaps.get(18)); assertTrue(overlaps.get(19)); - + /* * reverse range 1:1 (e.g. 
reverse strand gene to transcript) */ ranges.clear(); - ranges.add(new int[] {20, 10}); + ranges.add(new int[] { 20, 10 }); overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1); assertEquals(2, overlaps.cardinality()); assertTrue(overlaps.get(7)); assertTrue(overlaps.get(8)); - + /* * reverse range 3:1 (e.g. reverse strand gene to peptide) * from EMBL:J03321 to P0CE20 */ ranges.clear(); - ranges.add(new int[] {1480, 488}); - overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, 1); + ranges.add(new int[] { 1480, 488 }); + overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, + 1); // 1460 is the end of the 7th codon assertEquals(1, overlaps.cardinality()); assertTrue(overlaps.get(6)); // add one base (part codon) - overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, 1); + overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, + 1); assertEquals(2, overlaps.cardinality()); assertTrue(overlaps.get(6)); assertTrue(overlaps.get(7)); // add second base (part codon) - overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, 1); + overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, + 1); assertEquals(2, overlaps.cardinality()); assertTrue(overlaps.get(6)); assertTrue(overlaps.get(7)); // add third base (whole codon) - overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, 1); + overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, + 1); assertEquals(2, overlaps.cardinality()); assertTrue(overlaps.get(6)); assertTrue(overlaps.get(7)); // add one more base (part codon) - overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, 1); + overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, + 1); assertEquals(3, overlaps.cardinality()); assertTrue(overlaps.get(6)); assertTrue(overlaps.get(7)); diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 
3418f3c..4b7c75c 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -22,9 +22,10 @@ package jalview.util; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import static org.testng.AssertJUnit.fail; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import java.awt.Color; import java.io.IOException; @@ -1329,31 +1330,20 @@ public class MappingUtilsTest } @Test(groups = "Functional") - public void testListToArray() + public void testFindOverlap() { List ranges = new ArrayList<>(); - - int[] result = MappingUtils.rangeListToArray(ranges); - assertEquals(result.length, 0); - ranges.add(new int[] { 24, 12 }); - result = MappingUtils.rangeListToArray(ranges); - assertEquals(result.length, 2); - assertEquals(result[0], 24); - assertEquals(result[1], 12); - ranges.add(new int[] { -7, 30 }); - result = MappingUtils.rangeListToArray(ranges); - assertEquals(result.length, 4); - assertEquals(result[0], 24); - assertEquals(result[1], 12); - assertEquals(result[2], -7); - assertEquals(result[3], 30); - try - { - MappingUtils.rangeListToArray(null); - fail("Expected exception"); - } catch (NullPointerException e) - { - // expected - } + ranges.add(new int[] { 4, 8 }); + ranges.add(new int[] { 10, 12 }); + ranges.add(new int[] { 16, 19 }); + + int[] overlap = MappingUtils.findOverlap(ranges, 5, 13); + assertArrayEquals(overlap, new int[] { 5, 12 }); + overlap = MappingUtils.findOverlap(ranges, -100, 100); + assertArrayEquals(overlap, new int[] { 4, 19 }); + overlap = MappingUtils.findOverlap(ranges, 7, 17); + assertArrayEquals(overlap, new int[] { 7, 17 }); + overlap = MappingUtils.findOverlap(ranges, 13, 15); + assertNull(overlap); } }