X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Futil%2FMapListTest.java;h=fb0cdae2d1391ed2957a58edecf188b63c0da5cc;hb=217bacee24ea08305c3187a0c54bbaf867e25082;hp=fd34902ac74d54916a176570be9ba223c8892240;hpb=d89bd711a41eb3c1cd66d01c0067e1ddb5261ef9;p=jalview.git diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index fd34902..fb0cdae 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -30,6 +30,7 @@ import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.List; import org.testng.annotations.BeforeClass; @@ -289,6 +290,7 @@ public class MapListTest * no overlap */ assertNull(ml.locateInFrom(0, 0)); + } /** @@ -310,6 +312,18 @@ public class MapListTest assertEquals("[10, 10, 12, 12, 14, 14]", Arrays.toString(ml.locateInFrom(3, 3))); assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4))); + + /* + * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8 + */ + ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 }, + new int[] + { 1, 1, 3, 4, 6, 8 }, 3, 1); + assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom(1, 1)); + assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom(1, 3)); + assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom(1, 4)); + assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom(3, 4)); + } @Test(groups = { "Functional" }) @@ -323,6 +337,86 @@ public class MapListTest } /** + * Tests for method that locates the overlap of the ranges in the 'from' map + * for given range in the 'to' map + */ + @Test(groups = { "Functional" }) + public void testGetOverlapsInFrom_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [16, 17, 18] i.e. + * 2-3, 5-7, 9-10, 12-12, 14-14, 16-18 + */ + int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = { 11, 14 }; + MapList ml = new MapList(codons, protein, 3, 1); + + assertEquals("[2, 3, 5, 5]", + Arrays.toString(ml.getOverlapsInFrom(11, 11))); + assertEquals("[2, 3, 5, 7, 9, 9]", + Arrays.toString(ml.getOverlapsInFrom(11, 12))); + // out of range 5' : + assertEquals("[2, 3, 5, 7, 9, 9]", + Arrays.toString(ml.getOverlapsInFrom(8, 12))); + // out of range 3' : + assertEquals("[10, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml.getOverlapsInFrom(13, 16))); + // out of range both : + assertEquals("[2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml.getOverlapsInFrom(1, 16))); + // no overlap: + assertNull(ml.getOverlapsInFrom(20, 25)); + } + + /** + * Tests for method that locates the overlap of the ranges in the 'to' map for + * given range in the 'from' map + */ + @Test(groups = { "Functional" }) + public void testGetOverlapsInTo_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [17, 18, 19] i.e. + * 2-3, 5-7, 9-10, 12-12, 14-14, 17-19 + */ + int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 17, 19 }; + /* + * Mapped proteins at positions 1, 3, 4, 6 in the sequence + */ + int[] protein = { 1, 1, 3, 4, 6, 6 }; + MapList ml = new MapList(codons, protein, 3, 1); + + /* + * Can't map from an unmapped position + */ + assertNull(ml.getOverlapsInTo(1, 1)); + assertNull(ml.getOverlapsInTo(4, 4)); + assertNull(ml.getOverlapsInTo(15, 16)); + + /* + * nor from a range that includes no mapped position (exon) + */ + assertNull(ml.getOverlapsInTo(15, 16)); + + // end of codon 1 maps to first peptide + assertEquals("[1, 1]", Arrays.toString(ml.getOverlapsInTo(2, 2))); + // end of codon 1 and start of codon 2 maps to first 2 peptides + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(3, 7))); + + // range overlaps 5' end of dna: + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 6))); + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 8))); + + // range overlaps 3' end of dna: + assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(17, 24))); + assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(16, 24))); + + // dna positions 8, 11 are intron but include end of exon 2 and start of + // exon 3 + assertEquals("[3, 4]", Arrays.toString(ml.getOverlapsInTo(8, 11))); + } + + /** * Tests for method that locates ranges in the 'to' map for given range in the * 'from' map. */ @@ -362,7 +456,7 @@ public class MapListTest */ assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 13))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(-1, 2))); - + /* * no overlap */ @@ -408,7 +502,7 @@ public class MapListTest assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 2))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 4))); assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 4))); - + /* * no overlap */ @@ -1070,7 +1164,7 @@ public class MapListTest * no overlap */ assertNull(ml.locateInTo(0, 0)); - + /* * partial overlap */ @@ -1086,7 +1180,7 @@ public class MapListTest ml = new MapList(gene, cds, 1, 1); assertEquals("[13203, 13204]", Arrays.toString(ml.locateInTo(13468, 13468))); - + /* * gene to protein * the base at 13468 is in the codon for 4401N and also 4402R @@ -1198,176 +1292,226 @@ public class MapListTest } /** - * Tests for helper method that adds any overlap (plus offset) to a list of + * Tests for helper method that adds any overlap (plus offset) to a set of * overlaps */ @Test(groups = { "Functional" }) - public void testAddOverlap() + public void testAddOffsetPositions() { - List overlaps = new ArrayList<>(); - int[] candidate = new int[] { 10, 19 }; - MapList.addOverlap(overlaps, 5, candidate, 20, 30); // doesn't overlap - assertTrue(overlaps.isEmpty()); - MapList.addOverlap(overlaps, 5, candidate, 31, 40); // doesn't overlap - assertTrue(overlaps.isEmpty()); + List mapped = new ArrayList<>(); + int[] range = new int[] { 10, 20 }; + BitSet offsets = new BitSet(); - /* - * 10-19 overlaps 15-25 at 15-19, which is offset 5-9 in 10-19 - * + 5 initial offset - */ - MapList.addOverlap(overlaps, 5, candidate, 15, 25); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 10, 14 }, overlaps.get(0)); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // nothing marked for overlap + + offsets.set(11); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // no offset 11 in range + + offsets.set(4, 6); // this sets bits 4 and 5 + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] { 14, 15 }, mapped.get(0)); + + mapped.clear(); + offsets.set(10); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 14, 15 }, mapped.get(0)); + assertArrayEquals(new int[] { 20, 20 }, mapped.get(1)); /* - * reverse range overlap: - * 300-20 overlaps 15-25 at 25-20, which is offset 275-280 in 300-20 - * + 8 initial offset + * reverse range */ - overlaps.clear(); - candidate = new int[] { 300, 20 }; - MapList.addOverlap(overlaps, 8, candidate, 15, 25); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 283, 288 }, overlaps.get(0)); + range = new int[] { 20, 10 }; + mapped.clear(); + offsets.clear(); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // nothing marked for overlap + offsets.set(11); + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertTrue(mapped.isEmpty()); // no offset 11 in range + offsets.set(0); + offsets.set(10); + offsets.set(6, 8); // sets bits 6 and 7 + MapList.addOffsetPositions(mapped, 0, range, offsets); + assertEquals(3, mapped.size()); + assertArrayEquals(new int[] { 20, 20 }, mapped.get(0)); + assertArrayEquals(new int[] { 14, 13 }, mapped.get(1)); + assertArrayEquals(new int[] { 10, 10 }, mapped.get(2)); } @Test(groups = { "Functional" }) - public void testFindOverlapPositions() + public void testGetPositionsForOffsets() { List ranges = new ArrayList<>(); - List overlaps = MapList.findOverlapPositions(ranges, 20, 30); - assertTrue(overlaps.isEmpty()); // nothing to overlap + BitSet offsets = new BitSet(); + List mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertTrue(mapped.isEmpty()); // no ranges and no offsets! - ranges.add(new int[] { 15, 25 }); - overlaps = MapList.findOverlapPositions(ranges, 5, 10); - assertTrue(overlaps.isEmpty()); // no overlap + offsets.set(5, 1000); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertTrue(mapped.isEmpty()); // no ranges - overlaps = MapList.findOverlapPositions(ranges, 20, 20); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 5, 5 }, overlaps.get(0)); + /* + * one range with overlap of offsets + */ + ranges.add(new int[] { 15, 25 }); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] { 20, 25 }, mapped.get(0)); - overlaps = MapList.findOverlapPositions(ranges, 5, 19); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 0, 4 }, overlaps.get(0)); + /* + * two ranges + */ + ranges.add(new int[] { 300, 320 }); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 20, 25 }, mapped.get(0)); + assertArrayEquals(new int[] { 300, 320 }, mapped.get(1)); - ranges.add(new int[] { 35, 45 }); - overlaps = MapList.findOverlapPositions(ranges, 26, 34); - assertTrue(overlaps.isEmpty()); + /* + * boundary case - right end of first range overlaps + */ + offsets.clear(); + offsets.set(10); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(1, mapped.size()); + assertArrayEquals(new int[] { 25, 25 }, mapped.get(0)); /* - * 24-37 overlaps the end of 15-25 and the start of 35-45 - * - offset positions are contiguous in the map so merged + * boundary case - left end of second range overlaps */ - overlaps = MapList.findOverlapPositions(ranges, 24, 37); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 9, 13 }, overlaps.get(0)); + offsets.set(11); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 25, 25 }, mapped.get(0)); + assertArrayEquals(new int[] { 300, 300 }, mapped.get(1)); /* - * EMBL:MN908947 https://www.ebi.ac.uk/ena/browser/api/embl/MN908947 - * (Covid-SARS-2) CDS mapping with 'slippage' - * (base 13468 is used twice in transcription) + * offsets into a circular range are reported in + * the order in which they are traversed */ ranges.clear(); - ranges.add(new int[] { 266, 13468 }); - ranges.add(new int[] { 13468, 21555 }); - - // 13468 occupies two offsets in the range list - overlaps = MapList.findOverlapPositions(ranges, 13468, 13468); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 13202, 13203 }, overlaps.get(0)); - overlaps = MapList.findOverlapPositions(ranges, 13469, 13470); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 13204, 13205 }, overlaps.get(0)); + ranges.add(new int[] { 100, 150 }); + ranges.add(new int[] { 60, 80 }); + offsets.clear(); + offsets.set(45, 55); // sets bits 45 to 54 + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 145, 150 }, mapped.get(0)); // offsets 45-50 + assertArrayEquals(new int[] { 60, 63 }, mapped.get(1)); // offsets 51-54 /* - * EMBL:J03321 https://www.ebi.ac.uk/ena/browser/api/embl/J03321 - * circular dna: CDS at [7022-7502, 1-437] - * = 481 + 437 = 918 bases = 305 aa's + stop codon + * reverse range overlap is reported with start < end */ ranges.clear(); - ranges.add(new int[] { 7022, 7502 }); - ranges.add(new int[] { 1, 437 }); - - overlaps = MapList.findOverlapPositions(ranges, 438, 7021); - assertTrue(overlaps.isEmpty()); - - // overlap first exon only: - overlaps = MapList.findOverlapPositions(ranges, 7000, 7100); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 0, 78 }, overlaps.get(0)); - - // overlap second exon only: offset to mapping includes first exon - overlaps = MapList.findOverlapPositions(ranges, 400, 500); - assertEquals(1, overlaps.size()); - assertArrayEquals(new int[] { 880, 917 }, overlaps.get(0)); - - // overlap both exons: first exon overlap precedes second exon overlap - // offsets of overlaps are not contiguous - overlaps = MapList.findOverlapPositions(ranges, 200, 7500); - assertEquals(2, overlaps.size()); - // first overlap is offsets of 7022-7500 in exon 1 (7022-7502): - assertArrayEquals(new int[] { 0, 478 }, overlaps.get(0)); - // second overlap offsets is (first exon length = 481) + (200-437) - assertArrayEquals(new int[] { 680, 917 }, overlaps.get(1)); + ranges.add(new int[] { 4321, 4000 }); + offsets.clear(); + offsets.set(20, 22); // sets bits 20 and 21 + offsets.set(30); + mapped = MapList.getPositionsForOffsets(ranges, offsets); + assertEquals(2, mapped.size()); + assertArrayEquals(new int[] { 4301, 4300 }, mapped.get(0)); + assertArrayEquals(new int[] { 4291, 4291 }, mapped.get(1)); } @Test(groups = { "Functional" }) - public void testMapWords() + public void testGetMappedOffsetsForPositions() { - List ranges = new ArrayList<>(); - /* - * 1:1 (trivial) case + * start by verifying the examples in the method's Javadoc! */ - ranges.add(new int[] { 2, 4 }); - ranges.add(new int[] { 6, 9 }); - MapList.mapWords(ranges, 1, 1); - assertEquals(ranges.size(), 2); - assertArrayEquals(new int[] { 2, 4 }, ranges.get(0)); - assertArrayEquals(new int[] { 6, 9 }, ranges.get(1)); + List ranges = new ArrayList<>(); + ranges.add(new int[] { 10, 20 }); + ranges.add(new int[] { 31, 40 }); + BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, + 1); + assertTrue(overlaps.isEmpty()); + overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(0)); + assertTrue(overlaps.get(1)); + overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1); + assertEquals(11, overlaps.cardinality()); + for (int i = 5; i <= 11; i++) + { + assertTrue(overlaps.get(i)); + } - /* - * 1:3 case (peptide to codon ranges) - */ - MapList.mapWords(ranges, 1, 3); - assertEquals(ranges.size(), 2); - assertArrayEquals(new int[] { 6, 14 }, ranges.get(0)); - assertArrayEquals(new int[] { 18, 29 }, ranges.get(1)); + ranges.clear(); + ranges.add(new int[] { 1, 200 }); + overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(24)); + assertTrue(overlaps.get(25)); + assertTrue(overlaps.get(26)); + + ranges.clear(); + ranges.add(new int[] { 101, 150 }); + ranges.add(new int[] { 171, 180 }); + overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1); + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(0)); + overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1); + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(16)); + + ranges.clear(); + ranges.add(new int[] { 101, 150 }); + ranges.add(new int[] { 21, 30 }); + overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(17)); + assertTrue(overlaps.get(18)); + assertTrue(overlaps.get(19)); /* - * 3:1 case (codon or part codon to peptide) + * reverse range 1:1 (e.g. reverse strand gene to transcript) */ ranges.clear(); - ranges.add(new int[] { 0, 5 }); // 2 whole codons - ranges.add(new int[] { 7, 11 }); // part + whole codon - ranges.add(new int[] { 15, 19 }); // whole + part codon - ranges.add(new int[] { 23, 27 }); // part + part codon - ranges.add(new int[] { 30, 30 }); // first base of codon - ranges.add(new int[] { 31, 31 }); // second base of codon - ranges.add(new int[] { 32, 32 }); // third base of codon - MapList.mapWords(ranges, 3, 1); - assertEquals(ranges.size(), 7); - assertArrayEquals(new int[] { 0, 1 }, ranges.get(0)); - assertArrayEquals(new int[] { 2, 3 }, ranges.get(1)); - assertArrayEquals(new int[] { 5, 6 }, ranges.get(2)); - assertArrayEquals(new int[] { 7, 9 }, ranges.get(3)); - assertArrayEquals(new int[] { 10, 10 }, ranges.get(4)); - assertArrayEquals(new int[] { 10, 10 }, ranges.get(5)); - assertArrayEquals(new int[] { 10, 10 }, ranges.get(6)); - } + ranges.add(new int[] { 20, 10 }); + overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(7)); + assertTrue(overlaps.get(8)); - @Test(groups = { "Functional" }) - public void testLocateInFrom2() - { /* - * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8 + * reverse range 3:1 (e.g. reverse strand gene to peptide) + * from EMBL:J03321 to P0CE20 */ - MapList ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 }, - new int[] - { 1, 1, 3, 4, 6, 8 }, 3, 1); - assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom2(1, 1)); - assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom2(1, 3)); - assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom2(1, 4)); - assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom2(3, 4)); + ranges.clear(); + ranges.add(new int[] { 1480, 488 }); + overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, + 1); + // 1460 is the end of the 7th codon + assertEquals(1, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + // add one base (part codon) + overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, + 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add second base (part codon) + overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, + 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add third base (whole codon) + overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, + 1); + assertEquals(2, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + // add one more base (part codon) + overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, + 1); + assertEquals(3, overlaps.cardinality()); + assertTrue(overlaps.get(6)); + assertTrue(overlaps.get(7)); + assertTrue(overlaps.get(8)); } }