import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.List;
import org.testng.annotations.BeforeClass;
* no overlap
*/
assertNull(ml.locateInFrom(0, 0));
+
}
/**
assertEquals("[10, 10, 12, 12, 14, 14]",
Arrays.toString(ml.locateInFrom(3, 3)));
assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4)));
+
+ /*
+ * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ */
+ ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
+ new int[]
+ { 1, 1, 3, 4, 6, 8 }, 3, 1);
+ assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom(1, 1));
+ assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom(1, 3));
+ assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom(1, 4));
+ assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom(3, 4));
+
}
@Test(groups = { "Functional" })
toRanges = compound.getToRanges();
assertEquals(2, toRanges.size());
assertArrayEquals(new int[] { 931, 901 }, toRanges.get(0));
- assertArrayEquals(new int[] { 600, 582 }, toRanges.get(1));
+ assertArrayEquals(new int[] { 600, 582}, toRanges.get(1));
/*
* 1:1 plus 1:3 should result in 1:3
}
/**
- * Tests for helper method that adds any overlap (plus offset) to a list of
+ * Tests for helper method that adds any overlap (plus offset) to a set of
* overlaps
*/
@Test(groups = { "Functional" })
- public void testAddOverlap()
+ public void testAddOffsetPositions()
{
- List<int[]> overlaps = new ArrayList<>();
- int[] candidate = new int[] { 10, 19 };
- MapList.addOverlap(overlaps, 5, candidate, 20, 30); // doesn't overlap
- assertTrue(overlaps.isEmpty());
- MapList.addOverlap(overlaps, 5, candidate, 31, 40); // doesn't overlap
- assertTrue(overlaps.isEmpty());
+ List<int[]> mapped = new ArrayList<>();
+ int[] range = new int[] {10, 20};
+ BitSet offsets = new BitSet();
- /*
- * 10-19 overlaps 15-25 at 15-19, which is offset 5-9 in 10-19
- * + 5 initial offset
- */
- MapList.addOverlap(overlaps, 5, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 10, 14 }, overlaps.get(0));
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+
+ offsets.set(4, 6); // this sets bits 4 and 5
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+
+ mapped.clear();
+ offsets.set(10);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(1));
/*
- * reverse range overlap:
- * 300-20 overlaps 15-25 at 25-20, which is offset 275-280 in 300-20
- * + 8 initial offset
+ * reverse range
*/
- overlaps.clear();
- candidate = new int[] { 300, 20 };
- MapList.addOverlap(overlaps, 8, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 283, 288 }, overlaps.get(0));
+ range = new int[] { 20, 10 };
+ mapped.clear();
+ offsets.clear();
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+ offsets.set(0);
+ offsets.set(10);
+ offsets.set(6, 8); // sets bits 6 and 7
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(3, mapped.size());
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(0));
+ assertArrayEquals(new int[] { 14, 13 }, mapped.get(1));
+ assertArrayEquals(new int[] { 10, 10 }, mapped.get(2));
}
-
+
@Test(groups = { "Functional" })
- public void testFindOverlapPositions()
+ public void testGetPositionsForOffsets()
{
List<int[]> ranges = new ArrayList<>();
- List<int[]> overlaps = MapList.findOverlapPositions(ranges, 20, 30);
- assertTrue(overlaps.isEmpty()); // nothing to overlap
-
- ranges.add(new int[] { 15, 25 });
- overlaps = MapList.findOverlapPositions(ranges, 5, 10);
- assertTrue(overlaps.isEmpty()); // no overlap
-
- overlaps = MapList.findOverlapPositions(ranges, 20, 20);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 5, 5 }, overlaps.get(0));
-
- overlaps = MapList.findOverlapPositions(ranges, 5, 19);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 4 }, overlaps.get(0));
-
- ranges.add(new int[] { 35, 45 });
- overlaps = MapList.findOverlapPositions(ranges, 26, 34);
- assertTrue(overlaps.isEmpty());
-
+ BitSet offsets = new BitSet();
+ List<int[]> mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges and no offsets!
+
+ offsets.set(5, 1000);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges
+
/*
- * 24-37 overlaps the end of 15-25 and the start of 35-45
- * - offset positions are contiguous in the map so merged
+ * one range with overlap of offsets
*/
- overlaps = MapList.findOverlapPositions(ranges, 24, 37);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 9, 13 }, overlaps.get(0));
-
+ ranges.add(new int[] {15, 25});
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] {20, 25}, mapped.get(0));
+
/*
- * EMBL:MN908947 https://www.ebi.ac.uk/ena/browser/api/embl/MN908947
- * (Covid-SARS-2) CDS mapping with 'slippage'
- * (base 13468 is used twice in transcription)
+ * two ranges
*/
- ranges.clear();
- ranges.add(new int[] { 266, 13468 });
- ranges.add(new int[] { 13468, 21555 });
-
- // 13468 occupies two offsets in the range list
- overlaps = MapList.findOverlapPositions(ranges, 13468, 13468);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13202, 13203 }, overlaps.get(0));
- overlaps = MapList.findOverlapPositions(ranges, 13469, 13470);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13204, 13205 }, overlaps.get(0));
-
+ ranges.add(new int[] {300, 320});
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {20, 25}, mapped.get(0));
+ assertArrayEquals(new int[] {300, 320}, mapped.get(1));
+
/*
- * EMBL:J03321 https://www.ebi.ac.uk/ena/browser/api/embl/J03321
- * circular dna: CDS at [7022-7502, 1-437]
- * = 481 + 437 = 918 bases = 305 aa's + stop codon
+ * boundary case - right end of first range overlaps
*/
- ranges.clear();
- ranges.add(new int[] { 7022, 7502 });
- ranges.add(new int[] { 1, 437 });
-
- overlaps = MapList.findOverlapPositions(ranges, 438, 7021);
- assertTrue(overlaps.isEmpty());
-
- // overlap first exon only:
- overlaps = MapList.findOverlapPositions(ranges, 7000, 7100);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 78 }, overlaps.get(0));
-
- // overlap second exon only: offset to mapping includes first exon
- overlaps = MapList.findOverlapPositions(ranges, 400, 500);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 880, 917 }, overlaps.get(0));
-
- // overlap both exons: first exon overlap precedes second exon overlap
- // offsets of overlaps are not contiguous
- overlaps = MapList.findOverlapPositions(ranges, 200, 7500);
- assertEquals(2, overlaps.size());
- // first overlap is offsets of 7022-7500 in exon 1 (7022-7502):
- assertArrayEquals(new int[] { 0, 478 }, overlaps.get(0));
- // second overlap offsets is (first exon length = 481) + (200-437)
- assertArrayEquals(new int[] { 680, 917 }, overlaps.get(1));
- }
-
- @Test(groups = { "Functional" })
- public void testMapWords()
- {
- List<int[]> ranges = new ArrayList<>();
-
+ offsets.clear();
+ offsets.set(10);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] {25, 25}, mapped.get(0));
+
/*
- * 1:1 (trivial) case
+ * boundary case - left end of second range overlaps
*/
- ranges.add(new int[] { 2, 4 });
- ranges.add(new int[] { 6, 9 });
- MapList.mapWords(ranges, 1, 1);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 2, 4 }, ranges.get(0));
- assertArrayEquals(new int[] { 6, 9 }, ranges.get(1));
-
+ offsets.set(11);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {25, 25}, mapped.get(0));
+ assertArrayEquals(new int[] {300, 300}, mapped.get(1));
+
/*
- * 1:3 case (peptide to codon ranges)
+ * offsets into a circular range are reported in
+ * the order in which they are traversed
*/
- MapList.mapWords(ranges, 1, 3);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 6, 14 }, ranges.get(0));
- assertArrayEquals(new int[] { 18, 29 }, ranges.get(1));
+ ranges.clear();
+ ranges.add(new int[] {100, 150});
+ ranges.add(new int[] {60, 80});
+ offsets.clear();
+ offsets.set(45, 55); // sets bits 45 to 54
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {145, 150}, mapped.get(0)); // offsets 45-50
+ assertArrayEquals(new int[] {60, 63}, mapped.get(1)); // offsets 51-54
/*
- * 3:1 case (codon or part codon to peptide)
+ * reverse range overlap is reported with start < end
*/
ranges.clear();
- ranges.add(new int[] { 0, 5 }); // 2 whole codons
- ranges.add(new int[] { 7, 11 }); // part + whole codon
- ranges.add(new int[] { 15, 19 }); // whole + part codon
- ranges.add(new int[] { 23, 27 }); // part + part codon
- ranges.add(new int[] { 30, 30 }); // first base of codon
- ranges.add(new int[] { 31, 31 }); // second base of codon
- ranges.add(new int[] { 32, 32 }); // third base of codon
- MapList.mapWords(ranges, 3, 1);
- assertEquals(ranges.size(), 7);
- assertArrayEquals(new int[] { 0, 1 }, ranges.get(0));
- assertArrayEquals(new int[] { 2, 3 }, ranges.get(1));
- assertArrayEquals(new int[] { 5, 6 }, ranges.get(2));
- assertArrayEquals(new int[] { 7, 9 }, ranges.get(3));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(4));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(5));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(6));
+ ranges.add(new int[] {4321, 4000});
+ offsets.clear();
+ offsets.set(20, 22); // sets bits 20 and 21
+ offsets.set(30);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] {4301, 4300}, mapped.get(0));
+ assertArrayEquals(new int[] {4291, 4291}, mapped.get(1));
}
-
+
@Test(groups = { "Functional" })
- public void testLocateInFrom2()
+ public void testGetMappedOffsetsForPositions()
{
/*
- * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ * start by verifying the examples in the method's Javadoc!
*/
- MapList ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
- new int[]
- { 1, 1, 3, 4, 6, 8 }, 3, 1);
- assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom2(1, 1));
- assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom2(1, 3));
- assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom2(1, 4));
- assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom2(3, 4));
+ List<int[]> ranges = new ArrayList<>();
+ ranges.add(new int[] {10, 20});
+ ranges.add(new int[] {31, 40});
+ BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1, 1);
+ assertTrue(overlaps.isEmpty());
+ overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ assertTrue(overlaps.get(1));
+ overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1);
+ assertEquals(11, overlaps.cardinality());
+ for (int i = 5 ; i <= 11 ; i++)
+ {
+ assertTrue(overlaps.get(i));
+ }
+
+ ranges.clear();
+ ranges.add(new int[] {1, 200});
+ overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(24));
+ assertTrue(overlaps.get(25));
+ assertTrue(overlaps.get(26));
+
+ ranges.clear();
+ ranges.add(new int[] {101, 150});
+ ranges.add(new int[] {171, 180});
+ overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(16));
+
+ ranges.clear();
+ ranges.add(new int[] {101, 150});
+ ranges.add(new int[] {21, 30});
+ overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(17));
+ assertTrue(overlaps.get(18));
+ assertTrue(overlaps.get(19));
+
+ /*
+ * reverse range 1:1 (e.g. reverse strand gene to transcript)
+ */
+ ranges.clear();
+ ranges.add(new int[] {20, 10});
+ overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
+
+ /*
+ * reverse range 3:1 (e.g. reverse strand gene to peptide)
+ * from EMBL:J03321 to P0CE20
+ */
+ ranges.clear();
+ ranges.add(new int[] {1480, 488});
+ overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3, 1);
+ // 1460 is the end of the 7th codon
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ // add one base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add second base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add third base (whole codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add one more base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3, 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
}
}