import java.util.ArrayList;
import java.util.Arrays;
+import java.util.BitSet;
import java.util.List;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-import jalview.bin.Cache;
+import jalview.bin.Console;
import jalview.gui.JvOptionPane;
public class MapListTest
@BeforeClass(alwaysRun = true)
public void setUp()
{
- Cache.initLogger();
+ Console.initLogger();
}
@BeforeClass(alwaysRun = true)
* no overlap
*/
assertNull(ml.locateInFrom(0, 0));
+
}
/**
assertEquals("[10, 10, 12, 12, 14, 14]",
Arrays.toString(ml.locateInFrom(3, 3)));
assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4)));
+
+ /*
+ * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ */
+ ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
+ new int[]
+ { 1, 1, 3, 4, 6, 8 }, 3, 1);
+ assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom(1, 1));
+ assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom(1, 3));
+ assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom(1, 4));
+ assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom(3, 4));
+
}
@Test(groups = { "Functional" })
}
/**
+ * Tests for method that locates the overlap of the ranges in the 'from' map
+ * for given range in the 'to' map
+ */
+ @Test(groups = { "Functional" })
+ public void testGetOverlapsInFrom_withIntrons()
+ {
+ /*
+ * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [16, 17, 18] i.e.
+ * 2-3, 5-7, 9-10, 12-12, 14-14, 16-18
+ */
+ int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 };
+ int[] protein = { 11, 14 };
+ MapList ml = new MapList(codons, protein, 3, 1);
+
+ assertEquals("[2, 3, 5, 5]",
+ Arrays.toString(ml.getOverlapsInFrom(11, 11)));
+ assertEquals("[2, 3, 5, 7, 9, 9]",
+ Arrays.toString(ml.getOverlapsInFrom(11, 12)));
+ // out of range 5' :
+ assertEquals("[2, 3, 5, 7, 9, 9]",
+ Arrays.toString(ml.getOverlapsInFrom(8, 12)));
+ // out of range 3' :
+ assertEquals("[10, 10, 12, 12, 14, 14, 16, 18]",
+ Arrays.toString(ml.getOverlapsInFrom(13, 16)));
+ // out of range both :
+ assertEquals("[2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18]",
+ Arrays.toString(ml.getOverlapsInFrom(1, 16)));
+ // no overlap:
+ assertNull(ml.getOverlapsInFrom(20, 25));
+ }
+
+ /**
+ * Tests for method that locates the overlap of the ranges in the 'to' map for
+ * given range in the 'from' map
+ */
+ @Test(groups = { "Functional" })
+ public void testGetOverlapsInTo_withIntrons()
+ {
+ /*
+ * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [17, 18, 19] i.e.
+ * 2-3, 5-7, 9-10, 12-12, 14-14, 17-19
+ */
+ int[] codons = { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 17, 19 };
+ /*
+ * Mapped proteins at positions 1, 3, 4, 6 in the sequence
+ */
+ int[] protein = { 1, 1, 3, 4, 6, 6 };
+ MapList ml = new MapList(codons, protein, 3, 1);
+
+ /*
+ * Can't map from an unmapped position
+ */
+ assertNull(ml.getOverlapsInTo(1, 1));
+ assertNull(ml.getOverlapsInTo(4, 4));
+ assertNull(ml.getOverlapsInTo(15, 16));
+
+ /*
+ * nor from a range that includes no mapped position (exon)
+ */
+ assertNull(ml.getOverlapsInTo(15, 16));
+
+ // end of codon 1 maps to first peptide
+ assertEquals("[1, 1]", Arrays.toString(ml.getOverlapsInTo(2, 2)));
+ // end of codon 1 and start of codon 2 maps to first 2 peptides
+ assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(3, 7)));
+
+ // range overlaps 5' end of dna:
+ assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 6)));
+ assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.getOverlapsInTo(1, 8)));
+
+ // range overlaps 3' end of dna:
+ assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(17, 24)));
+ assertEquals("[6, 6]", Arrays.toString(ml.getOverlapsInTo(16, 24)));
+
+ // dna positions 8, 11 are intron but include end of exon 2 and start of
+ // exon 3
+ assertEquals("[3, 4]", Arrays.toString(ml.getOverlapsInTo(8, 11)));
+ }
+
+ /**
* Tests for method that locates ranges in the 'to' map for given range in the
* 'from' map.
*/
*/
assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 13)));
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(-1, 2)));
-
+
/*
* no overlap
*/
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 2)));
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 4)));
assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 4)));
-
+
/*
* no overlap
*/
ranges.clear();
ranges.add(new int[] { 10, 30 });
ranges.add(new int[] { 15, 25 });
+
merged = MapList.coalesceRanges(ranges);
assertEquals(2, merged.size());
assertArrayEquals(new int[] { 10, 30 }, merged.get(0));
* no overlap
*/
assertNull(ml.locateInTo(0, 0));
-
+
/*
* partial overlap
*/
ml = new MapList(gene, cds, 1, 1);
assertEquals("[13203, 13204]",
Arrays.toString(ml.locateInTo(13468, 13468)));
-
+
/*
* gene to protein
* the base at 13468 is in the codon for 4401N and also 4402R
}
/**
- * Tests for helper method that adds any overlap (plus offset) to a list of
+ * Tests for helper method that adds any overlap (plus offset) to a set of
* overlaps
*/
@Test(groups = { "Functional" })
- public void testAddOverlap()
+ public void testAddOffsetPositions()
{
- List<int[]> overlaps = new ArrayList<>();
- int[] candidate = new int[] { 10, 19 };
- MapList.addOverlap(overlaps, 5, candidate, 20, 30); // doesn't overlap
- assertTrue(overlaps.isEmpty());
- MapList.addOverlap(overlaps, 5, candidate, 31, 40); // doesn't overlap
- assertTrue(overlaps.isEmpty());
+ List<int[]> mapped = new ArrayList<>();
+ int[] range = new int[] { 10, 20 };
+ BitSet offsets = new BitSet();
- /*
- * 10-19 overlaps 15-25 at 15-19, which is offset 5-9 in 10-19
- * + 5 initial offset
- */
- MapList.addOverlap(overlaps, 5, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 10, 14 }, overlaps.get(0));
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+
+ offsets.set(4, 6); // this sets bits 4 and 5
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+
+ mapped.clear();
+ offsets.set(10);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 14, 15 }, mapped.get(0));
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(1));
/*
- * reverse range overlap:
- * 300-20 overlaps 15-25 at 25-20, which is offset 275-280 in 300-20
- * + 8 initial offset
+ * reverse range
*/
- overlaps.clear();
- candidate = new int[] { 300, 20 };
- MapList.addOverlap(overlaps, 8, candidate, 15, 25);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 283, 288 }, overlaps.get(0));
+ range = new int[] { 20, 10 };
+ mapped.clear();
+ offsets.clear();
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // nothing marked for overlap
+ offsets.set(11);
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertTrue(mapped.isEmpty()); // no offset 11 in range
+ offsets.set(0);
+ offsets.set(10);
+ offsets.set(6, 8); // sets bits 6 and 7
+ MapList.addOffsetPositions(mapped, 0, range, offsets);
+ assertEquals(3, mapped.size());
+ assertArrayEquals(new int[] { 20, 20 }, mapped.get(0));
+ assertArrayEquals(new int[] { 14, 13 }, mapped.get(1));
+ assertArrayEquals(new int[] { 10, 10 }, mapped.get(2));
}
@Test(groups = { "Functional" })
- public void testFindOverlapPositions()
+ public void testGetPositionsForOffsets()
{
List<int[]> ranges = new ArrayList<>();
- List<int[]> overlaps = MapList.findOverlapPositions(ranges, 20, 30);
- assertTrue(overlaps.isEmpty()); // nothing to overlap
+ BitSet offsets = new BitSet();
+ List<int[]> mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges and no offsets!
- ranges.add(new int[] { 15, 25 });
- overlaps = MapList.findOverlapPositions(ranges, 5, 10);
- assertTrue(overlaps.isEmpty()); // no overlap
+ offsets.set(5, 1000);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertTrue(mapped.isEmpty()); // no ranges
- overlaps = MapList.findOverlapPositions(ranges, 20, 20);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 5, 5 }, overlaps.get(0));
+ /*
+ * one range with overlap of offsets
+ */
+ ranges.add(new int[] { 15, 25 });
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] { 20, 25 }, mapped.get(0));
- overlaps = MapList.findOverlapPositions(ranges, 5, 19);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 4 }, overlaps.get(0));
+ /*
+ * two ranges
+ */
+ ranges.add(new int[] { 300, 320 });
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 20, 25 }, mapped.get(0));
+ assertArrayEquals(new int[] { 300, 320 }, mapped.get(1));
- ranges.add(new int[] { 35, 45 });
- overlaps = MapList.findOverlapPositions(ranges, 26, 34);
- assertTrue(overlaps.isEmpty());
+ /*
+ * boundary case - right end of first range overlaps
+ */
+ offsets.clear();
+ offsets.set(10);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(1, mapped.size());
+ assertArrayEquals(new int[] { 25, 25 }, mapped.get(0));
/*
- * 24-37 overlaps the end of 15-25 and the start of 35-45
- * - offset positions are contiguous in the map so merged
+ * boundary case - left end of second range overlaps
*/
- overlaps = MapList.findOverlapPositions(ranges, 24, 37);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 9, 13 }, overlaps.get(0));
+ offsets.set(11);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 25, 25 }, mapped.get(0));
+ assertArrayEquals(new int[] { 300, 300 }, mapped.get(1));
/*
- * EMBL:MN908947 https://www.ebi.ac.uk/ena/browser/api/embl/MN908947
- * (Covid-SARS-2) CDS mapping with 'slippage'
- * (base 13468 is used twice in transcription)
+ * offsets into a circular range are reported in
+ * the order in which they are traversed
*/
ranges.clear();
- ranges.add(new int[] { 266, 13468 });
- ranges.add(new int[] { 13468, 21555 });
-
- // 13468 occupies two offsets in the range list
- overlaps = MapList.findOverlapPositions(ranges, 13468, 13468);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13202, 13203 }, overlaps.get(0));
- overlaps = MapList.findOverlapPositions(ranges, 13469, 13470);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 13204, 13205 }, overlaps.get(0));
+ ranges.add(new int[] { 100, 150 });
+ ranges.add(new int[] { 60, 80 });
+ offsets.clear();
+ offsets.set(45, 55); // sets bits 45 to 54
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 145, 150 }, mapped.get(0)); // offsets 45-50
+ assertArrayEquals(new int[] { 60, 63 }, mapped.get(1)); // offsets 51-54
/*
- * EMBL:J03321 https://www.ebi.ac.uk/ena/browser/api/embl/J03321
- * circular dna: CDS at [7022-7502, 1-437]
- * = 481 + 437 = 918 bases = 305 aa's + stop codon
+ * reverse range overlap is reported with start < end
*/
ranges.clear();
- ranges.add(new int[] { 7022, 7502 });
- ranges.add(new int[] { 1, 437 });
-
- overlaps = MapList.findOverlapPositions(ranges, 438, 7021);
- assertTrue(overlaps.isEmpty());
-
- // overlap first exon only:
- overlaps = MapList.findOverlapPositions(ranges, 7000, 7100);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 0, 78 }, overlaps.get(0));
-
- // overlap second exon only: offset to mapping includes first exon
- overlaps = MapList.findOverlapPositions(ranges, 400, 500);
- assertEquals(1, overlaps.size());
- assertArrayEquals(new int[] { 880, 917 }, overlaps.get(0));
-
- // overlap both exons: first exon overlap precedes second exon overlap
- // offsets of overlaps are not contiguous
- overlaps = MapList.findOverlapPositions(ranges, 200, 7500);
- assertEquals(2, overlaps.size());
- // first overlap is offsets of 7022-7500 in exon 1 (7022-7502):
- assertArrayEquals(new int[] { 0, 478 }, overlaps.get(0));
- // second overlap offsets is (first exon length = 481) + (200-437)
- assertArrayEquals(new int[] { 680, 917 }, overlaps.get(1));
+ ranges.add(new int[] { 4321, 4000 });
+ offsets.clear();
+ offsets.set(20, 22); // sets bits 20 and 21
+ offsets.set(30);
+ mapped = MapList.getPositionsForOffsets(ranges, offsets);
+ assertEquals(2, mapped.size());
+ assertArrayEquals(new int[] { 4301, 4300 }, mapped.get(0));
+ assertArrayEquals(new int[] { 4291, 4291 }, mapped.get(1));
}
@Test(groups = { "Functional" })
- public void testMapWords()
+ public void testGetMappedOffsetsForPositions()
{
- List<int[]> ranges = new ArrayList<>();
-
/*
- * 1:1 (trivial) case
+ * start by verifying the examples in the method's Javadoc!
*/
- ranges.add(new int[] { 2, 4 });
- ranges.add(new int[] { 6, 9 });
- MapList.mapWords(ranges, 1, 1);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 2, 4 }, ranges.get(0));
- assertArrayEquals(new int[] { 6, 9 }, ranges.get(1));
+ List<int[]> ranges = new ArrayList<>();
+ ranges.add(new int[] { 10, 20 });
+ ranges.add(new int[] { 31, 40 });
+ BitSet overlaps = MapList.getMappedOffsetsForPositions(1, 9, ranges, 1,
+ 1);
+ assertTrue(overlaps.isEmpty());
+ overlaps = MapList.getMappedOffsetsForPositions(1, 11, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ assertTrue(overlaps.get(1));
+ overlaps = MapList.getMappedOffsetsForPositions(15, 35, ranges, 1, 1);
+ assertEquals(11, overlaps.cardinality());
+ for (int i = 5; i <= 11; i++)
+ {
+ assertTrue(overlaps.get(i));
+ }
- /*
- * 1:3 case (peptide to codon ranges)
- */
- MapList.mapWords(ranges, 1, 3);
- assertEquals(ranges.size(), 2);
- assertArrayEquals(new int[] { 6, 14 }, ranges.get(0));
- assertArrayEquals(new int[] { 18, 29 }, ranges.get(1));
+ ranges.clear();
+ ranges.add(new int[] { 1, 200 });
+ overlaps = MapList.getMappedOffsetsForPositions(9, 9, ranges, 1, 3);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(24));
+ assertTrue(overlaps.get(25));
+ assertTrue(overlaps.get(26));
+
+ ranges.clear();
+ ranges.add(new int[] { 101, 150 });
+ ranges.add(new int[] { 171, 180 });
+ overlaps = MapList.getMappedOffsetsForPositions(101, 102, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(0));
+ overlaps = MapList.getMappedOffsetsForPositions(150, 171, ranges, 3, 1);
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(16));
+
+ ranges.clear();
+ ranges.add(new int[] { 101, 150 });
+ ranges.add(new int[] { 21, 30 });
+ overlaps = MapList.getMappedOffsetsForPositions(24, 40, ranges, 3, 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(17));
+ assertTrue(overlaps.get(18));
+ assertTrue(overlaps.get(19));
/*
- * 3:1 case (codon or part codon to peptide)
+ * reverse range 1:1 (e.g. reverse strand gene to transcript)
*/
ranges.clear();
- ranges.add(new int[] { 0, 5 }); // 2 whole codons
- ranges.add(new int[] { 7, 11 }); // part + whole codon
- ranges.add(new int[] { 15, 19 }); // whole + part codon
- ranges.add(new int[] { 23, 27 }); // part + part codon
- ranges.add(new int[] { 30, 30 }); // first base of codon
- ranges.add(new int[] { 31, 31 }); // second base of codon
- ranges.add(new int[] { 32, 32 }); // third base of codon
- MapList.mapWords(ranges, 3, 1);
- assertEquals(ranges.size(), 7);
- assertArrayEquals(new int[] { 0, 1 }, ranges.get(0));
- assertArrayEquals(new int[] { 2, 3 }, ranges.get(1));
- assertArrayEquals(new int[] { 5, 6 }, ranges.get(2));
- assertArrayEquals(new int[] { 7, 9 }, ranges.get(3));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(4));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(5));
- assertArrayEquals(new int[] { 10, 10 }, ranges.get(6));
- }
+ ranges.add(new int[] { 20, 10 });
+ overlaps = MapList.getMappedOffsetsForPositions(12, 13, ranges, 1, 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
- @Test(groups = { "Functional" })
- public void testLocateInFrom2()
- {
/*
- * codons at 11-16, 21-26, 31-36 mapped to peptide positions 1, 3-4, 6-8
+ * reverse range 3:1 (e.g. reverse strand gene to peptide)
+ * from EMBL:J03321 to P0CE20
*/
- MapList ml = new MapList(new int[] { 11, 16, 21, 26, 31, 36 },
- new int[]
- { 1, 1, 3, 4, 6, 8 }, 3, 1);
- assertArrayEquals(new int[] { 11, 13 }, ml.locateInFrom2(1, 1));
- assertArrayEquals(new int[] { 11, 16 }, ml.locateInFrom2(1, 3));
- assertArrayEquals(new int[] { 11, 16, 21, 23 }, ml.locateInFrom2(1, 4));
- assertArrayEquals(new int[] { 14, 16, 21, 23 }, ml.locateInFrom2(3, 4));
+ ranges.clear();
+ ranges.add(new int[] { 1480, 488 });
+ overlaps = MapList.getMappedOffsetsForPositions(1460, 1460, ranges, 3,
+ 1);
+ // 1460 is the end of the 7th codon
+ assertEquals(1, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ // add one base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1459, 1460, ranges, 3,
+ 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add second base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1458, 1460, ranges, 3,
+ 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add third base (whole codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1457, 1460, ranges, 3,
+ 1);
+ assertEquals(2, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ // add one more base (part codon)
+ overlaps = MapList.getMappedOffsetsForPositions(1456, 1460, ranges, 3,
+ 1);
+ assertEquals(3, overlaps.cardinality());
+ assertTrue(overlaps.get(6));
+ assertTrue(overlaps.get(7));
+ assertTrue(overlaps.get(8));
}
}