{
List<SubList> sublists = new ArrayList<SubList>();
+ if (ranges.isEmpty())
+ {
+ return sublists;
+ }
+
int listStartIndex = 0;
long lastEndPos = Long.MAX_VALUE;
{
NCNode<T> subrange = subranges.get(j);
- if (end < subrange.getStart() && !overlapping && !enclosing)
+ if (end < subrange.getBegin() && !overlapping && !enclosing)
{
/*
* new entry lies between subranges j-1 j
return true;
}
- if (subrange.getStart() <= start && subrange.getEnd() >= end)
+ if (subrange.getBegin() <= start && subrange.getEnd() >= end)
{
/*
* push new entry inside this subrange as it encloses it
return true;
}
- if (start <= subrange.getStart())
+ if (start <= subrange.getBegin())
{
if (end >= subrange.getEnd())
{
for (int i = candidateIndex; i < subranges.size(); i++)
{
NCNode<T> candidate = subranges.get(i);
- if (candidate.getStart() > to)
+ if (candidate.getBegin() > to)
{
/*
* we are past the end of our target range
for (int i = candidateIndex; i < subranges.size(); i++)
{
NCNode<T> candidate = subranges.get(i);
- if (candidate.getStart() > to)
+ if (candidate.getBegin() > to)
{
/*
* we are past the end of our target range
}
/**
- * Returns a string representation of the data where containment is shown bgy
+ * Returns a string representation of the data where containment is shown by
* indentation on new lines
*
* @return
int offset = 0;
int indent = 2;
prettyPrint(sb, offset, indent);
+ sb.append(System.lineSeparator());
return sb.toString();
}
int lastStart = start;
for (NCNode<T> subrange : subranges)
{
- if (subrange.getStart() < lastStart)
+ if (subrange.getBegin() < lastStart)
{
System.err.println("error in NCList: range " + subrange.toString()
+ " starts before " + lastStart);
+ " ends after " + end);
return false;
}
- lastStart = subrange.getStart();
+ lastStart = subrange.getBegin();
if (!subrange.isValid())
{
*/
public int getStart()
{
- return subranges.isEmpty() ? 0 : subranges.get(0).getStart();
+ return subranges.isEmpty() ? 0 : subranges.get(0).getBegin();
}
/**
{
/*
* if the subrange is rooted on this entry, promote its
- * subregions (if any) to replace the subrange here
+ * subregions (if any) to replace the subrange here;
+ * NB have to resort subranges after doing this since e.g.
+ * [10-30 [12-20 [16-18], 13-19]]
+ * after deleting 12-20, 16-18 is promoted to sibling of 13-19
+ * but should follow it in the list of subranges of 10-30
*/
subranges.remove(i);
if (subRegions != null)
{
subranges.addAll(i, subRegions.subranges);
+ Collections.sort(subranges, intervalSorter);
}
size--;
return true;
}
return false;
}
-
- /**
- * Answers true if this contains no ranges
- *
- * @return
- */
- public boolean isEmpty()
- {
- return getSize() == 0;
- }
-
- /**
- * Answer the list of subranges held in this NCList
- *
- * @return
- */
- List<NCNode<T>> getSubregions()
- {
- return subranges;
- }
}
}
/**
- * Do a number of pseudo-random (reproducible) builds of an NCList, with
- * checks for validity of the data structure, and searches for (pseudo-random)
- * overlap ranges, for greater (but not perfect!) confidence that corner cases
- * are being handled correctly.
+ * Do a number of pseudo-random (reproducible) builds of an NCList, to
+ * exercise as many methods of the class as possible while generating the
+ * range of possible structure topologies
+ * <ul>
+ * <li>verify that add adds an entry and increments size</li>
+ * <li>...except where the entry is already contained (by equals test)</li>
+ * <li>verify that the structure is valid at all stages of construction</li>
+ * <li>generate, run and verify a range of overlap queries</li>
+ * <li>tear down the structure by deleting entries, verifying correctness at
+ * each stage</li>
+ * </ul>
*/
@Test(groups = "Functional", dataProvider = "scalesOfLife")
- public void testAdd_FindOverlaps_pseudoRandom(Integer scale)
+ public void test_pseudoRandom(Integer scale)
{
- NCList<Range> ncl = new NCList<Range>();
- int count = 0;
- List<Range> ranges = new ArrayList<Range>(scale);
+ NCList<SequenceFeature> ncl = new NCList<SequenceFeature>();
+ List<SequenceFeature> features = new ArrayList<SequenceFeature>(scale);
- for (int i = 0; i < scale; i++)
+ testAdd_pseudoRandom(scale, ncl, features);
+
+ /*
+ * sort the list of added ranges - this doesn't affect the test,
+ * just makes it easier to inspect the data in the debugger
+ */
+ Collections.sort(features, sorter);
+
+ testFindOverlaps_pseudoRandom(ncl, scale, features);
+
+ testDelete_pseudoRandom(ncl, features);
+ }
+
+ /**
+ * Pick randomly selected entries to delete in turn, checking the NCList size
+ * and validity at each stage, until it is empty
+ *
+ * @param ncl
+ * @param features
+ */
+ void testDelete_pseudoRandom(NCList<SequenceFeature> ncl,
+ List<SequenceFeature> features)
+ {
+ int deleted = 0;
+
+ while (!features.isEmpty())
+ {
+ assertEquals(ncl.getSize(), features.size());
+ int toDelete = random.nextInt(features.size());
+ SequenceFeature entry = features.get(toDelete);
+ String lastDeleted = "'" + entry.toString() + "'";
+ assertTrue(ncl.contains(entry), String.format(
+ "NCList doesn't contain entry [%d] %s!", deleted,
+ lastDeleted));
+ ncl.delete(entry);
+ assertFalse(ncl.contains(entry), String.format(
+ "NCList still contains deleted entry [%d] %s!", deleted,
+ lastDeleted));
+ features.remove(toDelete);
+ deleted++;
+
+ assertTrue(ncl.isValid(), String.format(
+ "NCList invalid after %d deletions, last deleted was %s",
+ deleted, lastDeleted));
+
+ /*
+ * brute force check that deleting one entry didn't delete any others
+ */
+ for (int i = 0; i < features.size(); i++)
+ {
+ SequenceFeature sf = features.get(i);
+ assertTrue(ncl.contains(sf), String.format(
+ "NCList doesn't contain entry [%d] %s after deleting %s!",
+ i, sf.toString(), lastDeleted));
+ }
+ }
+ assertEquals(ncl.getSize(), 0); // all gone
+ }
+
+ /**
+ * Randomly generate entries and add them to the NCList, checking its validity
+ * and size at each stage. A few entries should be duplicates (by equals test)
+ * so not get added.
+ *
+ * @param scale
+ * @param ncl
+ * @param features
+ */
+ void testAdd_pseudoRandom(Integer scale, NCList<SequenceFeature> ncl,
+ List<SequenceFeature> features)
+ {
+ int count = 0;
+ final int size = 50;
+
+ for (int i = 0; i < size; i++)
{
int r1 = random.nextInt(scale + 1);
int r2 = random.nextInt(scale + 1);
int from = Math.min(r1, r2);
int to = Math.max(r1, r2);
- Range range = new Range(from, to);
- ncl.add(range);
- ranges.add(range);
+
+ /*
+ * choice of two feature values means that occasionally an identical
+ * feature may be generated, in which case it should not be added
+ */
+ float value = (float) i % 2;
+ SequenceFeature feature = new SequenceFeature("Pfam", "", from, to,
+ value, "group");
+
+ /*
+ * add to NCList - with duplicate entries (by equals) disallowed
+ */
+ ncl.add(feature, false);
+ if (features.contains(feature))
+ {
+ System.out.println("Duplicate feature generated "
+ + feature.toString());
+ }
+ else
+ {
+ features.add(feature);
+ count++;
+ }
/*
* check list format is valid at each stage of its construction
*/
assertTrue(ncl.isValid(),
String.format("Failed for scale = %d, i=%d", scale, i));
- count++;
assertEquals(ncl.getSize(), count);
}
// System.out.println(ncl.prettyPrint());
-
- /*
- * sort the list of added ranges - this doesn't affect the test,
- * just makes it easier to inspect the data in the debugger
- */
- Collections.sort(ranges, sorter);
-
- testFindOverlaps(ncl, scale, ranges);
}
/**
* the NCList to query
* @param scale
* ncl maximal range is [0, scale]
- * @param ranges
+ * @param features
* a list of the ranges stored in ncl
*/
- protected void testFindOverlaps(NCList<Range> ncl, int scale,
- List<Range> ranges)
+ protected void testFindOverlaps_pseudoRandom(NCList<SequenceFeature> ncl,
+ int scale,
+ List<SequenceFeature> features)
{
int halfScale = scale / 2;
int minIterations = 20;
}
}
- verifyFindOverlaps(ncl, from, to, ranges);
+ verifyFindOverlaps(ncl, from, to, features);
}
}
* @param ncl
* @param from
* @param to
- * @param ranges
+ * @param features
*/
- protected void verifyFindOverlaps(NCList<Range> ncl, int from, int to,
- List<Range> ranges)
+ protected void verifyFindOverlaps(NCList<SequenceFeature> ncl, int from,
+ int to, List<SequenceFeature> features)
{
- List<Range> overlaps = ncl.findOverlaps(from, to);
+ List<SequenceFeature> overlaps = ncl.findOverlaps(from, to);
/*
* check returned entries do indeed overlap from-to range
*/
- for (Range r : overlaps)
+ for (ContiguousI sf : overlaps)
{
- int begin = r.getBegin();
- int end = r.getEnd();
+ int begin = sf.getBegin();
+ int end = sf.getEnd();
assertTrue(begin <= to && end >= from, String.format(
"[%d, %d] does not overlap query range [%d, %d]", begin, end,
from, to));
* check overlapping ranges are included in the results
* (the test above already shows non-overlapping ranges are not)
*/
- for (Range r : ranges)
+ for (ContiguousI sf : features)
{
- int begin = r.getBegin();
- int end = r.getEnd();
+ int begin = sf.getBegin();
+ int end = sf.getEnd();
if (begin <= to && end >= from)
{
- boolean found = overlaps.contains(r);
+ boolean found = overlaps.contains(sf);
assertTrue(found, String.format(
"[%d, %d] missing in query range [%d, %d]", begin, end,
from, to));
assertTrue(ncl.contains(sf5));
assertTrue(ncl.contains(sf6));
}
+
+ @Test(groups = "Functional")
+ public void testIsValid()
+ {
+ List<Range> ranges = new ArrayList<Range>();
+ Range r1 = new Range(40, 50);
+ ranges.add(r1);
+ NCList<Range> ncl = new NCList<Range>(ranges);
+ assertTrue(ncl.isValid());
+
+ Range r2 = new Range(42, 44);
+ ncl.add(r2);
+ assertTrue(ncl.isValid());
+ Range r3 = new Range(46, 48);
+ ncl.add(r3);
+ assertTrue(ncl.isValid());
+ Range r4 = new Range(43, 43);
+ ncl.add(r4);
+ assertTrue(ncl.isValid());
+
+ assertEquals(ncl.toString(), "[40-50 [42-44 [43-43], 46-48]]");
+ assertTrue(ncl.isValid());
+
+ r1.start = 43;
+ assertFalse(ncl.isValid()); // r2 not inside r1
+ r1.start = 40;
+ assertTrue(ncl.isValid());
+
+ r3.start = 41;
+ assertFalse(ncl.isValid()); // r3 should precede r2
+ r3.start = 46;
+ assertTrue(ncl.isValid());
+
+ r4.start = 41;
+ assertFalse(ncl.isValid()); // r4 not inside r2
+ r4.start = 43;
+ assertTrue(ncl.isValid());
+
+ r4.start = 44;
+ assertFalse(ncl.isValid()); // r4 has reverse range
+ }
+
+ @Test(groups = "Functional")
+ public void testPrettyPrint()
+ {
+ /*
+ * construct NCList from a list of ranges
+ * they are sorted then assembled into NCList subregions
+ * notice that 42-42 end up inside 41-46
+ */
+ List<Range> ranges = new ArrayList<Range>();
+ ranges.add(new Range(40, 50));
+ ranges.add(new Range(45, 55));
+ ranges.add(new Range(40, 45));
+ ranges.add(new Range(41, 46));
+ ranges.add(new Range(42, 42));
+ ranges.add(new Range(42, 42));
+ NCList<Range> ncl = new NCList<Range>(ranges);
+ assertTrue(ncl.isValid());
+ assertEquals(ncl.toString(),
+ "[40-50 [40-45], 41-46 [42-42 [42-42]], 45-55]");
+ String expected = "40-50\n 40-45\n41-46\n 42-42\n 42-42\n45-55\n";
+ assertEquals(ncl.prettyPrint(), expected);
+
+ /*
+ * repeat but now add ranges one at a time
+ * notice that 42-42 end up inside 40-50 so we get
+ * a different but equal valid NCList structure
+ */
+ ranges.clear();
+ ncl = new NCList<Range>(ranges);
+ ncl.add(new Range(40, 50));
+ ncl.add(new Range(45, 55));
+ ncl.add(new Range(40, 45));
+ ncl.add(new Range(41, 46));
+ ncl.add(new Range(42, 42));
+ ncl.add(new Range(42, 42));
+ assertTrue(ncl.isValid());
+ assertEquals(ncl.toString(),
+ "[40-50 [40-45 [42-42 [42-42]], 41-46], 45-55]");
+ expected = "40-50\n 40-45\n 42-42\n 42-42\n 41-46\n45-55\n";
+ assertEquals(ncl.prettyPrint(), expected);
+ }
+
+ /**
+ * A test that shows different valid trees can be constructed from the same
+ * set of ranges, depending on the order of construction
+ */
+ @Test(groups = "Functional")
+ public void testConstructor_alternativeTrees()
+ {
+ List<Range> ranges = new ArrayList<Range>();
+ ranges.add(new Range(10, 60));
+ ranges.add(new Range(20, 30));
+ ranges.add(new Range(40, 50));
+
+ /*
+ * constructor with greedy traversal of sorted ranges to build nested
+ * containment lists results in 20-30 inside 10-60, 40-50 a sibling
+ */
+ NCList<Range> ncl = new NCList<Range>(ranges);
+ assertEquals(ncl.toString(), "[10-60 [20-30], 40-50]");
+ assertTrue(ncl.isValid());
+
+ /*
+ * adding ranges one at a time results in 40-50
+ * a sibling of 20-30 inside 10-60
+ */
+ ncl = new NCList<Range>(new Range(10, 60));
+ ncl.add(new Range(20, 30));
+ ncl.add(new Range(40, 50));
+ assertEquals(ncl.toString(), "[10-60 [20-30, 40-50]]");
+ assertTrue(ncl.isValid());
+ }
}