*/
package intervalstore.nonc;
+import jalview.datamodel.SequenceFeature;
+
import java.util.AbstractCollection;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
+import java.util.NoSuchElementException;
import intervalstore.api.IntervalI;
import intervalstore.api.IntervalStoreI;
-
/**
*
+ * A fourth idea, implementing NCList as a pointer system identical in operation
+ * to IntervalStoreJ's implementation using ArrayLists but here using just two
+ * int[] arrays and a single IntervalI[] array that is in the proper order for
+ * holding all nested and unnested arrays.
+ *
+ * Use of unnesting is optional and can be experimented with by changing the
+ * createUnnested flag to false.
+ *
+ * Preliminary testing suggests that this implementation is about 10% faster for
+ * store interval size 50, store sequence factor 10, query width -1000 (fixed
+ * 1000-unit-wide window), and query count 100000.
+ *
+ * Origional note (Mungo Carstairs, IntervalStoreJ)
+ *
* A Collection class to store interval-associated data, with options for "lazy"
* sorting so as to speed incremental construction of the data prior to issuing
* a findOverlap call.
*
+ * Accepts duplicate entries but not null values.
*
- * with O(log N) performance for overlap queries, insertion and deletion (where
- * N is the size of the store).
*
- * Accepts duplicate entries but not null values.
*
- * @author Bob Hanson 2019.08.06
+ * @author Bob Hanson 2019.09.01
*
* @param <T>
- * any type providing <code>getBegin()</code>, <code>getEnd()</code>
- * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
+ * any type providing <code>getBegin()</code> and
+ * <code>getEnd()</code>, primarily
*/
public class IntervalStore<T extends IntervalI>
extends AbstractCollection<T> implements IntervalStoreI<T>
{
- private final boolean DO_PRESORT;
+ /**
+ * Search for the last interval that ends at or just after the specified
+ * position. In the situation that there are multiple intervals starting at
+ * pos, this method returns the first of those.
+ *
+ * @param nests
+ * the nest-ordered array from createArrays()
+ * @param from
+ * the position at the start of the interval of interest
+ * @param start
+ * the starting point for the subarray search
+ * @param end
+ * the ending point for the subarray search
+ * @return index into the nests array or one greater than end if not found
+ */
+ public static int binarySearchFirstEndNotBefore(IntervalI[] nests, long from,
+ int start, int end)
+ {
+ int matched = end + 1;
+ int mid;
+ while (start <= end)
+ {
+ mid = (start + end) >>> 1;
+ if (nests[mid].getEnd() >= from)
+ {
+ matched = mid;
+ end = mid - 1;
+ }
+ else
+ {
+ start = mid + 1;
+ }
+ }
+ return matched;
+ }
+
+ /**
+ * My preference is for a bigendian comparison, but you may differ.
+ */
+ private Comparator<? super IntervalI> icompare;
+
+ /**
+ * bigendian is what NCList does; change icompare to switch to that
+ */
+ private boolean bigendian;
- private boolean isSorted;
+ // private final boolean DO_PRESORT;
+ //
+ // private boolean isSorted;
+ //
+ private boolean createUnnested = true;
- private List<T> intervals;
+ private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
+ maxEnd = Integer.MAX_VALUE;
private boolean isTainted;
- private IntervalI[] orderedIntervalStarts;
+ private int capacity = 8;
- private static Comparator<IntervalI> icompare = new IntervalComparator();
+ protected IntervalI[] intervals = new IntervalI[capacity];
- static
- {
- System.out.println("intervalstore.nonc.IntervalStore initialized");
- }
+ private int[] offsets;
+
+ protected int intervalCount;
+
+ private int added;
- // private Comparator<IntervalI> icompare = new IntervalComparator();
+ private int deleted;
+
+ private BitSet bsDeleted;
/**
- * Constructor
+ * the key array that lists the intervals in sub-interval order so that the
+ * binary search can be isolated to a single subinterval just by indicating
+ * start and end within one array
*/
- public IntervalStore()
- {
+ private IntervalI[] nests;
+
+ /**
+ * pointers to the starting positions in nests[] for a subinterval; the first
+ * element is the "unnested" pointer when unnesting (2) or the root level nest
+ * pointer when not unnesting (1); the second element is root level nest when
+ * unnesting or the start of nest data when not unnesting; after that, nests
+ * are in contiguous sets of binary-searchable blocks
+ *
+ */
+ private int[] nestOffsets;
+
+ /**
+ * the count of intervals within a nest
+ *
+ */
+ private int[] nestLengths;
+
+ /**
+ * pointer to the root nest (intervalCount)
+ */
+ private int root;
+
+ /**
+ * pointer to the unnested set (intervalCount + 1);
+ */
+ private int unnested;
- intervals = new ArrayList<>();
- DO_PRESORT = true;
- };
-
- public IntervalStore(boolean presort)
+ public IntervalStore()
{
- intervals = new ArrayList<>();
- DO_PRESORT = presort;
- };
+ this(null);
+ }
+ // public IntervalStore(boolean presort)
+ // {
+ // this(null, presort);
+ // }
+ //
/**
* Constructor given a list of intervals. Note that the list may get sorted as
* a side-effect of calling this constructor.
*/
public IntervalStore(List<T> intervals)
{
- this(intervals, true);
+ this(intervals, null, true);
}
/**
- * Allows a presort option, which can speed up initial loading of individual
- * features but will delay the first findOverlap if set to true.
*
* @param intervals
- * @param presort
+ * intervals to initialize with (others may still be added)
+ * @param comparator
+ * IntervalI.COMPARATOR_BEGIN_ASC_END_ASC or
+ * IntervalI.COMPARE_BEGIN_ASC_END_DESC, but this could also be one
+ * that sorts by description as well, for example.
+ * @param bigendian
+ * true if the comparator sorts [10-100] before [10-80]; defaults to
+ * true
*/
- public IntervalStore(List<T> intervals, boolean presort)
+ public IntervalStore(List<T> intervals,
+ Comparator<? super IntervalI> comparator, boolean bigendian)
{
- this.intervals = intervals;
- DO_PRESORT = presort;
- if (DO_PRESORT)
+ // COMPARE_BEGIN_ASC_END_DESC is standard, meaning
+ // the order will be [10,100] before [10,80]
+ // this order doesn't really matter much.
+ icompare = (comparator != null ? comparator
+ : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC
+ : IntervalI.COMPARE_BEGIN_ASC_END_ASC);
+ this.bigendian = bigendian;
+
+ if (intervals != null)
{
- sort();
+ // So, five hours later, we learn that all my timing has been thrown off
+ // because I used Array.sort, which if you look in the Java JDK is exactly
+ // what Collections.sort is, but for whatever reason, all my times were
+ // high by about 100-200 ms 100% reproducibly. Just one call to Array.sort
+ // prior to the nanotimer start messed it all up. Some sort of memory or
+ // garbage issue; I do not know. But using Collections.sort here fixes the
+ // problem.
+
+ Collections.sort(intervals, icompare);
+ intervals.toArray(
+ this.intervals = new IntervalI[capacity = intervalCount = intervals
+ .size()]);
}
isTainted = true;
+ // DO_PRESORT = presort;
+ if (// DO_PRESORT &&
+ intervalCount > 1)
+ {
+ updateMinMaxStart();
+ // isSorted = true;
+ ensureFinalized();
+ }
+ // else
+ // {
+ // isSorted = DO_PRESORT;
+ // isTainted = true;
+ // }
}
/**
- * Adds one interval to the store.
+ * Adds one interval to the store, allowing duplicates.
*
* @param interval
*/
@Override
public boolean add(T interval)
{
+ return add(interval, true);
+ }
+
+ /**
+ * Adds one interval to the store, optionally checking for duplicates.
+ *
+ * This fast-adding algorithm uses a double-length int[] (offsets) to hold
+ * pointers into intervals[] that allows continual sorting of an expanding
+ * array buffer. When the time comes, this is cleaned up and packed back into
+ * a standard array, but in the mean time, it can be added to with no loss of
+ * sorting.
+ *
+ * @param interval
+ * @param allowDuplicates
+ */
+ @Override
+ public boolean add(T interval, boolean allowDuplicates)
+ {
if (interval == null)
{
return false;
}
+ if (deleted > 0)
+ {
+ finalizeDeletion();
+ }
+ if (!isTainted)
+ {
+ offsets = null;
+ isTainted = true;
+ }
+
synchronized (intervals)
{
- if (DO_PRESORT)
+ int index = intervalCount;
+ int start = interval.getBegin();
+
+ if (intervalCount + added + 1 >= capacity)
+ {
+ intervals = finalizeAddition(
+ new IntervalI[capacity = capacity << 1]);
+
+ }
+
+ // if (DO_PRESORT && isSorted)
+ // {
+ if (intervalCount == 0)
+ {
+ // ignore
+ }
+ else
+ {
+ index = findInterval(interval, allowDuplicates);
+ if (!allowDuplicates && index >= 0)
+ {
+ return false;
+ }
+ if (index < 0)
+ {
+ index = -1 - index;
+ }
+ else
+ {
+ index++;
+ }
+ }
+
+ // }
+ // else
+ // {
+ // if (!allowDuplicates && findInterval(interval) >= 0)
+ // {
+ // return false;
+ // }
+ // isSorted = false;
+ // }
+
+ if (index == intervalCount)
{
- int insertPosition = findFirstBegin(intervals, interval.getBegin());
- intervals.add(insertPosition, interval);
- isSorted = true;
+ intervals[intervalCount++] = interval;
+ // System.out.println("added " + intervalCount + " " + interval);
}
else
{
- intervals.add(interval);
- isSorted = false;
+ int pt = capacity - ++added;
+ intervals[pt] = interval;
+ // System.out.println("stashed " + pt + " " + interval + " for "
+ // + index + " " + intervals[index]);
+ if (offsets == null)
+ {
+ offsets = new int[capacity];
+ }
+
+ offsets[pt] = offsets[index];
+
+ offsets[index] = pt;
}
- isTainted = true;
+
+ minStart = Math.min(minStart, start);
+ maxStart = Math.max(maxStart, start);
return true;
}
}
- @Override
- public void clear()
+ /**
+ * Clean up the intervals array into a simple ordered array.
+ *
+ * @param dest
+ * @return
+ */
+ private IntervalI[] finalizeAddition(IntervalI[] dest)
{
- intervals.clear();
- orderedIntervalStarts = null;
- isTainted = true;
- }
+ if (dest == null)
+ {
+ dest = intervals;
+ }
+ if (added == 0)
+ {
+ if (intervalCount > 0 && dest != intervals)
+ {
+ System.arraycopy(intervals, 0, dest, 0, intervalCount);
+ }
+ capacity = dest.length;
+ return dest;
+ }
+ // System.out.println("finalizing " + intervalCount + " " + added);
- @Override
- public boolean contains(Object entry)
- {
- return listContains(intervals, entry);
- }
+ // array is [(intervalCount)...null...(added)]
- private void ensureFinalized()
- {
- if (isTainted && intervals.size() >= 2)
+ int ntotal = intervalCount + added;
+ for (int ptShift = ntotal, pt = intervalCount; pt >= 0;)
{
- if (!isSorted)
+ int pt0 = pt;
+ while (--pt >= 0 && offsets[pt] == 0)
{
- sort();
+ ;
+ }
+ if (pt < 0)
+ {
+ pt = 0;
+ }
+ int nOK = pt0 - pt;
+ // shift upper intervals right
+ ptShift -= nOK;
+ if (nOK > 0)
+ {
+ System.arraycopy(intervals, pt, dest, ptShift, nOK);
+ }
+ if (added == 0)
+ {
+ break;
+ }
+ for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
+ {
+ dest[--ptShift] = intervals[offset];
+ --added;
}
- orderedIntervalStarts = intervals.toArray(new IntervalI[0]);
- linkFeatures(orderedIntervalStarts);
- isTainted = false;
}
+ offsets = null;
+ intervalCount = ntotal;
+ capacity = dest.length;
+ return dest;
}
/**
- * Sort intervals by start (lowest first) and end (highest first).
+ * A binary search for a duplicate.
+ *
+ * @param interval
+ * @return
*/
- private void sort()
+ public int binaryIdentitySearch(IntervalI interval)
{
- Collections.sort(intervals, icompare);
- isSorted = true;
+ return binaryIdentitySearch(interval, null, false);
}
- protected int findFirstBegin(List<T> list, long pos)
+ /**
+ * for remove() and contains()
+ *
+ * @param list
+ * @param interval
+ * @param bsIgnore
+ * for deleted
+ * @param rangeOnly
+ * don't do a full identity check, just a range check
+ * @return index or, if not found, -1 - "would be here"
+ */
+ private int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore,
+ boolean rangeOnly)
{
int start = 0;
- int end = list.size() - 1;
- int matched = list.size();
-
+ int r0 = interval.getBegin();
+ int r1 = interval.getEnd();
+ int end = intervalCount - 1;
+ if (end < 0 || r0 < minStart)
+ {
+ return -1;
+ }
+ if (r0 > maxStart)
+ {
+ return -1 - intervalCount;
+ }
while (start <= end)
{
- int mid = (start + end) / 2;
- if (list.get(mid).getBegin() >= pos)
- {
- matched = mid;
- end = mid - 1;
- }
- else
+ int mid = (start + end) >>> 1;
+ IntervalI r = intervals[mid];
+ switch (compareRange(r, r0, r1))
{
+ case -1:
start = mid + 1;
+ continue;
+ case 1:
+ end = mid - 1;
+ continue;
+ case 0:
+ IntervalI iv = intervals[mid];
+ if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(mid))
+ && sameInterval(interval, iv))
+ {
+ return mid;
+ // found one; just scan up and down now, first checking the range, but
+ // also checking other possible aspects of equivalence.
+ }
+
+ for (int i = mid; ++i <= end;)
+ {
+ if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
+ {
+ break;
+ }
+ if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(i))
+ && sameInterval(interval, iv))
+ {
+ return i;
+ }
+ }
+ for (int i = mid; --i >= start;)
+ {
+ if ((iv = intervals[i]).getBegin() != r0
+ || (bigendian ? r1 < iv.getEnd() : iv.getEnd() < r1))
+ {
+ return -1 - ++i;
+ }
+ if ((bsIgnore == null || !bsIgnore.get(i))
+ && sameInterval(interval, iv))
+ {
+ return i;
+ }
+ }
+ return -1 - mid;
}
}
- return matched;
+ return -1 - start;
}
- protected int findFirstEnd(List<T> list, long pos)
+ public boolean canCheckForDuplicates()
{
- int start = 0;
- int end = list.size() - 1;
- int matched = list.size();
+ return true;
+ }
- while (start <= end)
- {
- int mid = (start + end) / 2;
- if (list.get(mid).getEnd() >= pos)
- {
- matched = mid;
- end = mid - 1;
- }
- else
- {
- start = mid + 1;
- }
- }
- return matched;
+ /**
+ * Reset all arrays.
+ *
+ */
+ @Override
+ public void clear()
+ {
+ intervalCount = added = 0;
+ // isSorted = true;
+ isTainted = true;
+ offsets = null;
+ intervals = new IntervalI[8];
+ nestOffsets = nestLengths = null;
+ nests = null;
+ minStart = maxEnd = Integer.MAX_VALUE;
+ maxStart = Integer.MIN_VALUE;
}
/**
- * Adds non-nested intervals to the result list that lie within the target
- * range
+ * Compare an interval t to a from/to range for insertion purposes
*
+ * @param t
* @param from
* @param to
- * @param result
+ * @return 0 if same, 1 if start is after from, or start equals from and
+ * [bigendian: end is before to | littleendian: end is after to], else
+ * -1
*/
- protected void findIntervalOverlaps(long from, long to,
- List<T> result)
+ private int compareRange(IntervalI t, long from, long to)
+ {
+ int order = Long.signum(t.getBegin() - from);
+ return (order == 0
+ ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
+ : order);
+ }
+
+ @Override
+ public boolean contains(Object entry)
+ {
+ if (entry == null || intervalCount == 0 && added == 0 && deleted == 0)
+ {
+ return false;
+ }
+ if (// !isSorted ||
+ deleted > 0)
+ {
+ sort();
+ }
+ return (findInterval((IntervalI) entry, false) >= 0);
+ }
+
+ /**
+ * Check to see if a given interval is within another.
+ *
+ * Not implemented.
+ *
+ * @param outer
+ * @param inner
+ * @return
+ */
+ public boolean containsInterval(IntervalI outer, IntervalI inner)
{
+ return false; // not applicable
+ }
+
+ /**
+ * Ensure that all addition, deletion, and sorting has been done, and that the
+ * nesting arrays have been created so that we are ready for findOverlaps().
+ *
+ */
- int startIndex = findFirstEnd(intervals, from);
- final int startIndex1 = startIndex;
- int i = startIndex1;
- while (i < intervals.size())
+ private void ensureFinalized()
+ {
+ if (isTainted)
{
- T sf = intervals.get(i);
- if (sf.getBegin() > to)
+ if (// !isSorted ||
+ added > 0 || deleted > 0)
{
- break;
+ sort();
}
- if (sf.getBegin() <= to && sf.getEnd() >= from)
+ if (intervalCount > 0)
{
- result.add(sf);
+ createArrays();
}
- i++;
+ isTainted = false;
}
}
-
+ /**
+ * Find all overlaps within the given range, inclusively.
+ *
+ * @return a list sorted in ascending order of start position
+ *
+ */
@Override
- public List<T> findOverlaps(long start, long end)
+ public List<T> findOverlaps(long from, long to)
{
- return findOverlaps(start, end, null);
+ return findOverlaps(from, to, null);
}
+ /**
+ * Find all overlaps within the given range, inclusively.
+ *
+ * @return a list sorted in the order provided by the features list comparator
+ *
+ */
+
@SuppressWarnings("unchecked")
@Override
- public List<T> findOverlaps(long start, long end, List<T> result)
+ public List<T> findOverlaps(long from, long to, List<T> result)
{
-
if (result == null)
{
result = new ArrayList<>();
}
- int n = intervals.size();
- switch (n)
+ switch (intervalCount + added)
{
case 0:
return result;
case 1:
- T sf = intervals.get(0);
- if (sf.getBegin() <= end && sf.getEnd() >= start)
+ IntervalI sf = intervals[0];
+ if (sf.getBegin() <= to && sf.getEnd() >= from)
{
- result.add(sf);
+ result.add((T) sf);
}
return result;
}
ensureFinalized();
- // (1) Find the closest feature to this position.
-
- int index = getClosestFeature(orderedIntervalStarts, start);
- IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]);
-
- // (2) Traverse the containedBy field, checking for overlap.
-
- while (sf != null)
+ if (from > maxEnd || to < minStart)
{
- if (sf.getEnd() >= start)
- {
- result.add((T) sf);
- }
- sf = sf.getContainedBy();
+ return result;
}
-
- // (3) For an interval, find the last feature that starts in this interval,
- // and add all features up through that feature.
-
- if (end >= start)
+ if (createUnnested)
{
- // fill in with all features that start within this interval, fully
- // inclusive
- int index2 = getClosestFeature(orderedIntervalStarts, end);
- while (++index <= index2)
+ if (nestLengths[unnested] > 0)
{
- result.add((T) orderedIntervalStarts[index]);
+ searchUnnested(from, to, (List<IntervalI>) result);
}
-
+ }
+ if (nestLengths[root] > 0)
+ {
+ search(nests, from, to, root, result);
}
return result;
}
- private int getClosestFeature(IntervalI[] l, long pos)
+ /**
+ * A simpler search, since we know we don't have any subintervals. Not
+ * necessary, actually.
+ *
+ * @param nestOffsets
+ * @param nestLengths
+ * @param nests
+ * @param from
+ * @param to
+ * @param result
+ */
+ private void searchUnnested(long from, long to, List<IntervalI> result)
{
- int low = 0;
- int high = l.length - 1;
- while (low <= high)
+ int start = nestOffsets[unnested];
+ int end = start + nestLengths[unnested] - 1;
+ for (int pt = binarySearchFirstEndNotBefore(nests, from, start,
+ end); pt <= end; pt++)
{
- int mid = (low + high) >>> 1;
- IntervalI f = l[mid];
- switch (Long.signum(f.getBegin() - pos))
+ IntervalI ival = nests[pt];
+ if (ival.getBegin() > to)
{
- case -1:
- low = mid + 1;
- continue;
- case 1:
- high = mid - 1;
- continue;
- case 0:
-
- while (++mid <= high && l[mid].getBegin() == pos)
- {
- ;
- }
- return --mid;
+ break;
}
+ result.add(ival);
}
- return (high < 0 ? -1 : high);
}
+ /**
+ * The main search of the nests[] array's subarrays
+ *
+ * @param nests
+ * @param from
+ * @param to
+ * @param nest
+ * @param result
+ */
@SuppressWarnings("unchecked")
- public T getContainedBy(T sf, T sf0)
+ private void search(IntervalI[] nests, long from, long to, int nest,
+ List<T> result)
{
- int begin = sf0.getBegin();
- while (sf != null)
+ int start = nestOffsets[nest];
+ int n = nestLengths[nest];
+ int end = start + n - 1;
+ IntervalI first = nests[start];
+ IntervalI last = nests[end];
+
+ // quick tests for common cases:
+ // out of range
+ if (last.getEnd() < from || first.getBegin() > to)
{
- if (begin <= sf.getEnd())
+ return;
+ }
+ int pt;
+ switch (n)
+ {
+ case 1:
+ // just one interval and hasn't failed begin/end test
+ pt = start;
+ break;
+ case 2:
+ // just two and didn't fail begin/end test
+ // so there is only one option: either the first or the second is our
+ // winner
+ pt = (first.getEnd() >= from ? start : end);
+ break;
+ default:
+ // do the binary search
+ pt = binarySearchFirstEndNotBefore(nests, from, start, end);
+ break;
+ }
+ for (; pt <= end; pt++)
+ {
+ IntervalI ival = nests[pt];
+ // check for out of range
+ if (ival.getBegin() > to)
+ {
+ break;
+ }
+ result.add((T) ival);
+ if (nestLengths[pt] > 0)
{
- // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
- // + "\nFS in " + sf.getIndex1() + ":" + sf);
- return sf;
+ // check subintervals in this nest
+ search(nests, from, to, pt, result);
}
- sf = (T) sf.getContainedBy();
}
- return null;
}
+ /**
+ * return the i-th interval in the designated order (bigendian or
+ * littleendian)
+ */
+ public IntervalI get(int i)
+ {
+ if (i < 0 || i >= intervalCount + added)
+ {
+ return null;
+ }
+ ensureFinalized();
+ return intervals[i];
+ }
+
+ /**
+ * Return the deepest level of nesting.
+ *
+ */
@Override
public int getDepth()
{
- int n = intervals.size();
- if (n < 2)
+ ensureFinalized();
+ BitSet bsTested = new BitSet();
+ return Math.max((createUnnested ? getDepth(unnested, bsTested) : 0),
+ getDepth(root, bsTested));
+ }
+
+ /**
+ * Iteratively dive deeply.
+ *
+ * @param pt
+ * @param bsTested
+ * @return
+ */
+ private int getDepth(int pt, BitSet bsTested)
+ {
+ int maxDepth = 0;
+ int depth;
+ int n = nestLengths[pt];
+ if (n == 0 || bsTested.get(pt))
{
- return n;
+ return 1;
}
- ensureFinalized();
- int maxDepth = 1;
- T root = null;
- for (int i = 0; i < n; i++)
+ bsTested.set(pt);
+ for (int st = nestOffsets[pt], i = st + n; --i >= st;)
{
- T element = intervals.get(i);
- IntervalI container = element;
- if (element.getContainedBy() == null)
+ if ((depth = getDepth(i, bsTested)) > maxDepth)
{
- root = element;
- }
- int depth = 1;
- while ((container = container.getContainedBy()) != null)
- {
- if (++depth > maxDepth && container == root)
- {
- maxDepth = depth;
- break;
- }
+ maxDepth = depth;
}
}
- return maxDepth;
+ return maxDepth + 1;
+ }
+
+ /**
+ * Get the number of root-level nests.
+ *
+ */
+ public int getWidth()
+ {
+ ensureFinalized();
+ return nestLengths[root] + (createUnnested ? nestLengths[unnested] : 0);
}
- @Override
public boolean isValid()
{
ensureFinalized();
@Override
public Iterator<T> iterator()
{
- return intervals.iterator();
+ ensureFinalized();
+ return new Iterator<T>()
+ {
- }
+ private int next;
- @SuppressWarnings("unchecked")
- private void linkFeatures(IntervalI[] features)
- {
- int n = features.length;
- if (n < 2)
- {
- return;
- }
- int maxEnd = features[0].getEnd();
- for (int i = 1; i < n; i++)
- {
- T sf = (T) features[i];
- if (sf.getBegin() <= maxEnd)
+ @Override
+ public boolean hasNext()
{
- sf.setContainedBy(getContainedBy((T) features[i - 1], sf));
+ return next < intervalCount;
}
- if (sf.getEnd() > maxEnd)
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T next()
{
- maxEnd = sf.getEnd();
+ if (next >= intervalCount)
+ {
+ throw new NoSuchElementException();
+ }
+ return (T) intervals[next++];
}
- }
+ };
}
/**
- * Answers true if the list contains the interval, else false. This method is
- * optimised for the condition that the list is sorted on interval start
- * position ascending, and will give unreliable results if this does not hold.
+ * Indented printing of the intervals.
*
- * @param intervals
- * @param entry
- * @return
*/
- protected boolean listContains(List<T> intervals, Object entry)
+ @Override
+ public String prettyPrint()
{
- if (intervals == null || entry == null)
- {
- return false;
- }
-
- if (!isSorted)
+ ensureFinalized();
+ StringBuffer sb = new StringBuffer();
+ if (createUnnested)
{
- return intervals.contains(entry);
+ sb.append("unnested:");
+ dump(intervalCount + 1, sb, "\n");
+ sb.append("\nnested:");
}
+ dump(intervalCount, sb, "\n");
+ return sb.toString();
+ }
- @SuppressWarnings("unchecked")
- T interval = (T) entry;
+ /**
+ * Iterative nest dump.
+ *
+ * @param nest
+ * @param sb
+ * @param sep
+ */
+ private void dump(int nest, StringBuffer sb, String sep)
+ {
+ int pt = nestOffsets[nest];
+ int n = nestLengths[nest];
+ sep += " ";
- /*
- * locate the first entry in the list which does not precede the interval
- */
- int pos = findFirstBegin(intervals, interval.getBegin());
- int len = intervals.size();
- while (pos < len)
+ for (int i = 0; i < n; i++)
{
- T sf = intervals.get(pos);
- if (sf.getBegin() > interval.getBegin())
- {
- return false; // no match found
- }
- if (sf.getEnd() == interval.getEnd() && sf.equals(interval))
+ sb.append(sep).append(nests[pt + i].toString());
+ if (nestLengths[pt + i] > 0)
{
- return true;
+ dump(pt + i, sb, sep + " ");
}
- pos++;
}
- return false;
}
@Override
// {
// throw new NullPointerException();
// }
- return (o != null && intervals.size() > 0
+ return (o != null && intervalCount > 0
&& removeInterval((IntervalI) o));
}
/**
+ * Find the interval or return where it should go, possibly into the add
+ * buffer
+ *
+ * @param interval
+ * @param rangeOnly
+ * don't do a full identity check, just a range check
+ *
+ * @return index (nonnegative) or index where it would go (negative)
+ */
+
+ private int findInterval(IntervalI interval, boolean rangeOnly)
+ {
+
+ // if (isSorted)
+ // {
+ int pt = binaryIdentitySearch(interval, null, rangeOnly);
+ // if (addPt == intervalCount || offsets[pt] == 0)
+ // return pt;
+ if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
+ {
+ return pt;
+ }
+ pt = -1 - pt;
+ int start = interval.getBegin();
+ int end = interval.getEnd();
+
+ int match = pt;
+
+ while ((pt = offsets[pt]) != 0)
+ {
+ IntervalI iv = intervals[pt];
+ switch (compareRange(iv, start, end))
+ {
+ case -1:
+ break;
+ case 0:
+ if (!rangeOnly && sameInterval(iv, interval))
+ {
+ return pt;
+ }
+ // fall through
+ case 1:
+ match = pt;
+ continue;
+ }
+ }
+ return -1 - match;
+ // }
+ // else
+ // {
+ // int i = intervalCount;
+ // while (--i >= 0 && !sameInterval(intervals[i], interval))
+ // {
+ // ;
+ // }
+ // return i;
+ // }
+ }
+
+ /**
* Uses a binary search to find the entry and removes it if found.
*
- * @param entry
+ * @param interval
* @return
*/
- protected boolean removeInterval(IntervalI entry)
+ protected boolean removeInterval(IntervalI interval)
{
- if (!isSorted)
+
+ if (// !isSorted ||
+ added > 0)
+ {
+ sort();
+ }
+ int i = binaryIdentitySearch(interval, bsDeleted, false);
+ if (i < 0)
+ {
+ return false;
+ }
+ if (deleted == 0)
{
- return intervals.remove(entry);
+ if (bsDeleted == null)
+ {
+ bsDeleted = new BitSet(intervalCount);
+ }
+ else
+ {
+ bsDeleted.clear();
+ }
}
+ bsDeleted.set(i);
+ deleted++;
+ return (isTainted = true);
+ }
- /*
- * find the first interval that might match, i.e. whose
- * start position is not less than the target range start
- * (NB inequality test ensures the first match if any is found)
- */
- int startIndex = findFirstBegin(intervals, entry.getBegin());
+ /**
+ * Fill in the gaps of the intervals array after one or more deletions.
+ *
+ */
+ private void finalizeDeletion()
+ {
+ if (deleted == 0)
+ {
+ return;
+ }
- /*
- * traverse intervals to look for a match
- */
- int from = entry.getBegin();
- int to = entry.getEnd();
- for (int i = startIndex, size = intervals.size(); i < size; i++)
+ // ......xxx.....xxxx.....xxxxx....
+ // ......^i,pt
+ // ...... .......
+ // ............
+ for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;)
{
- T sf = intervals.get(i);
- if (sf.getBegin() > from)
+ i = bsDeleted.nextClearBit(i + 1);
+ int pt1 = bsDeleted.nextSetBit(i + 1);
+ if (pt1 < 0)
{
- return false;
+ pt1 = intervalCount;
}
- if (sf.getEnd() == to && sf.equals(entry))
+ int n = pt1 - i;
+ System.arraycopy(intervals, i, intervals, pt, n);
+ pt += n;
+ if (pt1 == intervalCount)
{
- intervals.remove(i).setContainedBy(null);
- return (isTainted = true);
+ for (i = pt1; --i >= pt;)
+ {
+ intervals[i] = null;
+ }
+ intervalCount -= deleted;
+ deleted = 0;
+ bsDeleted.clear();
+ break;
}
+ i = pt1;
}
- return false;
+
}
- @Override
- public int size()
+ /**
+ * Recreate the key nest arrays.
+ *
+ */
+ public boolean revalidate()
{
- return intervals.size();
+ isTainted = true;
+ // isSorted = false;
+ ensureFinalized();
+ return true;
}
+ /**
+ * Return the total number of intervals in the store.
+ *
+ */
@Override
- public String toString()
+ public int size()
{
- return prettyPrint();
+ return intervalCount + added - deleted;
}
+ /**
+ * AbstractCollection override to ensure that we have finalized the store.
+ */
@Override
- public int getWidth()
+ public Object[] toArray()
{
ensureFinalized();
- int w = 0;
- for (int i = intervals.size(); --i >= 0;)
+ return super.toArray();
+ }
+
+ /**
+ * Sort intervals by start.
+ */
+ private void sort()
+ {
+ if (added > 0)
{
- if (intervals.get(i).getContainedBy() == null)
- {
- w++;
- }
+ intervals = finalizeAddition(new IntervalI[intervalCount + added]);
+ }
+ else if (deleted > 0)
+ {
+ finalizeDeletion();
+ }
+ else
+ {
+ // SOMETHING HAPPENS WHEN Arrays.sort is run that
+ // adds 100 ms to a 150 ms run time.
+ // I don't know why.
+ Arrays.sort(intervals, 0, intervalCount, icompare);
}
- return w;
+ updateMinMaxStart();
+ // isSorted = true;
}
- @Override
- public String prettyPrint()
+ /**
+ * Create the key arrays: nests, nestOffsets, and nestLengths. The starting
+ * point is getting the container array, which may point to then root nest or
+ * the unnested set.
+ *
+ * This is a pretty complicated method; it was way simpler before I decided to
+ * support unnesting as an option.
+ *
+ */
+ private void createArrays()
{
- int n = intervals.size();
- if (n == 0)
+
+ // goal here is to create an array, nests[], that is a block
+
+ /**
+ * When unnesting, we need a second top-level listing.
+ *
+ */
+ int len = intervalCount + (createUnnested ? 2 : 1);
+ root = intervalCount;
+ unnested = intervalCount + 1;
+
+ /**
+ * The three key arrays produced by this method:
+ */
+ nests = new IntervalI[intervalCount];
+ nestOffsets = new int[len];
+ nestLengths = new int[len];
+
+ /**
+ * the objectives of Phase One
+ */
+
+ int[] myContainer = new int[intervalCount];
+ int[] counts = new int[len];
+
+ // Phase One: Get the temporary container array myContainer.
+
+ myContainer[0] = (createUnnested ? unnested : root);
+ counts[myContainer[0]] = 1;
+
+ // memories for previous begin and end
+
+ int beginLast = intervals[0].getBegin();
+ int endLast = intervals[0].getEnd();
+
+ // memories for previous unnested pt, begin, and end
+
+ int ptLastNot2 = root;
+ int beginLast2 = beginLast;
+ int endLast2 = endLast;
+
+ for (int i = 1; i < intervalCount; i++)
{
- return "";
+ int pt = i - 1;
+ int begin = intervals[i].getBegin();
+ int end = intervals[i].getEnd();
+
+ // initialize the container to either root or unnested
+
+ myContainer[i] = myContainer[0];
+
+ // OK, now figure it all out...
+
+ boolean isNested;
+ if (createUnnested)
+ {
+ // Using a method isNested(...) here, because there are different
+ // ways of defining "nested" when start or end are the
+ // same. The definition used here would not be my first choice,
+ // but it matches results for IntervalStoreJ
+ // perfectly, down to the exact number of times that the
+ // binary search runs through its start/mid/end loops in findOverlap.
+
+ // beginLast2 and endLast2 refer to the last unnested level
+
+ isNested = isNested(begin, end, beginLast2, endLast2);
+ if (isNested)
+ {
+ // this is tricky; making sure we properly get the
+ // nests that are to be removed from the top-level
+ // unnested list assigned a container root, while all
+ // top-level nests get the pointer to unnested.
+
+ pt = ptLastNot2;
+ isNested = (pt == root || isNested(begin, end,
+ intervals[pt].getBegin(), intervals[pt].getEnd()));
+ if (!isNested)
+ {
+ myContainer[i] = root;
+ }
+ }
+ }
+ else
+ {
+ isNested = isNested(begin, end, beginLast, endLast);
+ }
+
+ // ...almost done...
+
+ if (isNested)
+ {
+ myContainer[i] = pt;
+ }
+ else
+ {
+
+ // monotonic -- find the parent that is doing the nesting
+
+ while ((pt = myContainer[pt]) < root)
+ {
+ if (isNested(begin, end, intervals[pt].getBegin(),
+ intervals[pt].getEnd()))
+ {
+ myContainer[i] = pt;
+ // fully contained by a previous interval
+ // System.out.println("mycontainer " + i + " = " + pt);
+ break;
+ }
+ }
+ }
+
+ // update the counts and pointers
+
+ counts[myContainer[i]]++;
+ if (myContainer[i] == unnested)
+ {
+ endLast2 = end;
+ beginLast2 = begin;
+ }
+ else
+ {
+ ptLastNot2 = i;
+ endLast = end;
+ beginLast = begin;
+ }
}
- if (n == 1)
+
+ // System.out.println("intervals " + Arrays.toString(intervals));
+ // System.out.println("conts " + Arrays.toString(myContainer));
+ // System.out.println("counts " + Arrays.toString(counts));
+
+ // Phase Two: Construct the nests[] array and its associated
+ // starting pointer array and nest element counts. These counts
+ // are actually produced above, but we reconstruct it as a set
+ // of dynamic pointers during construction.
+
+ // The reason this is two phases is that only now do we know where to start
+ // the nested set. If we did not unnest, we could do this all in one pass
+ // using a reverse sort for the root, and then just reverse that, but with
+ // unnesting, we have two unknown starts, and that doesn't give us that
+ // opportunity.
+
+ /**
+ * this temporary array tracks the pointer within nestOffsets to the nest
+ * block offset for intervals[i]'s container.
+ */
+ int[] startPt = new int[len];
+
+ startPt[root] = root;
+
+ int nextStart = counts[root];
+
+ if (unnested > 0)
{
- return intervals.get(0) + "\n";
+ nestOffsets[root] = counts[unnested];
+ nextStart += counts[unnested];
+ startPt[unnested] = unnested;
}
- ensureFinalized();
- String sep = "\t";
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < n; i++)
+
+ // Now get all the pointers right and set the nests[] pointer into intervals
+ // correctly.
+
+ for (int i = 0; i < intervalCount; i++)
{
- IntervalI range = orderedIntervalStarts[i];
- IntervalI container = range.getContainedBy();
- while (container != null)
+ int ptOffset = startPt[myContainer[i]];
+ int p = nestOffsets[ptOffset] + nestLengths[ptOffset]++;
+ nests[p] = intervals[i];
+ if (counts[i] > 0)
{
- sb.append(sep);
- container = container.getContainedBy();
+ // this is a container
+ startPt[i] = p;
+ nestOffsets[p] = nextStart;
+ nextStart += counts[i];
}
- sb.append(range.toString()).append('\n');
}
- return sb.toString();
+
+ // System.out.println("intervals " + Arrays.toString(intervals));
+ // System.out.println("nests " + Arrays.toString(nests));
+ // System.out.println("conts " + Arrays.toString(myContainer));
+ // System.out.println("offsets " + Arrays.toString(nestOffsets));
+ // System.out.println("lengths " + Arrays.toString(nestLengths));
+ // System.out.println(
+ // "done " + nestLengths[root]
+ // + (unnested > 0 ? " " + nestLengths[unnested] : ""));
}
- @Override
- public boolean revalidate()
+ /**
+ * Child-Parent relationships to match IntervalStoreJ. Perhaps a bit arcane?
+ * Objective is to minimize the depth when we can.
+ *
+ * @param childStart
+ * @param childEnd
+ * @param parentStart
+ * @param parentEnd
+ * @return
+ */
+ private static boolean isNested(int childStart, int childEnd,
+ int parentStart, int parentEnd)
{
- isTainted = true;
- isSorted = true;
- ensureFinalized();
- return true;
+ return (parentStart <= childStart && parentEnd > childEnd
+ || parentStart < childStart && parentEnd == childEnd);
+ }
+
+ /**
+ * Just a couple of pointers to help speed findOverlaps along a bit.
+ *
+ */
+ private void updateMinMaxStart()
+ {
+ if (intervalCount > 0)
+ {
+ minStart = intervals[0].getBegin();
+ maxStart = intervals[intervalCount - 1].getBegin();
+ }
+ else
+ {
+ minStart = Integer.MAX_VALUE;
+ maxStart = Integer.MIN_VALUE;
+ }
}
@Override
- public IntervalI get(int i)
+ public String toString()
{
- ensureFinalized();
- return (i < 0 || i >= orderedIntervalStarts.length ? null
- : orderedIntervalStarts[i]);
+ return prettyPrint();
+ }
+
+ /**
+ * CAUTION! This presumes that equalsInterval does check descriptions. Note
+ * that bartongroup.IntervalStoreJ does NOT do this and
+ * jalview.datamodel.features.SequenceFeature does not, either. But
+ * nonc.SimpleFeature in test DOES, because it overrides equalsInterval.
+ *
+ * The reason we do it this way is to avoid an unnecessary and costly test for
+ * obj instanceof IntervalI.
+ *
+ * Added by Mungo to use equals, but that requires use of instance checking,
+ * which is not significant in Java (apparently), but is a bigger deal in
+ * JavaScript. So here we have to hack
+ * bobhanson.IntervalStoreJ.nonc.InervalStore to bypass equals.
+ *
+ * @param i1
+ * @param i2
+ * @return
+ */
+ boolean sameInterval(IntervalI i1, IntervalI i2)
+ {
+ // avoiding equals() for JavaScript performance
+ return ((SequenceFeature) i1).equals((SequenceFeature) i2, false);
}
}