X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fintervalstore%2Fnonc%2FIntervalStore.java;h=405a4714828cb1dc48791a533db2a1472bf7a0d5;hb=c769685cc0a0eead20b9ce9b5a3d0d96f664b6cb;hp=91138d38e74004b01b3c631832f9a543a3c47ac2;hpb=0031ee4b6a42ad328e417cb65c7a840183e62e87;p=jalview.git diff --git a/src/intervalstore/nonc/IntervalStore.java b/src/intervalstore/nonc/IntervalStore.java index 91138d3..405a471 100644 --- a/src/intervalstore/nonc/IntervalStore.java +++ b/src/intervalstore/nonc/IntervalStore.java @@ -31,386 +31,799 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package intervalstore.nonc; +import jalview.datamodel.SequenceFeature; + import java.util.AbstractCollection; import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.NoSuchElementException; import intervalstore.api.IntervalI; import intervalstore.api.IntervalStoreI; - /** * + * A fourth idea, implementing NCList as a pointer system identical in operation + * to IntervalStoreJ's implementation using ArrayLists but here using just two + * int[] arrays and a single IntervalI[] array that is in the proper order for + * holding all nested and unnested arrays. + * + * Use of unnesting is optional and can be experimented with by changing the + * createUnnested flag to false. + * + * Preliminary testing suggests that this implementation is about 10% faster for + * store interval size 50, store sequence factor 10, query width -1000 (fixed + * 1000-unit-wide window), and query count 100000. + * + * Origional note (Mungo Carstairs, IntervalStoreJ) + * * A Collection class to store interval-associated data, with options for "lazy" * sorting so as to speed incremental construction of the data prior to issuing * a findOverlap call. * + * Accepts duplicate entries but not null values. * - * with O(log N) performance for overlap queries, insertion and deletion (where - * N is the size of the store). * - * Accepts duplicate entries but not null values. * - * @author Bob Hanson 2019.08.06 + * @author Bob Hanson 2019.09.01 * * @param - * any type providing getBegin(), getEnd() - * getContainedBy(), and setContainedBy() + * any type providing getBegin() and + * getEnd(), primarily */ public class IntervalStore extends AbstractCollection implements IntervalStoreI { - private final boolean DO_PRESORT; + /** + * Search for the last interval that ends at or just after the specified + * position. In the situation that there are multiple intervals starting at + * pos, this method returns the first of those. + * + * @param nests + * the nest-ordered array from createArrays() + * @param from + * the position at the start of the interval of interest + * @param start + * the starting point for the subarray search + * @param end + * the ending point for the subarray search + * @return index into the nests array or one greater than end if not found + */ + public static int binarySearchFirstEndNotBefore(IntervalI[] nests, long from, + int start, int end) + { + int matched = end + 1; + int mid; + while (start <= end) + { + mid = (start + end) >>> 1; + if (nests[mid].getEnd() >= from) + { + matched = mid; + end = mid - 1; + } + else + { + start = mid + 1; + } + } + return matched; + } + + /** + * My preference is for a bigendian comparison, but you may differ. + */ + private Comparator icompare; + + /** + * bigendian is what NCList does; change icompare to switch to that + */ + private boolean bigendian; - private boolean isSorted; + // private final boolean DO_PRESORT; + // + // private boolean isSorted; + // + private boolean createUnnested = true; - private List intervals; + private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, + maxEnd = Integer.MAX_VALUE; private boolean isTainted; - private IntervalI[] orderedIntervalStarts; + private int capacity = 8; - private static Comparator icompare = new IntervalComparator(); + protected IntervalI[] intervals = new IntervalI[capacity]; - static - { - System.out.println("intervalstore.nonc.IntervalStore initialized"); - } + private int[] offsets; + + protected int intervalCount; + + private int added; - // private Comparator icompare = new IntervalComparator(); + private int deleted; + + private BitSet bsDeleted; /** - * Constructor + * the key array that lists the intervals in sub-interval order so that the + * binary search can be isolated to a single subinterval just by indicating + * start and end within one array */ - public IntervalStore() - { + private IntervalI[] nests; + + /** + * pointers to the starting positions in nests[] for a subinterval; the first + * element is the "unnested" pointer when unnesting (2) or the root level nest + * pointer when not unnesting (1); the second element is root level nest when + * unnesting or the start of nest data when not unnesting; after that, nests + * are in contiguous sets of binary-searchable blocks + * + */ + private int[] nestOffsets; + + /** + * the count of intervals within a nest + * + */ + private int[] nestLengths; + + /** + * pointer to the root nest (intervalCount) + */ + private int root; + + /** + * pointer to the unnested set (intervalCount + 1); + */ + private int unnested; - intervals = new ArrayList<>(); - DO_PRESORT = true; - }; - - public IntervalStore(boolean presort) + public IntervalStore() { - intervals = new ArrayList<>(); - DO_PRESORT = presort; - }; + this(null); + } + // public IntervalStore(boolean presort) + // { + // this(null, presort); + // } + // /** * Constructor given a list of intervals. Note that the list may get sorted as * a side-effect of calling this constructor. */ public IntervalStore(List intervals) { - this(intervals, true); + this(intervals, null, true); } /** - * Allows a presort option, which can speed up initial loading of individual - * features but will delay the first findOverlap if set to true. * * @param intervals - * @param presort + * intervals to initialize with (others may still be added) + * @param comparator + * IntervalI.COMPARATOR_BEGIN_ASC_END_ASC or + * IntervalI.COMPARE_BEGIN_ASC_END_DESC, but this could also be one + * that sorts by description as well, for example. + * @param bigendian + * true if the comparator sorts [10-100] before [10-80]; defaults to + * true */ - public IntervalStore(List intervals, boolean presort) + public IntervalStore(List intervals, + Comparator comparator, boolean bigendian) { - this.intervals = intervals; - DO_PRESORT = presort; - if (DO_PRESORT) + // COMPARE_BEGIN_ASC_END_DESC is standard, meaning + // the order will be [10,100] before [10,80] + // this order doesn't really matter much. + icompare = (comparator != null ? comparator + : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC + : IntervalI.COMPARE_BEGIN_ASC_END_ASC); + this.bigendian = bigendian; + + if (intervals != null) { - sort(); + // So, five hours later, we learn that all my timing has been thrown off + // because I used Array.sort, which if you look in the Java JDK is exactly + // what Collections.sort is, but for whatever reason, all my times were + // high by about 100-200 ms 100% reproducibly. Just one call to Array.sort + // prior to the nanotimer start messed it all up. Some sort of memory or + // garbage issue; I do not know. But using Collections.sort here fixes the + // problem. + + Collections.sort(intervals, icompare); + intervals.toArray( + this.intervals = new IntervalI[capacity = intervalCount = intervals + .size()]); } isTainted = true; + // DO_PRESORT = presort; + if (// DO_PRESORT && + intervalCount > 1) + { + updateMinMaxStart(); + // isSorted = true; + ensureFinalized(); + } + // else + // { + // isSorted = DO_PRESORT; + // isTainted = true; + // } } /** - * Adds one interval to the store. + * Adds one interval to the store, allowing duplicates. * * @param interval */ @Override public boolean add(T interval) { + return add(interval, true); + } + + /** + * Adds one interval to the store, optionally checking for duplicates. + * + * This fast-adding algorithm uses a double-length int[] (offsets) to hold + * pointers into intervals[] that allows continual sorting of an expanding + * array buffer. When the time comes, this is cleaned up and packed back into + * a standard array, but in the mean time, it can be added to with no loss of + * sorting. + * + * @param interval + * @param allowDuplicates + */ + @Override + public boolean add(T interval, boolean allowDuplicates) + { if (interval == null) { return false; } + if (deleted > 0) + { + finalizeDeletion(); + } + if (!isTainted) + { + offsets = null; + isTainted = true; + } + synchronized (intervals) { - if (DO_PRESORT) + int index = intervalCount; + int start = interval.getBegin(); + + if (intervalCount + added + 1 >= capacity) + { + intervals = finalizeAddition( + new IntervalI[capacity = capacity << 1]); + + } + + // if (DO_PRESORT && isSorted) + // { + if (intervalCount == 0) + { + // ignore + } + else + { + index = findInterval(interval, allowDuplicates); + if (!allowDuplicates && index >= 0) + { + return false; + } + if (index < 0) + { + index = -1 - index; + } + else + { + index++; + } + } + + // } + // else + // { + // if (!allowDuplicates && findInterval(interval) >= 0) + // { + // return false; + // } + // isSorted = false; + // } + + if (index == intervalCount) { - int insertPosition = findFirstBegin(intervals, interval.getBegin()); - intervals.add(insertPosition, interval); - isSorted = true; + intervals[intervalCount++] = interval; + // System.out.println("added " + intervalCount + " " + interval); } else { - intervals.add(interval); - isSorted = false; + int pt = capacity - ++added; + intervals[pt] = interval; + // System.out.println("stashed " + pt + " " + interval + " for " + // + index + " " + intervals[index]); + if (offsets == null) + { + offsets = new int[capacity]; + } + + offsets[pt] = offsets[index]; + + offsets[index] = pt; } - isTainted = true; + + minStart = Math.min(minStart, start); + maxStart = Math.max(maxStart, start); return true; } } - @Override - public void clear() + /** + * Clean up the intervals array into a simple ordered array. + * + * @param dest + * @return + */ + private IntervalI[] finalizeAddition(IntervalI[] dest) { - intervals.clear(); - orderedIntervalStarts = null; - isTainted = true; - } + if (dest == null) + { + dest = intervals; + } + if (added == 0) + { + if (intervalCount > 0 && dest != intervals) + { + System.arraycopy(intervals, 0, dest, 0, intervalCount); + } + capacity = dest.length; + return dest; + } + // System.out.println("finalizing " + intervalCount + " " + added); - @Override - public boolean contains(Object entry) - { - return listContains(intervals, entry); - } + // array is [(intervalCount)...null...(added)] - private void ensureFinalized() - { - if (isTainted && intervals.size() >= 2) + int ntotal = intervalCount + added; + for (int ptShift = ntotal, pt = intervalCount; pt >= 0;) { - if (!isSorted) + int pt0 = pt; + while (--pt >= 0 && offsets[pt] == 0) { - sort(); + ; + } + if (pt < 0) + { + pt = 0; + } + int nOK = pt0 - pt; + // shift upper intervals right + ptShift -= nOK; + if (nOK > 0) + { + System.arraycopy(intervals, pt, dest, ptShift, nOK); + } + if (added == 0) + { + break; + } + for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) + { + dest[--ptShift] = intervals[offset]; + --added; } - orderedIntervalStarts = intervals.toArray(new IntervalI[0]); - linkFeatures(orderedIntervalStarts); - isTainted = false; } + offsets = null; + intervalCount = ntotal; + capacity = dest.length; + return dest; } /** - * Sort intervals by start (lowest first) and end (highest first). + * A binary search for a duplicate. + * + * @param interval + * @return */ - private void sort() + public int binaryIdentitySearch(IntervalI interval) { - Collections.sort(intervals, icompare); - isSorted = true; + return binaryIdentitySearch(interval, null, false); } - protected int findFirstBegin(List list, long pos) + /** + * for remove() and contains() + * + * @param list + * @param interval + * @param bsIgnore + * for deleted + * @param rangeOnly + * don't do a full identity check, just a range check + * @return index or, if not found, -1 - "would be here" + */ + private int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore, + boolean rangeOnly) { int start = 0; - int end = list.size() - 1; - int matched = list.size(); - + int r0 = interval.getBegin(); + int r1 = interval.getEnd(); + int end = intervalCount - 1; + if (end < 0 || r0 < minStart) + { + return -1; + } + if (r0 > maxStart) + { + return -1 - intervalCount; + } while (start <= end) { - int mid = (start + end) / 2; - if (list.get(mid).getBegin() >= pos) - { - matched = mid; - end = mid - 1; - } - else + int mid = (start + end) >>> 1; + IntervalI r = intervals[mid]; + switch (compareRange(r, r0, r1)) { + case -1: start = mid + 1; + continue; + case 1: + end = mid - 1; + continue; + case 0: + IntervalI iv = intervals[mid]; + if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(mid)) + && sameInterval(interval, iv)) + { + return mid; + // found one; just scan up and down now, first checking the range, but + // also checking other possible aspects of equivalence. + } + + for (int i = mid; ++i <= end;) + { + if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1) + { + break; + } + if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(i)) + && sameInterval(interval, iv)) + { + return i; + } + } + for (int i = mid; --i >= start;) + { + if ((iv = intervals[i]).getBegin() != r0 + || (bigendian ? r1 < iv.getEnd() : iv.getEnd() < r1)) + { + return -1 - ++i; + } + if ((bsIgnore == null || !bsIgnore.get(i)) + && sameInterval(interval, iv)) + { + return i; + } + } + return -1 - mid; } } - return matched; + return -1 - start; } - protected int findFirstEnd(List list, long pos) + public boolean canCheckForDuplicates() { - int start = 0; - int end = list.size() - 1; - int matched = list.size(); + return true; + } - while (start <= end) - { - int mid = (start + end) / 2; - if (list.get(mid).getEnd() >= pos) - { - matched = mid; - end = mid - 1; - } - else - { - start = mid + 1; - } - } - return matched; + /** + * Reset all arrays. + * + */ + @Override + public void clear() + { + intervalCount = added = 0; + // isSorted = true; + isTainted = true; + offsets = null; + intervals = new IntervalI[8]; + nestOffsets = nestLengths = null; + nests = null; + minStart = maxEnd = Integer.MAX_VALUE; + maxStart = Integer.MIN_VALUE; } /** - * Adds non-nested intervals to the result list that lie within the target - * range + * Compare an interval t to a from/to range for insertion purposes * + * @param t * @param from * @param to - * @param result + * @return 0 if same, 1 if start is after from, or start equals from and + * [bigendian: end is before to | littleendian: end is after to], else + * -1 */ - protected void findIntervalOverlaps(long from, long to, - List result) + private int compareRange(IntervalI t, long from, long to) + { + int order = Long.signum(t.getBegin() - from); + return (order == 0 + ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) + : order); + } + + @Override + public boolean contains(Object entry) + { + if (entry == null || intervalCount == 0 && added == 0 && deleted == 0) + { + return false; + } + if (// !isSorted || + deleted > 0) + { + sort(); + } + return (findInterval((IntervalI) entry, false) >= 0); + } + + /** + * Check to see if a given interval is within another. + * + * Not implemented. + * + * @param outer + * @param inner + * @return + */ + public boolean containsInterval(IntervalI outer, IntervalI inner) { + return false; // not applicable + } + + /** + * Ensure that all addition, deletion, and sorting has been done, and that the + * nesting arrays have been created so that we are ready for findOverlaps(). + * + */ - int startIndex = findFirstEnd(intervals, from); - final int startIndex1 = startIndex; - int i = startIndex1; - while (i < intervals.size()) + private void ensureFinalized() + { + if (isTainted) { - T sf = intervals.get(i); - if (sf.getBegin() > to) + if (// !isSorted || + added > 0 || deleted > 0) { - break; + sort(); } - if (sf.getBegin() <= to && sf.getEnd() >= from) + if (intervalCount > 0) { - result.add(sf); + createArrays(); } - i++; + isTainted = false; } } - + /** + * Find all overlaps within the given range, inclusively. + * + * @return a list sorted in ascending order of start position + * + */ @Override - public List findOverlaps(long start, long end) + public List findOverlaps(long from, long to) { - return findOverlaps(start, end, null); + return findOverlaps(from, to, null); } + /** + * Find all overlaps within the given range, inclusively. + * + * @return a list sorted in the order provided by the features list comparator + * + */ + @SuppressWarnings("unchecked") @Override - public List findOverlaps(long start, long end, List result) + public List findOverlaps(long from, long to, List result) { - if (result == null) { result = new ArrayList<>(); } - int n = intervals.size(); - switch (n) + switch (intervalCount + added) { case 0: return result; case 1: - T sf = intervals.get(0); - if (sf.getBegin() <= end && sf.getEnd() >= start) + IntervalI sf = intervals[0]; + if (sf.getBegin() <= to && sf.getEnd() >= from) { - result.add(sf); + result.add((T) sf); } return result; } ensureFinalized(); - // (1) Find the closest feature to this position. - - int index = getClosestFeature(orderedIntervalStarts, start); - IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]); - - // (2) Traverse the containedBy field, checking for overlap. - - while (sf != null) + if (from > maxEnd || to < minStart) { - if (sf.getEnd() >= start) - { - result.add((T) sf); - } - sf = sf.getContainedBy(); + return result; } - - // (3) For an interval, find the last feature that starts in this interval, - // and add all features up through that feature. - - if (end >= start) + if (createUnnested) { - // fill in with all features that start within this interval, fully - // inclusive - int index2 = getClosestFeature(orderedIntervalStarts, end); - while (++index <= index2) + if (nestLengths[unnested] > 0) { - result.add((T) orderedIntervalStarts[index]); + searchUnnested(from, to, (List) result); } - + } + if (nestLengths[root] > 0) + { + search(nests, from, to, root, result); } return result; } - private int getClosestFeature(IntervalI[] l, long pos) + /** + * A simpler search, since we know we don't have any subintervals. Not + * necessary, actually. + * + * @param nestOffsets + * @param nestLengths + * @param nests + * @param from + * @param to + * @param result + */ + private void searchUnnested(long from, long to, List result) { - int low = 0; - int high = l.length - 1; - while (low <= high) + int start = nestOffsets[unnested]; + int end = start + nestLengths[unnested] - 1; + for (int pt = binarySearchFirstEndNotBefore(nests, from, start, + end); pt <= end; pt++) { - int mid = (low + high) >>> 1; - IntervalI f = l[mid]; - switch (Long.signum(f.getBegin() - pos)) + IntervalI ival = nests[pt]; + if (ival.getBegin() > to) { - case -1: - low = mid + 1; - continue; - case 1: - high = mid - 1; - continue; - case 0: - - while (++mid <= high && l[mid].getBegin() == pos) - { - ; - } - return --mid; + break; } + result.add(ival); } - return (high < 0 ? -1 : high); } + /** + * The main search of the nests[] array's subarrays + * + * @param nests + * @param from + * @param to + * @param nest + * @param result + */ @SuppressWarnings("unchecked") - public T getContainedBy(T sf, T sf0) + private void search(IntervalI[] nests, long from, long to, int nest, + List result) { - int begin = sf0.getBegin(); - while (sf != null) + int start = nestOffsets[nest]; + int n = nestLengths[nest]; + int end = start + n - 1; + IntervalI first = nests[start]; + IntervalI last = nests[end]; + + // quick tests for common cases: + // out of range + if (last.getEnd() < from || first.getBegin() > to) { - if (begin <= sf.getEnd()) + return; + } + int pt; + switch (n) + { + case 1: + // just one interval and hasn't failed begin/end test + pt = start; + break; + case 2: + // just two and didn't fail begin/end test + // so there is only one option: either the first or the second is our + // winner + pt = (first.getEnd() >= from ? start : end); + break; + default: + // do the binary search + pt = binarySearchFirstEndNotBefore(nests, from, start, end); + break; + } + for (; pt <= end; pt++) + { + IntervalI ival = nests[pt]; + // check for out of range + if (ival.getBegin() > to) + { + break; + } + result.add((T) ival); + if (nestLengths[pt] > 0) { - // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 - // + "\nFS in " + sf.getIndex1() + ":" + sf); - return sf; + // check subintervals in this nest + search(nests, from, to, pt, result); } - sf = (T) sf.getContainedBy(); } - return null; } + /** + * return the i-th interval in the designated order (bigendian or + * littleendian) + */ + public IntervalI get(int i) + { + if (i < 0 || i >= intervalCount + added) + { + return null; + } + ensureFinalized(); + return intervals[i]; + } + + /** + * Return the deepest level of nesting. + * + */ @Override public int getDepth() { - int n = intervals.size(); - if (n < 2) + ensureFinalized(); + BitSet bsTested = new BitSet(); + return Math.max((createUnnested ? getDepth(unnested, bsTested) : 0), + getDepth(root, bsTested)); + } + + /** + * Iteratively dive deeply. + * + * @param pt + * @param bsTested + * @return + */ + private int getDepth(int pt, BitSet bsTested) + { + int maxDepth = 0; + int depth; + int n = nestLengths[pt]; + if (n == 0 || bsTested.get(pt)) { - return n; + return 1; } - ensureFinalized(); - int maxDepth = 1; - T root = null; - for (int i = 0; i < n; i++) + bsTested.set(pt); + for (int st = nestOffsets[pt], i = st + n; --i >= st;) { - T element = intervals.get(i); - IntervalI container = element; - if (element.getContainedBy() == null) + if ((depth = getDepth(i, bsTested)) > maxDepth) { - root = element; - } - int depth = 1; - while ((container = container.getContainedBy()) != null) - { - if (++depth > maxDepth && container == root) - { - maxDepth = depth; - break; - } + maxDepth = depth; } } - return maxDepth; + return maxDepth + 1; + } + + /** + * Get the number of root-level nests. + * + */ + public int getWidth() + { + ensureFinalized(); + return nestLengths[root] + (createUnnested ? nestLengths[unnested] : 0); } - @Override public boolean isValid() { ensureFinalized(); @@ -426,77 +839,72 @@ public class IntervalStore @Override public Iterator iterator() { - return intervals.iterator(); + ensureFinalized(); + return new Iterator() + { - } + private int next; - @SuppressWarnings("unchecked") - private void linkFeatures(IntervalI[] features) - { - int n = features.length; - if (n < 2) - { - return; - } - int maxEnd = features[0].getEnd(); - for (int i = 1; i < n; i++) - { - T sf = (T) features[i]; - if (sf.getBegin() <= maxEnd) + @Override + public boolean hasNext() { - sf.setContainedBy(getContainedBy((T) features[i - 1], sf)); + return next < intervalCount; } - if (sf.getEnd() > maxEnd) + + @SuppressWarnings("unchecked") + @Override + public T next() { - maxEnd = sf.getEnd(); + if (next >= intervalCount) + { + throw new NoSuchElementException(); + } + return (T) intervals[next++]; } - } + }; } /** - * Answers true if the list contains the interval, else false. This method is - * optimised for the condition that the list is sorted on interval start - * position ascending, and will give unreliable results if this does not hold. + * Indented printing of the intervals. * - * @param intervals - * @param entry - * @return */ - protected boolean listContains(List intervals, Object entry) + @Override + public String prettyPrint() { - if (intervals == null || entry == null) - { - return false; - } - - if (!isSorted) + ensureFinalized(); + StringBuffer sb = new StringBuffer(); + if (createUnnested) { - return intervals.contains(entry); + sb.append("unnested:"); + dump(intervalCount + 1, sb, "\n"); + sb.append("\nnested:"); } + dump(intervalCount, sb, "\n"); + return sb.toString(); + } - @SuppressWarnings("unchecked") - T interval = (T) entry; + /** + * Iterative nest dump. + * + * @param nest + * @param sb + * @param sep + */ + private void dump(int nest, StringBuffer sb, String sep) + { + int pt = nestOffsets[nest]; + int n = nestLengths[nest]; + sep += " "; - /* - * locate the first entry in the list which does not precede the interval - */ - int pos = findFirstBegin(intervals, interval.getBegin()); - int len = intervals.size(); - while (pos < len) + for (int i = 0; i < n; i++) { - T sf = intervals.get(pos); - if (sf.getBegin() > interval.getBegin()) - { - return false; // no match found - } - if (sf.getEnd() == interval.getEnd() && sf.equals(interval)) + sb.append(sep).append(nests[pt + i].toString()); + if (nestLengths[pt + i] > 0) { - return true; + dump(pt + i, sb, sep + " "); } - pos++; } - return false; } @Override @@ -506,122 +914,463 @@ public class IntervalStore // { // throw new NullPointerException(); // } - return (o != null && intervals.size() > 0 + return (o != null && intervalCount > 0 && removeInterval((IntervalI) o)); } /** + * Find the interval or return where it should go, possibly into the add + * buffer + * + * @param interval + * @param rangeOnly + * don't do a full identity check, just a range check + * + * @return index (nonnegative) or index where it would go (negative) + */ + + private int findInterval(IntervalI interval, boolean rangeOnly) + { + + // if (isSorted) + // { + int pt = binaryIdentitySearch(interval, null, rangeOnly); + // if (addPt == intervalCount || offsets[pt] == 0) + // return pt; + if (pt >= 0 || added == 0 || pt == -1 - intervalCount) + { + return pt; + } + pt = -1 - pt; + int start = interval.getBegin(); + int end = interval.getEnd(); + + int match = pt; + + while ((pt = offsets[pt]) != 0) + { + IntervalI iv = intervals[pt]; + switch (compareRange(iv, start, end)) + { + case -1: + break; + case 0: + if (!rangeOnly && sameInterval(iv, interval)) + { + return pt; + } + // fall through + case 1: + match = pt; + continue; + } + } + return -1 - match; + // } + // else + // { + // int i = intervalCount; + // while (--i >= 0 && !sameInterval(intervals[i], interval)) + // { + // ; + // } + // return i; + // } + } + + /** * Uses a binary search to find the entry and removes it if found. * - * @param entry + * @param interval * @return */ - protected boolean removeInterval(IntervalI entry) + protected boolean removeInterval(IntervalI interval) { - if (!isSorted) + + if (// !isSorted || + added > 0) + { + sort(); + } + int i = binaryIdentitySearch(interval, bsDeleted, false); + if (i < 0) + { + return false; + } + if (deleted == 0) { - return intervals.remove(entry); + if (bsDeleted == null) + { + bsDeleted = new BitSet(intervalCount); + } + else + { + bsDeleted.clear(); + } } + bsDeleted.set(i); + deleted++; + return (isTainted = true); + } - /* - * find the first interval that might match, i.e. whose - * start position is not less than the target range start - * (NB inequality test ensures the first match if any is found) - */ - int startIndex = findFirstBegin(intervals, entry.getBegin()); + /** + * Fill in the gaps of the intervals array after one or more deletions. + * + */ + private void finalizeDeletion() + { + if (deleted == 0) + { + return; + } - /* - * traverse intervals to look for a match - */ - int from = entry.getBegin(); - int to = entry.getEnd(); - for (int i = startIndex, size = intervals.size(); i < size; i++) + // ......xxx.....xxxx.....xxxxx.... + // ......^i,pt + // ...... ....... + // ............ + for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;) { - T sf = intervals.get(i); - if (sf.getBegin() > from) + i = bsDeleted.nextClearBit(i + 1); + int pt1 = bsDeleted.nextSetBit(i + 1); + if (pt1 < 0) { - return false; + pt1 = intervalCount; } - if (sf.getEnd() == to && sf.equals(entry)) + int n = pt1 - i; + System.arraycopy(intervals, i, intervals, pt, n); + pt += n; + if (pt1 == intervalCount) { - intervals.remove(i).setContainedBy(null); - return (isTainted = true); + for (i = pt1; --i >= pt;) + { + intervals[i] = null; + } + intervalCount -= deleted; + deleted = 0; + bsDeleted.clear(); + break; } + i = pt1; } - return false; + } - @Override - public int size() + /** + * Recreate the key nest arrays. + * + */ + public boolean revalidate() { - return intervals.size(); + isTainted = true; + // isSorted = false; + ensureFinalized(); + return true; } + /** + * Return the total number of intervals in the store. + * + */ @Override - public String toString() + public int size() { - return prettyPrint(); + return intervalCount + added - deleted; } + /** + * AbstractCollection override to ensure that we have finalized the store. + */ @Override - public int getWidth() + public Object[] toArray() { ensureFinalized(); - int w = 0; - for (int i = intervals.size(); --i >= 0;) + return super.toArray(); + } + + /** + * Sort intervals by start. + */ + private void sort() + { + if (added > 0) { - if (intervals.get(i).getContainedBy() == null) - { - w++; - } + intervals = finalizeAddition(new IntervalI[intervalCount + added]); + } + else if (deleted > 0) + { + finalizeDeletion(); + } + else + { + // SOMETHING HAPPENS WHEN Arrays.sort is run that + // adds 100 ms to a 150 ms run time. + // I don't know why. + Arrays.sort(intervals, 0, intervalCount, icompare); } - return w; + updateMinMaxStart(); + // isSorted = true; } - @Override - public String prettyPrint() + /** + * Create the key arrays: nests, nestOffsets, and nestLengths. The starting + * point is getting the container array, which may point to then root nest or + * the unnested set. + * + * This is a pretty complicated method; it was way simpler before I decided to + * support unnesting as an option. + * + */ + private void createArrays() { - int n = intervals.size(); - if (n == 0) + + // goal here is to create an array, nests[], that is a block + + /** + * When unnesting, we need a second top-level listing. + * + */ + int len = intervalCount + (createUnnested ? 2 : 1); + root = intervalCount; + unnested = intervalCount + 1; + + /** + * The three key arrays produced by this method: + */ + nests = new IntervalI[intervalCount]; + nestOffsets = new int[len]; + nestLengths = new int[len]; + + /** + * the objectives of Phase One + */ + + int[] myContainer = new int[intervalCount]; + int[] counts = new int[len]; + + // Phase One: Get the temporary container array myContainer. + + myContainer[0] = (createUnnested ? unnested : root); + counts[myContainer[0]] = 1; + + // memories for previous begin and end + + int beginLast = intervals[0].getBegin(); + int endLast = intervals[0].getEnd(); + + // memories for previous unnested pt, begin, and end + + int ptLastNot2 = root; + int beginLast2 = beginLast; + int endLast2 = endLast; + + for (int i = 1; i < intervalCount; i++) { - return ""; + int pt = i - 1; + int begin = intervals[i].getBegin(); + int end = intervals[i].getEnd(); + + // initialize the container to either root or unnested + + myContainer[i] = myContainer[0]; + + // OK, now figure it all out... + + boolean isNested; + if (createUnnested) + { + // Using a method isNested(...) here, because there are different + // ways of defining "nested" when start or end are the + // same. The definition used here would not be my first choice, + // but it matches results for IntervalStoreJ + // perfectly, down to the exact number of times that the + // binary search runs through its start/mid/end loops in findOverlap. + + // beginLast2 and endLast2 refer to the last unnested level + + isNested = isNested(begin, end, beginLast2, endLast2); + if (isNested) + { + // this is tricky; making sure we properly get the + // nests that are to be removed from the top-level + // unnested list assigned a container root, while all + // top-level nests get the pointer to unnested. + + pt = ptLastNot2; + isNested = (pt == root || isNested(begin, end, + intervals[pt].getBegin(), intervals[pt].getEnd())); + if (!isNested) + { + myContainer[i] = root; + } + } + } + else + { + isNested = isNested(begin, end, beginLast, endLast); + } + + // ...almost done... + + if (isNested) + { + myContainer[i] = pt; + } + else + { + + // monotonic -- find the parent that is doing the nesting + + while ((pt = myContainer[pt]) < root) + { + if (isNested(begin, end, intervals[pt].getBegin(), + intervals[pt].getEnd())) + { + myContainer[i] = pt; + // fully contained by a previous interval + // System.out.println("mycontainer " + i + " = " + pt); + break; + } + } + } + + // update the counts and pointers + + counts[myContainer[i]]++; + if (myContainer[i] == unnested) + { + endLast2 = end; + beginLast2 = begin; + } + else + { + ptLastNot2 = i; + endLast = end; + beginLast = begin; + } } - if (n == 1) + + // System.out.println("intervals " + Arrays.toString(intervals)); + // System.out.println("conts " + Arrays.toString(myContainer)); + // System.out.println("counts " + Arrays.toString(counts)); + + // Phase Two: Construct the nests[] array and its associated + // starting pointer array and nest element counts. These counts + // are actually produced above, but we reconstruct it as a set + // of dynamic pointers during construction. + + // The reason this is two phases is that only now do we know where to start + // the nested set. If we did not unnest, we could do this all in one pass + // using a reverse sort for the root, and then just reverse that, but with + // unnesting, we have two unknown starts, and that doesn't give us that + // opportunity. + + /** + * this temporary array tracks the pointer within nestOffsets to the nest + * block offset for intervals[i]'s container. + */ + int[] startPt = new int[len]; + + startPt[root] = root; + + int nextStart = counts[root]; + + if (unnested > 0) { - return intervals.get(0) + "\n"; + nestOffsets[root] = counts[unnested]; + nextStart += counts[unnested]; + startPt[unnested] = unnested; } - ensureFinalized(); - String sep = "\t"; - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < n; i++) + + // Now get all the pointers right and set the nests[] pointer into intervals + // correctly. + + for (int i = 0; i < intervalCount; i++) { - IntervalI range = orderedIntervalStarts[i]; - IntervalI container = range.getContainedBy(); - while (container != null) + int ptOffset = startPt[myContainer[i]]; + int p = nestOffsets[ptOffset] + nestLengths[ptOffset]++; + nests[p] = intervals[i]; + if (counts[i] > 0) { - sb.append(sep); - container = container.getContainedBy(); + // this is a container + startPt[i] = p; + nestOffsets[p] = nextStart; + nextStart += counts[i]; } - sb.append(range.toString()).append('\n'); } - return sb.toString(); + + // System.out.println("intervals " + Arrays.toString(intervals)); + // System.out.println("nests " + Arrays.toString(nests)); + // System.out.println("conts " + Arrays.toString(myContainer)); + // System.out.println("offsets " + Arrays.toString(nestOffsets)); + // System.out.println("lengths " + Arrays.toString(nestLengths)); + // System.out.println( + // "done " + nestLengths[root] + // + (unnested > 0 ? " " + nestLengths[unnested] : "")); } - @Override - public boolean revalidate() + /** + * Child-Parent relationships to match IntervalStoreJ. Perhaps a bit arcane? + * Objective is to minimize the depth when we can. + * + * @param childStart + * @param childEnd + * @param parentStart + * @param parentEnd + * @return + */ + private static boolean isNested(int childStart, int childEnd, + int parentStart, int parentEnd) { - isTainted = true; - isSorted = true; - ensureFinalized(); - return true; + return (parentStart <= childStart && parentEnd > childEnd + || parentStart < childStart && parentEnd == childEnd); + } + + /** + * Just a couple of pointers to help speed findOverlaps along a bit. + * + */ + private void updateMinMaxStart() + { + if (intervalCount > 0) + { + minStart = intervals[0].getBegin(); + maxStart = intervals[intervalCount - 1].getBegin(); + } + else + { + minStart = Integer.MAX_VALUE; + maxStart = Integer.MIN_VALUE; + } } @Override - public IntervalI get(int i) + public String toString() { - ensureFinalized(); - return (i < 0 || i >= orderedIntervalStarts.length ? null - : orderedIntervalStarts[i]); + return prettyPrint(); + } + + /** + * CAUTION! This presumes that equalsInterval does check descriptions. Note + * that bartongroup.IntervalStoreJ does NOT do this and + * jalview.datamodel.features.SequenceFeature does not, either. But + * nonc.SimpleFeature in test DOES, because it overrides equalsInterval. + * + * The reason we do it this way is to avoid an unnecessary and costly test for + * obj instanceof IntervalI. + * + * Added by Mungo to use equals, but that requires use of instance checking, + * which is not significant in Java (apparently), but is a bigger deal in + * JavaScript. So here we have to hack + * bobhanson.IntervalStoreJ.nonc.InervalStore to bypass equals. + * + * @param i1 + * @param i2 + * @return + */ + boolean sameInterval(IntervalI i1, IntervalI i2) + { + // avoiding equals() for JavaScript performance + return ((SequenceFeature) i1).equals((SequenceFeature) i2, false); } }