X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fintervalstore%2Fnonc%2FIntervalStore.java;h=405a4714828cb1dc48791a533db2a1472bf7a0d5;hb=c769685cc0a0eead20b9ce9b5a3d0d96f664b6cb;hp=23a9d1cc080d6e37ac08e513cfe0efdeffa4d1e7;hpb=28e9024f09a78a9625ed4defa2012bf342bec51e;p=jalview.git diff --git a/src/intervalstore/nonc/IntervalStore.java b/src/intervalstore/nonc/IntervalStore.java index 23a9d1c..405a471 100644 --- a/src/intervalstore/nonc/IntervalStore.java +++ b/src/intervalstore/nonc/IntervalStore.java @@ -31,6 +31,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package intervalstore.nonc; +import jalview.datamodel.SequenceFeature; + import java.util.AbstractCollection; import java.util.ArrayList; import java.util.Arrays; @@ -46,92 +48,70 @@ import intervalstore.api.IntervalStoreI; /** * - * A second idea, doing a double binary sort for the full interval. Seemed like - * a good idea, but is 50% slower. + * A fourth idea, implementing NCList as a pointer system identical in operation + * to IntervalStoreJ's implementation using ArrayLists but here using just two + * int[] arrays and a single IntervalI[] array that is in the proper order for + * holding all nested and unnested arrays. + * + * Use of unnesting is optional and can be experimented with by changing the + * createUnnested flag to false. + * + * Preliminary testing suggests that this implementation is about 10% faster for + * store interval size 50, store sequence factor 10, query width -1000 (fixed + * 1000-unit-wide window), and query count 100000. + * + * Origional note (Mungo Carstairs, IntervalStoreJ) * * A Collection class to store interval-associated data, with options for "lazy" * sorting so as to speed incremental construction of the data prior to issuing * a findOverlap call. * - * * Accepts duplicate entries but not null values. * * * - * @author Bob Hanson 2019.08.06 + * @author Bob Hanson 2019.09.01 * * @param - * any type providing getBegin(), getEnd() - * getContainedBy(), and setContainedBy() + * any type providing getBegin() and + * getEnd(), primarily */ public class IntervalStore extends AbstractCollection implements IntervalStoreI { /** - * Search for the last interval that starts before or at the specified from/to - * range and the first interval that starts after it. In the situation that - * there are multiple intervals starting at from, this method returns the - * first of those. + * Search for the last interval that ends at or just after the specified + * position. In the situation that there are multiple intervals starting at + * pos, this method returns the first of those. * - * @param a + * @param nests + * the nest-ordered array from createArrays() * @param from - * @param to - * @param ret - * @return + * the position at the start of the interval of interest + * @param start + * the starting point for the subarray search + * @param end + * the ending point for the subarray search + * @return index into the nests array or one greater than end if not found */ - public int binaryLastIntervalSearch(long from, - long to, int[] ret) + public static int binarySearchFirstEndNotBefore(IntervalI[] nests, long from, + int start, int end) { - int start = 0, start2 = 0; - int matched = 0; - int end = intervalCount - 1, end2 = intervalCount; - int mid, begin; - IntervalI e; + int matched = end + 1; + int mid; while (start <= end) { mid = (start + end) >>> 1; - e = intervals[mid]; - begin = e.getBegin(); - switch (Long.signum(begin - from)) + if (nests[mid].getEnd() >= from) { - case -1: matched = mid; - start = mid + 1; - break; - case 0: - case 1: - end = mid - 1; - if (begin > to) - { - end2 = mid; - } - else - { - start2 = mid; - } - break; - } - } - ret[0] = end2; - start = Math.max(start2, end); - end = end2 - 1; - - while (start <= end) - { - mid = (start + end) >>> 1; - e = intervals[mid]; - begin = e.getBegin(); - if (begin > to) - { - ret[0] = mid; end = mid - 1; } else { start = mid + 1; } - } return matched; } @@ -146,26 +126,24 @@ public class IntervalStore */ private boolean bigendian; - private final boolean DO_PRESORT; - - private boolean isSorted; + // private final boolean DO_PRESORT; + // + // private boolean isSorted; + // + private boolean createUnnested = true; private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, maxEnd = Integer.MAX_VALUE; - // private Comparator icompare = new IntervalComparator(); - private boolean isTainted; private int capacity = 8; - private IntervalI[] intervals = new IntervalI[capacity]; + protected IntervalI[] intervals = new IntervalI[capacity]; private int[] offsets; - private int[] ret = new int[1]; - - private int intervalCount; + protected int intervalCount; private int added; @@ -174,73 +152,109 @@ public class IntervalStore private BitSet bsDeleted; /** - * Constructor + * the key array that lists the intervals in sub-interval order so that the + * binary search can be isolated to a single subinterval just by indicating + * start and end within one array */ - public IntervalStore() - { - this(true); - } + private IntervalI[] nests; - public IntervalStore(boolean presort) - { - this(null, presort); - } + /** + * pointers to the starting positions in nests[] for a subinterval; the first + * element is the "unnested" pointer when unnesting (2) or the root level nest + * pointer when not unnesting (1); the second element is root level nest when + * unnesting or the start of nest data when not unnesting; after that, nests + * are in contiguous sets of binary-searchable blocks + * + */ + private int[] nestOffsets; /** - * Constructor given a list of intervals. Note that the list may get sorted as - * a side-effect of calling this constructor. + * the count of intervals within a nest + * */ - public IntervalStore(List intervals) + private int[] nestLengths; + + /** + * pointer to the root nest (intervalCount) + */ + private int root; + + /** + * pointer to the unnested set (intervalCount + 1); + */ + private int unnested; + + public IntervalStore() { - this(intervals, true); + this(null); } + // public IntervalStore(boolean presort) + // { + // this(null, presort); + // } + // /** - * Allows a presort option, which can speed up initial loading of individual - * features but will delay the first findOverlap if set to true. - * - * @param intervals - * @param presort + * Constructor given a list of intervals. Note that the list may get sorted as + * a side-effect of calling this constructor. */ - public IntervalStore(List intervals, boolean presort) + public IntervalStore(List intervals) { - this(intervals, presort, null, false); + this(intervals, null, true); } /** * * @param intervals * intervals to initialize with (others may still be added) - * @param presort - * whether or not to presort the list as additions are made * @param comparator - * IntervalI.COMPARATOR_LITTLEENDIAN or - * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that - * sorts by description as well, for example. + * IntervalI.COMPARATOR_BEGIN_ASC_END_ASC or + * IntervalI.COMPARE_BEGIN_ASC_END_DESC, but this could also be one + * that sorts by description as well, for example. * @param bigendian - * true if the comparator sorts [10-30] before [10-20] + * true if the comparator sorts [10-100] before [10-80]; defaults to + * true */ - public IntervalStore(List intervals, boolean presort, + public IntervalStore(List intervals, Comparator comparator, boolean bigendian) { + // COMPARE_BEGIN_ASC_END_DESC is standard, meaning + // the order will be [10,100] before [10,80] + // this order doesn't really matter much. + icompare = (comparator != null ? comparator + : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC + : IntervalI.COMPARE_BEGIN_ASC_END_ASC); + this.bigendian = bigendian; + if (intervals != null) { + // So, five hours later, we learn that all my timing has been thrown off + // because I used Array.sort, which if you look in the Java JDK is exactly + // what Collections.sort is, but for whatever reason, all my times were + // high by about 100-200 ms 100% reproducibly. Just one call to Array.sort + // prior to the nanotimer start messed it all up. Some sort of memory or + // garbage issue; I do not know. But using Collections.sort here fixes the + // problem. + + Collections.sort(intervals, icompare); intervals.toArray( this.intervals = new IntervalI[capacity = intervalCount = intervals .size()]); } - DO_PRESORT = presort; - icompare = (comparator != null ? comparator - : bigendian ? IntervalI.COMPARATOR_BIGENDIAN - : IntervalI.COMPARATOR_LITTLEENDIAN); - this.bigendian = bigendian; - - if (DO_PRESORT && intervalCount > 1) + isTainted = true; + // DO_PRESORT = presort; + if (// DO_PRESORT && + intervalCount > 1) { - sort(); + updateMinMaxStart(); + // isSorted = true; + ensureFinalized(); } - isSorted = DO_PRESORT; - isTainted = true; + // else + // { + // isSorted = DO_PRESORT; + // isTainted = true; + // } } /** @@ -257,9 +271,16 @@ public class IntervalStore /** * Adds one interval to the store, optionally checking for duplicates. * + * This fast-adding algorithm uses a double-length int[] (offsets) to hold + * pointers into intervals[] that allows continual sorting of an expanding + * array buffer. When the time comes, this is cleaned up and packed back into + * a standard array, but in the mean time, it can be added to with no loss of + * sorting. + * * @param interval * @param allowDuplicates */ + @Override public boolean add(T interval, boolean allowDuplicates) { if (interval == null) @@ -268,7 +289,9 @@ public class IntervalStore } if (deleted > 0) + { finalizeDeletion(); + } if (!isTainted) { offsets = null; @@ -287,34 +310,38 @@ public class IntervalStore } - if (DO_PRESORT && isSorted) - { + // if (DO_PRESORT && isSorted) + // { if (intervalCount == 0) { // ignore } else { - index = findInterval(interval); + index = findInterval(interval, allowDuplicates); if (!allowDuplicates && index >= 0) { return false; } if (index < 0) + { index = -1 - index; + } else + { index++; + } } - } - else - { - if (!allowDuplicates && findInterval(interval) >= 0) - { - return false; - } - - } + // } + // else + // { + // if (!allowDuplicates && findInterval(interval) >= 0) + // { + // return false; + // } + // isSorted = false; + // } if (index == intervalCount) { @@ -328,7 +355,9 @@ public class IntervalStore // System.out.println("stashed " + pt + " " + interval + " for " // + index + " " + intervals[index]); if (offsets == null) + { offsets = new int[capacity]; + } offsets[pt] = offsets[index]; @@ -341,10 +370,18 @@ public class IntervalStore } } + /** + * Clean up the intervals array into a simple ordered array. + * + * @param dest + * @return + */ private IntervalI[] finalizeAddition(IntervalI[] dest) { if (dest == null) + { dest = intervals; + } if (added == 0) { if (intervalCount > 0 && dest != intervals) @@ -354,29 +391,38 @@ public class IntervalStore capacity = dest.length; return dest; } + // System.out.println("finalizing " + intervalCount + " " + added); // array is [(intervalCount)...null...(added)] int ntotal = intervalCount + added; - for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;) + for (int ptShift = ntotal, pt = intervalCount; pt >= 0;) { int pt0 = pt; while (--pt >= 0 && offsets[pt] == 0) + { ; + } if (pt < 0) + { pt = 0; + } int nOK = pt0 - pt; // shift upper intervals right ptShift -= nOK; if (nOK > 0) + { System.arraycopy(intervals, pt, dest, ptShift, nOK); + } if (added == 0) + { break; - for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) - { - dest[--ptShift] = intervals[offset]; - --added; - } + } + for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) + { + dest[--ptShift] = intervals[offset]; + --added; + } } offsets = null; intervalCount = ntotal; @@ -384,10 +430,17 @@ public class IntervalStore return dest; } + /** + * A binary search for a duplicate. + * + * @param interval + * @return + */ public int binaryIdentitySearch(IntervalI interval) { - return binaryIdentitySearch(interval, null); + return binaryIdentitySearch(interval, null, false); } + /** * for remove() and contains() * @@ -395,18 +448,25 @@ public class IntervalStore * @param interval * @param bsIgnore * for deleted + * @param rangeOnly + * don't do a full identity check, just a range check * @return index or, if not found, -1 - "would be here" */ - public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore) + private int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore, + boolean rangeOnly) { int start = 0; int r0 = interval.getBegin(); int r1 = interval.getEnd(); int end = intervalCount - 1; if (end < 0 || r0 < minStart) + { return -1; + } if (r0 > maxStart) + { return -1 - intervalCount; + } while (start <= end) { int mid = (start + end) >>> 1; @@ -421,73 +481,68 @@ public class IntervalStore continue; case 0: IntervalI iv = intervals[mid]; - if ((bsIgnore == null || !bsIgnore.get(mid)) - && iv.equalsInterval(interval)) + if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(mid)) + && sameInterval(interval, iv)) + { return mid; - // found one; just scan up and down now, first checking the range, but - // also checking other possible aspects of equivalence. + // found one; just scan up and down now, first checking the range, but + // also checking other possible aspects of equivalence. + } for (int i = mid; ++i <= end;) { if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1) + { break; - if ((bsIgnore == null || !bsIgnore.get(i)) - && iv.equalsInterval(interval)) + } + if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(i)) + && sameInterval(interval, iv)) + { return i; + } } for (int i = mid; --i >= start;) { - if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1) + if ((iv = intervals[i]).getBegin() != r0 + || (bigendian ? r1 < iv.getEnd() : iv.getEnd() < r1)) + { return -1 - ++i; + } if ((bsIgnore == null || !bsIgnore.get(i)) - && iv.equalsInterval(interval)) + && sameInterval(interval, iv)) + { return i; + } } - return -1 - start; + return -1 - mid; } } return -1 - start; } - // private int binaryInsertionSearch(long from, long to) - // { - // int matched = intervalCount; - // int end = matched - 1; - // int start = matched; - // if (end < 0 || from > intervals[end].getEnd() - // || from < intervals[start = 0].getBegin()) - // return start; - // while (start <= end) - // { - // int mid = (start + end) >>> 1; - // switch (compareRange(intervals[mid], from, to)) - // { - // case 0: - // return mid; - // case 1: - // matched = mid; - // end = mid - 1; - // continue; - // case -1: - // start = mid + 1; - // continue; - // } - // - // } - // return matched; - // } - + public boolean canCheckForDuplicates() + { + return true; + } + /** + * Reset all arrays. + * + */ @Override public void clear() { intervalCount = added = 0; - isSorted = true; + // isSorted = true; isTainted = true; offsets = null; + intervals = new IntervalI[8]; + nestOffsets = nestLengths = null; + nests = null; minStart = maxEnd = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } + /** * Compare an interval t to a from/to range for insertion purposes * @@ -500,8 +555,6 @@ public class IntervalStore */ private int compareRange(IntervalI t, long from, long to) { - if (t == null) - System.out.println("???"); int order = Long.signum(t.getBegin() - from); return (order == 0 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) @@ -511,41 +564,51 @@ public class IntervalStore @Override public boolean contains(Object entry) { - if (entry == null || intervalCount == 0) + if (entry == null || intervalCount == 0 && added == 0 && deleted == 0) + { return false; - if (!isSorted || deleted > 0) + } + if (// !isSorted || + deleted > 0) { sort(); } - return (findInterval((IntervalI) entry) >= 0); + return (findInterval((IntervalI) entry, false) >= 0); } + /** + * Check to see if a given interval is within another. + * + * Not implemented. + * + * @param outer + * @param inner + * @return + */ public boolean containsInterval(IntervalI outer, IntervalI inner) { - ensureFinalized(); - int index = binaryIdentitySearch(inner, null); - if (index >= 0) - while ((index = index - Math.abs(offsets[index])) >= 0) - { - if (intervals[index] == outer) - { - return true; - } - } - return false; + return false; // not applicable } + /** + * Ensure that all addition, deletion, and sorting has been done, and that the + * nesting arrays have been created so that we are ready for findOverlaps(). + * + */ + private void ensureFinalized() { - if (isTainted && intervalCount + added > 1) + if (isTainted) { - if (!isSorted || added > 0 || deleted > 0) + if (// !isSorted || + added > 0 || deleted > 0) { sort(); } - if (offsets == null || offsets.length < intervalCount) - offsets = new int[intervalCount]; - linkFeatures(); + if (intervalCount > 0) + { + createArrays(); + } isTainted = false; } } @@ -559,18 +622,16 @@ public class IntervalStore @Override public List findOverlaps(long from, long to) { - List list = findOverlaps(from, to, null); - Collections.reverse(list); - return list; + return findOverlaps(from, to, null); } /** * Find all overlaps within the given range, inclusively. * - * @return a list sorted in descending order of start position + * @return a list sorted in the order provided by the features list comparator * */ - + @SuppressWarnings("unchecked") @Override public List findOverlaps(long from, long to, List result) @@ -579,7 +640,7 @@ public class IntervalStore { result = new ArrayList<>(); } - switch (intervalCount) + switch (intervalCount + added) { case 0: return result; @@ -595,112 +656,174 @@ public class IntervalStore ensureFinalized(); if (from > maxEnd || to < minStart) + { return result; - int index = binaryLastIntervalSearch(from, to, ret); - int index1 = ret[0]; - if (index1 < 0) - return result; - - if (index1 > index + 1) + } + if (createUnnested) { - while (--index1 > index) + if (nestLengths[unnested] > 0) { - result.add((T) intervals[index1]); + searchUnnested(from, to, (List) result); } } - boolean isMonotonic = false; - while (index >= 0) + if (nestLengths[root] > 0) + { + search(nests, from, to, root, result); + } + return result; + } + + /** + * A simpler search, since we know we don't have any subintervals. Not + * necessary, actually. + * + * @param nestOffsets + * @param nestLengths + * @param nests + * @param from + * @param to + * @param result + */ + private void searchUnnested(long from, long to, List result) + { + int start = nestOffsets[unnested]; + int end = start + nestLengths[unnested] - 1; + for (int pt = binarySearchFirstEndNotBefore(nests, from, start, + end); pt <= end; pt++) { - IntervalI sf = intervals[index]; - if (sf.getEnd() >= from) + IntervalI ival = nests[pt]; + if (ival.getBegin() > to) { - result.add((T) sf); + break; } - else if (isMonotonic) + result.add(ival); + } + } + + /** + * The main search of the nests[] array's subarrays + * + * @param nests + * @param from + * @param to + * @param nest + * @param result + */ + @SuppressWarnings("unchecked") + private void search(IntervalI[] nests, long from, long to, int nest, + List result) + { + int start = nestOffsets[nest]; + int n = nestLengths[nest]; + int end = start + n - 1; + IntervalI first = nests[start]; + IntervalI last = nests[end]; + + // quick tests for common cases: + // out of range + if (last.getEnd() < from || first.getBegin() > to) + { + return; + } + int pt; + switch (n) + { + case 1: + // just one interval and hasn't failed begin/end test + pt = start; + break; + case 2: + // just two and didn't fail begin/end test + // so there is only one option: either the first or the second is our + // winner + pt = (first.getEnd() >= from ? start : end); + break; + default: + // do the binary search + pt = binarySearchFirstEndNotBefore(nests, from, start, end); + break; + } + for (; pt <= end; pt++) + { + IntervalI ival = nests[pt]; + // check for out of range + if (ival.getBegin() > to) { break; } - int offset = offsets[index]; - isMonotonic = (offset < 0); - index -= (isMonotonic ? -offset : offset); + result.add((T) ival); + if (nestLengths[pt] > 0) + { + // check subintervals in this nest + search(nests, from, to, pt, result); + } } - return result; } - @Override + /** + * return the i-th interval in the designated order (bigendian or + * littleendian) + */ public IntervalI get(int i) { if (i < 0 || i >= intervalCount + added) + { return null; + } ensureFinalized(); return intervals[i]; } - private int getContainedBy(int index, int begin) - { - while (index >= 0) - { - IntervalI sf = intervals[index]; - if (sf.getEnd() >= begin) - { - // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 - // + "\nFS in " + sf.getIndex1() + ":" + sf); - return index; - } - index -= Math.abs(offsets[index]); - } - return IntervalI.NOT_CONTAINED; - } - + /** + * Return the deepest level of nesting. + * + */ @Override public int getDepth() { ensureFinalized(); - if (intervalCount < 2) + BitSet bsTested = new BitSet(); + return Math.max((createUnnested ? getDepth(unnested, bsTested) : 0), + getDepth(root, bsTested)); + } + + /** + * Iteratively dive deeply. + * + * @param pt + * @param bsTested + * @return + */ + private int getDepth(int pt, BitSet bsTested) + { + int maxDepth = 0; + int depth; + int n = nestLengths[pt]; + if (n == 0 || bsTested.get(pt)) { - return intervalCount; + return 1; } - int maxDepth = 1; - IntervalI root = null; - for (int i = 0; i < intervalCount; i++) + bsTested.set(pt); + for (int st = nestOffsets[pt], i = st + n; --i >= st;) { - IntervalI element = intervals[i]; - if (offsets[i] == IntervalI.NOT_CONTAINED) + if ((depth = getDepth(i, bsTested)) > maxDepth) { - root = element; - } - int depth = 1; - int index = i; - int offset; - while ((index = index - Math.abs(offset = offsets[index])) >= 0) - { - element = intervals[index]; - if (++depth > maxDepth && (element == root || offset < 0)) - { - maxDepth = depth; - break; - } + maxDepth = depth; } } - return maxDepth; + return maxDepth + 1; } - @Override + /** + * Get the number of root-level nests. + * + */ public int getWidth() { ensureFinalized(); - int w = 0; - for (int i = offsets.length; --i >= 0;) - { - if (offsets[i] > 0) - { - w++; - } - } - return w; + return nestLengths[root] + (createUnnested ? nestLengths[unnested] : 0); } - @Override public boolean isValid() { ensureFinalized(); @@ -716,7 +839,7 @@ public class IntervalStore @Override public Iterator iterator() { - finalizeAddition(null); + ensureFinalized(); return new Iterator() { @@ -733,67 +856,55 @@ public class IntervalStore public T next() { if (next >= intervalCount) + { throw new NoSuchElementException(); + } return (T) intervals[next++]; } }; } - private void linkFeatures() + /** + * Indented printing of the intervals. + * + */ + @Override + public String prettyPrint() { - if (intervalCount == 0) - return; - maxEnd = intervals[0].getEnd(); - offsets[0] = IntervalI.NOT_CONTAINED; - if (intervalCount == 1) + ensureFinalized(); + StringBuffer sb = new StringBuffer(); + if (createUnnested) { - return; + sb.append("unnested:"); + dump(intervalCount + 1, sb, "\n"); + sb.append("\nnested:"); } - boolean isMonotonic = true; - for (int i = 1; i < intervalCount; i++) - { - IntervalI sf = intervals[i]; - int begin = sf.getBegin(); - int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1); - // System.out.println(sf + " is contained by " - // + (index < 0 ? null : starts[index])); - - offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED - : isMonotonic ? index - i : i - index); - isMonotonic = (sf.getEnd() > maxEnd); - if (isMonotonic) - { - maxEnd = sf.getEnd(); - } - } - + dump(intervalCount, sb, "\n"); + return sb.toString(); } - @Override - public String prettyPrint() + /** + * Iterative nest dump. + * + * @param nest + * @param sb + * @param sep + */ + private void dump(int nest, StringBuffer sb, String sep) { - switch (intervalCount) - { - case 0: - return ""; - case 1: - return intervals[0] + "\n"; - } - ensureFinalized(); - String sep = "\t"; - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < intervalCount; i++) + int pt = nestOffsets[nest]; + int n = nestLengths[nest]; + sep += " "; + + for (int i = 0; i < n; i++) { - IntervalI range = intervals[i]; - int index = i; - while ((index = index - Math.abs(offsets[index])) >= 0) + sb.append(sep).append(nests[pt + i].toString()); + if (nestLengths[pt + i] > 0) { - sb.append(sep); + dump(pt + i, sb, sep + " "); } - sb.append(range.toString()).append('\n'); } - return sb.toString(); } @Override @@ -812,50 +923,59 @@ public class IntervalStore * buffer * * @param interval + * @param rangeOnly + * don't do a full identity check, just a range check + * * @return index (nonnegative) or index where it would go (negative) */ - private int findInterval(IntervalI interval) + private int findInterval(IntervalI interval, boolean rangeOnly) { - if (isSorted) - { - int pt = binaryIdentitySearch(interval, null); + // if (isSorted) + // { + int pt = binaryIdentitySearch(interval, null, rangeOnly); // if (addPt == intervalCount || offsets[pt] == 0) // return pt; if (pt >= 0 || added == 0 || pt == -1 - intervalCount) + { return pt; + } pt = -1 - pt; - int start = interval.getBegin(); - int end = interval.getEnd(); + int start = interval.getBegin(); + int end = interval.getEnd(); - int match = pt; + int match = pt; - while ((pt = offsets[pt]) != 0) + while ((pt = offsets[pt]) != 0) + { + IntervalI iv = intervals[pt]; + switch (compareRange(iv, start, end)) { - IntervalI iv = intervals[pt]; - switch (compareRange(iv, start, end)) + case -1: + break; + case 0: + if (!rangeOnly && sameInterval(iv, interval)) { - case -1: - break; - case 0: - if (iv.equalsInterval(interval)) - return pt; + return pt; + } // fall through - case 1: + case 1: match = pt; - continue; - } + continue; } + } return -1 - match; - } - else - { - int i = intervalCount; - while (--i >= 0 && !intervals[i].equalsInterval(interval)) - ; - return i; - } + // } + // else + // { + // int i = intervalCount; + // while (--i >= 0 && !sameInterval(intervals[i], interval)) + // { + // ; + // } + // return i; + // } } /** @@ -867,61 +987,90 @@ public class IntervalStore protected boolean removeInterval(IntervalI interval) { - if (!isSorted || added > 0) + if (// !isSorted || + added > 0) { sort(); } - int i = binaryIdentitySearch(interval, bsDeleted); + int i = binaryIdentitySearch(interval, bsDeleted, false); if (i < 0) + { return false; + } if (deleted == 0) { if (bsDeleted == null) + { bsDeleted = new BitSet(intervalCount); + } else + { bsDeleted.clear(); + } } bsDeleted.set(i); deleted++; return (isTainted = true); } + /** + * Fill in the gaps of the intervals array after one or more deletions. + * + */ private void finalizeDeletion() { if (deleted == 0) + { return; + } // ......xxx.....xxxx.....xxxxx.... // ......^i,pt // ...... ....... // ............ - for (int i = bsDeleted.nextSetBit(0); i >= 0;) + for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;) { - int pt = i; i = bsDeleted.nextClearBit(i + 1); int pt1 = bsDeleted.nextSetBit(i + 1); if (pt1 < 0) + { pt1 = intervalCount; - if (pt1 == pt) + } + int n = pt1 - i; + System.arraycopy(intervals, i, intervals, pt, n); + pt += n; + if (pt1 == intervalCount) + { + for (i = pt1; --i >= pt;) + { + intervals[i] = null; + } + intervalCount -= deleted; + deleted = 0; + bsDeleted.clear(); break; - System.arraycopy(intervals, i, intervals, pt, pt1 - i); + } i = pt1; } - intervalCount -= deleted; - deleted = 0; - bsDeleted.clear(); } - @Override + /** + * Recreate the key nest arrays. + * + */ public boolean revalidate() { isTainted = true; - isSorted = false; + // isSorted = false; ensureFinalized(); return true; } + /** + * Return the total number of intervals in the store. + * + */ @Override public int size() { @@ -929,7 +1078,17 @@ public class IntervalStore } /** - * Sort intervals by start (lowest first) and end (highest first). + * AbstractCollection override to ensure that we have finalized the store. + */ + @Override + public Object[] toArray() + { + ensureFinalized(); + return super.toArray(); + } + + /** + * Sort intervals by start. */ private void sort() { @@ -943,12 +1102,233 @@ public class IntervalStore } else { + // SOMETHING HAPPENS WHEN Arrays.sort is run that + // adds 100 ms to a 150 ms run time. + // I don't know why. Arrays.sort(intervals, 0, intervalCount, icompare); } updateMinMaxStart(); - isSorted = true; + // isSorted = true; + } + + /** + * Create the key arrays: nests, nestOffsets, and nestLengths. The starting + * point is getting the container array, which may point to then root nest or + * the unnested set. + * + * This is a pretty complicated method; it was way simpler before I decided to + * support unnesting as an option. + * + */ + private void createArrays() + { + + // goal here is to create an array, nests[], that is a block + + /** + * When unnesting, we need a second top-level listing. + * + */ + int len = intervalCount + (createUnnested ? 2 : 1); + root = intervalCount; + unnested = intervalCount + 1; + + /** + * The three key arrays produced by this method: + */ + nests = new IntervalI[intervalCount]; + nestOffsets = new int[len]; + nestLengths = new int[len]; + + /** + * the objectives of Phase One + */ + + int[] myContainer = new int[intervalCount]; + int[] counts = new int[len]; + + // Phase One: Get the temporary container array myContainer. + + myContainer[0] = (createUnnested ? unnested : root); + counts[myContainer[0]] = 1; + + // memories for previous begin and end + + int beginLast = intervals[0].getBegin(); + int endLast = intervals[0].getEnd(); + + // memories for previous unnested pt, begin, and end + + int ptLastNot2 = root; + int beginLast2 = beginLast; + int endLast2 = endLast; + + for (int i = 1; i < intervalCount; i++) + { + int pt = i - 1; + int begin = intervals[i].getBegin(); + int end = intervals[i].getEnd(); + + // initialize the container to either root or unnested + + myContainer[i] = myContainer[0]; + + // OK, now figure it all out... + + boolean isNested; + if (createUnnested) + { + // Using a method isNested(...) here, because there are different + // ways of defining "nested" when start or end are the + // same. The definition used here would not be my first choice, + // but it matches results for IntervalStoreJ + // perfectly, down to the exact number of times that the + // binary search runs through its start/mid/end loops in findOverlap. + + // beginLast2 and endLast2 refer to the last unnested level + + isNested = isNested(begin, end, beginLast2, endLast2); + if (isNested) + { + // this is tricky; making sure we properly get the + // nests that are to be removed from the top-level + // unnested list assigned a container root, while all + // top-level nests get the pointer to unnested. + + pt = ptLastNot2; + isNested = (pt == root || isNested(begin, end, + intervals[pt].getBegin(), intervals[pt].getEnd())); + if (!isNested) + { + myContainer[i] = root; + } + } + } + else + { + isNested = isNested(begin, end, beginLast, endLast); + } + + // ...almost done... + + if (isNested) + { + myContainer[i] = pt; + } + else + { + + // monotonic -- find the parent that is doing the nesting + + while ((pt = myContainer[pt]) < root) + { + if (isNested(begin, end, intervals[pt].getBegin(), + intervals[pt].getEnd())) + { + myContainer[i] = pt; + // fully contained by a previous interval + // System.out.println("mycontainer " + i + " = " + pt); + break; + } + } + } + + // update the counts and pointers + + counts[myContainer[i]]++; + if (myContainer[i] == unnested) + { + endLast2 = end; + beginLast2 = begin; + } + else + { + ptLastNot2 = i; + endLast = end; + beginLast = begin; + } + } + + // System.out.println("intervals " + Arrays.toString(intervals)); + // System.out.println("conts " + Arrays.toString(myContainer)); + // System.out.println("counts " + Arrays.toString(counts)); + + // Phase Two: Construct the nests[] array and its associated + // starting pointer array and nest element counts. These counts + // are actually produced above, but we reconstruct it as a set + // of dynamic pointers during construction. + + // The reason this is two phases is that only now do we know where to start + // the nested set. If we did not unnest, we could do this all in one pass + // using a reverse sort for the root, and then just reverse that, but with + // unnesting, we have two unknown starts, and that doesn't give us that + // opportunity. + + /** + * this temporary array tracks the pointer within nestOffsets to the nest + * block offset for intervals[i]'s container. + */ + int[] startPt = new int[len]; + + startPt[root] = root; + + int nextStart = counts[root]; + + if (unnested > 0) + { + nestOffsets[root] = counts[unnested]; + nextStart += counts[unnested]; + startPt[unnested] = unnested; + } + + // Now get all the pointers right and set the nests[] pointer into intervals + // correctly. + + for (int i = 0; i < intervalCount; i++) + { + int ptOffset = startPt[myContainer[i]]; + int p = nestOffsets[ptOffset] + nestLengths[ptOffset]++; + nests[p] = intervals[i]; + if (counts[i] > 0) + { + // this is a container + startPt[i] = p; + nestOffsets[p] = nextStart; + nextStart += counts[i]; + } + } + + // System.out.println("intervals " + Arrays.toString(intervals)); + // System.out.println("nests " + Arrays.toString(nests)); + // System.out.println("conts " + Arrays.toString(myContainer)); + // System.out.println("offsets " + Arrays.toString(nestOffsets)); + // System.out.println("lengths " + Arrays.toString(nestLengths)); + // System.out.println( + // "done " + nestLengths[root] + // + (unnested > 0 ? " " + nestLengths[unnested] : "")); + } + + /** + * Child-Parent relationships to match IntervalStoreJ. Perhaps a bit arcane? + * Objective is to minimize the depth when we can. + * + * @param childStart + * @param childEnd + * @param parentStart + * @param parentEnd + * @return + */ + private static boolean isNested(int childStart, int childEnd, + int parentStart, int parentEnd) + { + return (parentStart <= childStart && parentEnd > childEnd + || parentStart < childStart && parentEnd == childEnd); } + /** + * Just a couple of pointers to help speed findOverlaps along a bit. + * + */ private void updateMinMaxStart() { if (intervalCount > 0) @@ -969,4 +1349,28 @@ public class IntervalStore return prettyPrint(); } + /** + * CAUTION! This presumes that equalsInterval does check descriptions. Note + * that bartongroup.IntervalStoreJ does NOT do this and + * jalview.datamodel.features.SequenceFeature does not, either. But + * nonc.SimpleFeature in test DOES, because it overrides equalsInterval. + * + * The reason we do it this way is to avoid an unnecessary and costly test for + * obj instanceof IntervalI. + * + * Added by Mungo to use equals, but that requires use of instance checking, + * which is not significant in Java (apparently), but is a bigger deal in + * JavaScript. So here we have to hack + * bobhanson.IntervalStoreJ.nonc.InervalStore to bypass equals. + * + * @param i1 + * @param i2 + * @return + */ + boolean sameInterval(IntervalI i1, IntervalI i2) + { + // avoiding equals() for JavaScript performance + return ((SequenceFeature) i1).equals((SequenceFeature) i2, false); + } + }