/* BSD 3-Clause License Copyright (c) 2018, Mungo Carstairs All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package intervalstore.nonc; import java.util.AbstractCollection; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import intervalstore.api.IntervalI; import intervalstore.api.IntervalStoreI; /** * * A second idea, doing a double binary sort for the full interval. Seemed like * a good idea, but is 50% slower. * * A Collection class to store interval-associated data, with options for "lazy" * sorting so as to speed incremental construction of the data prior to issuing * a findOverlap call. * * * Accepts duplicate entries but not null values. * * * * @author Bob Hanson 2019.08.06 * * @param * any type providing getBegin(), getEnd() * getContainedBy(), and setContainedBy() */ public class IntervalStore extends AbstractCollection implements IntervalStoreI { /** * Search for the last interval that starts before or at the specified from/to * range and the first interval that starts after it. In the situation that * there are multiple intervals starting at from, this method returns the * first of those. * * @param a * @param from * @param to * @param ret * @return */ public int binaryLastIntervalSearch(long from, long to, int[] ret) { int start = 0, start2 = 0; int matched = 0; int end = intervalCount - 1, end2 = intervalCount; int mid, begin; IntervalI e; while (start <= end) { mid = (start + end) >>> 1; e = intervals[mid]; begin = e.getBegin(); switch (Long.signum(begin - from)) { case -1: matched = mid; start = mid + 1; break; case 0: case 1: end = mid - 1; if (begin > to) { end2 = mid; } else { start2 = mid; } break; } } ret[0] = end2; start = Math.max(start2, end); end = end2 - 1; while (start <= end) { mid = (start + end) >>> 1; e = intervals[mid]; begin = e.getBegin(); if (begin > to) { ret[0] = mid; end = mid - 1; } else { start = mid + 1; } } return matched; } /** * My preference is for a bigendian comparison, but you may differ. */ private Comparator icompare; /** * bigendian is what NCList does; change icompare to switch to that */ private boolean bigendian; private final boolean DO_PRESORT; private boolean isSorted; private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, maxEnd = Integer.MAX_VALUE; // private Comparator icompare = new IntervalComparator(); private boolean isTainted; private int capacity = 8; private IntervalI[] intervals = new IntervalI[capacity]; private int[] offsets; private int[] ret = new int[1]; private int intervalCount; private int added; private int deleted; private BitSet bsDeleted; /** * Constructor */ public IntervalStore() { this(true); } public IntervalStore(boolean presort) { this(null, presort); } /** * Constructor given a list of intervals. Note that the list may get sorted as * a side-effect of calling this constructor. */ public IntervalStore(List intervals) { this(intervals, true); } /** * Allows a presort option, which can speed up initial loading of individual * features but will delay the first findOverlap if set to true. * * @param intervals * @param presort */ public IntervalStore(List intervals, boolean presort) { this(intervals, presort, null, false); } /** * * @param intervals * intervals to initialize with (others may still be added) * @param presort * whether or not to presort the list as additions are made * @param comparator * IntervalI.COMPARATOR_LITTLEENDIAN or * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that * sorts by description as well, for example. * @param bigendian * true if the comparator sorts [10-30] before [10-20] */ public IntervalStore(List intervals, boolean presort, Comparator comparator, boolean bigendian) { if (intervals != null) { intervals.toArray( this.intervals = new IntervalI[capacity = intervalCount = intervals .size()]); } DO_PRESORT = presort; icompare = (comparator != null ? comparator : bigendian ? IntervalI.COMPARATOR_BIGENDIAN : IntervalI.COMPARATOR_LITTLEENDIAN); this.bigendian = bigendian; if (DO_PRESORT && intervalCount > 1) { sort(); } isSorted = DO_PRESORT; isTainted = true; } /** * Adds one interval to the store, allowing duplicates. * * @param interval */ @Override public boolean add(T interval) { return add(interval, true); } /** * Adds one interval to the store, optionally checking for duplicates. * * @param interval * @param allowDuplicates */ public boolean add(T interval, boolean allowDuplicates) { if (interval == null) { return false; } if (deleted > 0) { finalizeDeletion(); } if (!isTainted) { offsets = null; isTainted = true; } synchronized (intervals) { int index = intervalCount; int start = interval.getBegin(); if (intervalCount + added + 1 >= capacity) { intervals = finalizeAddition( new IntervalI[capacity = capacity << 1]); } if (DO_PRESORT && isSorted) { if (intervalCount == 0) { // ignore } else { index = findInterval(interval); if (!allowDuplicates && index >= 0) { return false; } if (index < 0) { index = -1 - index; } else { index++; } } } else { if (!allowDuplicates && findInterval(interval) >= 0) { return false; } isSorted = false; } if (index == intervalCount) { intervals[intervalCount++] = interval; // System.out.println("added " + intervalCount + " " + interval); } else { int pt = capacity - ++added; intervals[pt] = interval; // System.out.println("stashed " + pt + " " + interval + " for " // + index + " " + intervals[index]); if (offsets == null) { offsets = new int[capacity]; } offsets[pt] = offsets[index]; offsets[index] = pt; } minStart = Math.min(minStart, start); maxStart = Math.max(maxStart, start); return true; } } private IntervalI[] finalizeAddition(IntervalI[] dest) { if (dest == null) { dest = intervals; } if (added == 0) { if (intervalCount > 0 && dest != intervals) { System.arraycopy(intervals, 0, dest, 0, intervalCount); } capacity = dest.length; return dest; } // array is [(intervalCount)...null...(added)] int ntotal = intervalCount + added; for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;) { int pt0 = pt; while (--pt >= 0 && offsets[pt] == 0) { ; } if (pt < 0) { pt = 0; } int nOK = pt0 - pt; // shift upper intervals right ptShift -= nOK; if (nOK > 0) { System.arraycopy(intervals, pt, dest, ptShift, nOK); } if (added == 0) { break; } for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) { dest[--ptShift] = intervals[offset]; --added; } } offsets = null; intervalCount = ntotal; capacity = dest.length; return dest; } public int binaryIdentitySearch(IntervalI interval) { return binaryIdentitySearch(interval, null); } /** * for remove() and contains() * * @param list * @param interval * @param bsIgnore * for deleted * @return index or, if not found, -1 - "would be here" */ public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore) { int start = 0; int r0 = interval.getBegin(); int r1 = interval.getEnd(); int end = intervalCount - 1; if (end < 0 || r0 < minStart) { return -1; } if (r0 > maxStart) { return -1 - intervalCount; } while (start <= end) { int mid = (start + end) >>> 1; IntervalI r = intervals[mid]; switch (compareRange(r, r0, r1)) { case -1: start = mid + 1; continue; case 1: end = mid - 1; continue; case 0: IntervalI iv = intervals[mid]; if ((bsIgnore == null || !bsIgnore.get(mid)) && iv.equalsInterval(interval)) { return mid; // found one; just scan up and down now, first checking the range, but // also checking other possible aspects of equivalence. } for (int i = mid; ++i <= end;) { if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1) { break; } if ((bsIgnore == null || !bsIgnore.get(i)) && iv.equalsInterval(interval)) { return i; } } for (int i = mid; --i >= start;) { if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1) { return -1 - ++i; } if ((bsIgnore == null || !bsIgnore.get(i)) && iv.equalsInterval(interval)) { return i; } } return -1 - start; } } return -1 - start; } // private int binaryInsertionSearch(long from, long to) // { // int matched = intervalCount; // int end = matched - 1; // int start = matched; // if (end < 0 || from > intervals[end].getEnd() // || from < intervals[start = 0].getBegin()) // return start; // while (start <= end) // { // int mid = (start + end) >>> 1; // switch (compareRange(intervals[mid], from, to)) // { // case 0: // return mid; // case 1: // matched = mid; // end = mid - 1; // continue; // case -1: // start = mid + 1; // continue; // } // // } // return matched; // } @Override public void clear() { intervalCount = added = 0; isSorted = true; isTainted = true; offsets = null; minStart = maxEnd = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } /** * Compare an interval t to a from/to range for insertion purposes * * @param t * @param from * @param to * @return 0 if same, 1 if start is after from, or start equals from and * [bigendian: end is before to | littleendian: end is after to], else * -1 */ private int compareRange(IntervalI t, long from, long to) { if (t == null) { System.out.println("???"); } int order = Long.signum(t.getBegin() - from); return (order == 0 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) : order); } @Override public boolean contains(Object entry) { if (entry == null || intervalCount == 0) { return false; } if (!isSorted || deleted > 0) { sort(); } return (findInterval((IntervalI) entry) >= 0); } public boolean containsInterval(IntervalI outer, IntervalI inner) { ensureFinalized(); int index = binaryIdentitySearch(inner, null); if (index >= 0) { while ((index = index - Math.abs(offsets[index])) >= 0) { if (intervals[index] == outer) { return true; } } } return false; } private void ensureFinalized() { if (isTainted) { if (!isSorted || added > 0 || deleted > 0) { sort(); } if (offsets == null || offsets.length < intervalCount) { offsets = new int[intervalCount]; } linkFeatures(); isTainted = false; } } /** * Find all overlaps within the given range, inclusively. * * @return a list sorted in ascending order of start position * */ @Override public List findOverlaps(long from, long to) { List list = findOverlaps(from, to, null); Collections.reverse(list); return list; } /** * Find all overlaps within the given range, inclusively. * * @return a list sorted in descending order of start position * */ @SuppressWarnings("unchecked") @Override public List findOverlaps(long from, long to, List result) { if (result == null) { result = new ArrayList<>(); } switch (intervalCount) { case 0: return result; case 1: IntervalI sf = intervals[0]; if (sf.getBegin() <= to && sf.getEnd() >= from) { result.add((T) sf); } return result; } ensureFinalized(); if (from > maxEnd || to < minStart) { return result; } int index = binaryLastIntervalSearch(from, to, ret); int index1 = ret[0]; if (index1 < 0) { return result; } if (index1 > index + 1) { while (--index1 > index) { result.add((T) intervals[index1]); } } boolean isMonotonic = false; while (index >= 0) { IntervalI sf = intervals[index]; if (sf.getEnd() >= from) { result.add((T) sf); } else if (isMonotonic) { break; } int offset = offsets[index]; isMonotonic = (offset < 0); index -= (isMonotonic ? -offset : offset); } return result; } @Override public IntervalI get(int i) { if (i < 0 || i >= intervalCount + added) { return null; } ensureFinalized(); return intervals[i]; } private int getContainedBy(int index, int begin) { while (index >= 0) { IntervalI sf = intervals[index]; if (sf.getEnd() >= begin) { // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 // + "\nFS in " + sf.getIndex1() + ":" + sf); return index; } index -= Math.abs(offsets[index]); } return IntervalI.NOT_CONTAINED; } @Override public int getDepth() { ensureFinalized(); if (intervalCount < 2) { return intervalCount; } int maxDepth = 1; IntervalI root = null; for (int i = 0; i < intervalCount; i++) { IntervalI element = intervals[i]; if (offsets[i] == IntervalI.NOT_CONTAINED) { root = element; } int depth = 1; int index = i; int offset; while ((index = index - Math.abs(offset = offsets[index])) >= 0) { element = intervals[index]; if (++depth > maxDepth && (element == root || offset < 0)) { maxDepth = depth; break; } } } return maxDepth; } @Override public int getWidth() { ensureFinalized(); int w = 0; for (int i = offsets.length; --i >= 0;) { if (offsets[i] > 0) { w++; } } return w; } @Override public boolean isValid() { ensureFinalized(); return true; } /** * Answers an iterator over the intervals in the store, with no particular * ordering guaranteed. The iterator does not support the optional * remove operation (throws * UnsupportedOperationException if attempted). */ @Override public Iterator iterator() { ensureFinalized(); return new Iterator() { private int next; @Override public boolean hasNext() { return next < intervalCount; } @SuppressWarnings("unchecked") @Override public T next() { if (next >= intervalCount) { throw new NoSuchElementException(); } return (T) intervals[next++]; } }; } private void linkFeatures() { if (intervalCount == 0) { return; } maxEnd = intervals[0].getEnd(); offsets[0] = IntervalI.NOT_CONTAINED; if (intervalCount == 1) { return; } boolean isMonotonic = true; for (int i = 1; i < intervalCount; i++) { IntervalI sf = intervals[i]; int begin = sf.getBegin(); int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1); // System.out.println(sf + " is contained by " // + (index < 0 ? null : starts[index])); offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED : isMonotonic ? index - i : i - index); isMonotonic = (sf.getEnd() > maxEnd); if (isMonotonic) { maxEnd = sf.getEnd(); } } } @Override public String prettyPrint() { switch (intervalCount) { case 0: return ""; case 1: return intervals[0] + "\n"; } ensureFinalized(); String sep = "\t"; StringBuffer sb = new StringBuffer(); for (int i = 0; i < intervalCount; i++) { IntervalI range = intervals[i]; int index = i; while ((index = index - Math.abs(offsets[index])) >= 0) { sb.append(sep); } sb.append(range.toString()).append('\n'); } return sb.toString(); } @Override public synchronized boolean remove(Object o) { // if (o == null) // { // throw new NullPointerException(); // } return (o != null && intervalCount > 0 && removeInterval((IntervalI) o)); } /** * Find the interval or return where it should go, possibly into the add * buffer * * @param interval * @return index (nonnegative) or index where it would go (negative) */ private int findInterval(IntervalI interval) { if (isSorted) { int pt = binaryIdentitySearch(interval, null); // if (addPt == intervalCount || offsets[pt] == 0) // return pt; if (pt >= 0 || added == 0 || pt == -1 - intervalCount) { return pt; } pt = -1 - pt; int start = interval.getBegin(); int end = interval.getEnd(); int match = pt; while ((pt = offsets[pt]) != 0) { IntervalI iv = intervals[pt]; switch (compareRange(iv, start, end)) { case -1: break; case 0: if (iv.equalsInterval(interval)) { return pt; } // fall through case 1: match = pt; continue; } } return -1 - match; } else { int i = intervalCount; while (--i >= 0 && !intervals[i].equalsInterval(interval)) { ; } return i; } } /** * Uses a binary search to find the entry and removes it if found. * * @param interval * @return */ protected boolean removeInterval(IntervalI interval) { if (!isSorted || added > 0) { sort(); } int i = binaryIdentitySearch(interval, bsDeleted); if (i < 0) { return false; } if (deleted == 0) { if (bsDeleted == null) { bsDeleted = new BitSet(intervalCount); } else { bsDeleted.clear(); } } bsDeleted.set(i); deleted++; return (isTainted = true); } private void finalizeDeletion() { if (deleted == 0) { return; } // ......xxx.....xxxx.....xxxxx.... // ......^i,pt // ...... ....... // ............ for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;) { i = bsDeleted.nextClearBit(i + 1); int pt1 = bsDeleted.nextSetBit(i + 1); if (pt1 < 0) { pt1 = intervalCount; } int n = pt1 - i; System.arraycopy(intervals, i, intervals, pt, n); pt += n; if (pt1 == intervalCount) { for (i = pt1; --i >= pt;) { intervals[i] = null; } intervalCount -= deleted; deleted = 0; bsDeleted.clear(); break; } i = pt1; } } @Override public boolean revalidate() { isTainted = true; isSorted = false; ensureFinalized(); return true; } @Override public int size() { return intervalCount + added - deleted; } @Override public Object[] toArray() { ensureFinalized(); return super.toArray(); } /** * Sort intervals by start (lowest first) and end (highest first). */ private void sort() { if (added > 0) { intervals = finalizeAddition(new IntervalI[intervalCount + added]); } else if (deleted > 0) { finalizeDeletion(); } else { Arrays.sort(intervals, 0, intervalCount, icompare); } updateMinMaxStart(); isSorted = true; } private void updateMinMaxStart() { if (intervalCount > 0) { minStart = intervals[0].getBegin(); maxStart = intervals[intervalCount - 1].getBegin(); } else { minStart = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } } @Override public String toString() { return prettyPrint(); } }