4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import java.util.AbstractCollection;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Comparator;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.NoSuchElementException;
44 import intervalstore.api.IntervalI;
45 import intervalstore.api.IntervalStoreI;
49 * A second idea, doing a double binary sort for the full interval. Seemed like
50 * a good idea, but is 50% slower.
52 * A Collection class to store interval-associated data, with options for "lazy"
53 * sorting so as to speed incremental construction of the data prior to issuing
57 * Accepts duplicate entries but not null values.
61 * @author Bob Hanson 2019.08.06
64 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
65 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
67 public class IntervalStore0<T extends IntervalI>
68 extends AbstractCollection<T> implements IntervalStoreI<T>
70 private static int NOT_CONTAINED = Integer.MIN_VALUE;
73 * Search for the last interval that starts before or at the specified from/to
74 * range and the first interval that starts after it. In the situation that
75 * there are multiple intervals starting at from, this method returns the
84 public int binaryLastIntervalSearch(long from, long to, int[] ret)
86 int start = 0, start2 = 0;
88 int end = intervalCount - 1, end2 = intervalCount;
93 mid = (start + end) >>> 1;
96 switch (Long.signum(begin - from))
117 start = Math.max(start2, end);
122 mid = (start + end) >>> 1;
124 begin = e.getBegin();
140 * My preference is for a bigendian comparison, but you may differ.
142 private Comparator<? super IntervalI> icompare;
145 * bigendian is what NCList does; change icompare to switch to that
147 private boolean bigendian;
149 private final boolean DO_PRESORT;
151 private boolean isSorted;
153 private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
154 maxEnd = Integer.MAX_VALUE;
156 // private Comparator<IntervalI> icompare = new IntervalComparator();
158 private boolean isTainted;
160 private int capacity = 8;
162 protected IntervalI[] intervals = new IntervalI[capacity];
164 private int[] offsets;
166 private int[] ret = new int[1];
168 protected int intervalCount;
174 private BitSet bsDeleted;
179 public IntervalStore0()
184 public IntervalStore0(boolean presort)
190 * Constructor given a list of intervals. Note that the list may get sorted as
191 * a side-effect of calling this constructor.
193 public IntervalStore0(List<T> intervals)
195 this(intervals, true);
199 * Allows a presort option, which can speed up initial loading of individual
200 * features but will delay the first findOverlap if set to true.
205 public IntervalStore0(List<T> intervals, boolean presort)
207 this(intervals, presort, null, false);
213 * intervals to initialize with (others may still be added)
215 * whether or not to presort the list as additions are made
217 * IntervalI.COMPARATOR_LITTLEENDIAN or
218 * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that
219 * sorts by description as well, for example.
221 * true if the comparator sorts [10-30] before [10-20]
223 public IntervalStore0(List<T> intervals, boolean presort,
224 Comparator<? super IntervalI> comparator, boolean bigendian)
226 if (intervals != null)
229 this.intervals = new IntervalI[capacity = intervalCount = intervals
232 DO_PRESORT = presort;
233 icompare = (comparator != null ? comparator
234 : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC
235 : IntervalI.COMPARE_BEGIN_ASC_END_ASC);
236 this.bigendian = bigendian;
238 if (DO_PRESORT && intervalCount > 1)
242 isSorted = DO_PRESORT;
247 * Adds one interval to the store, allowing duplicates.
252 public boolean add(T interval)
254 return add(interval, false);
258 * Adds one interval to the store, optionally checking for duplicates.
261 * @param allowDuplicates
264 public boolean add(T interval, boolean allowDuplicates)
266 if (interval == null)
281 synchronized (intervals)
283 int index = intervalCount;
284 int start = interval.getBegin();
286 if (intervalCount + added + 1 >= capacity)
288 intervals = finalizeAddition(
289 new IntervalI[capacity = capacity << 1]);
293 if (DO_PRESORT && isSorted)
295 if (intervalCount == 0)
301 index = findInterval(interval);
302 if (!allowDuplicates && index >= 0)
319 if (!allowDuplicates && findInterval(interval) >= 0)
326 if (index == intervalCount)
328 intervals[intervalCount++] = interval;
329 // System.out.println("added " + intervalCount + " " + interval);
333 int pt = capacity - ++added;
334 intervals[pt] = interval;
335 // System.out.println("stashed " + pt + " " + interval + " for "
336 // + index + " " + intervals[index]);
339 offsets = new int[capacity];
342 offsets[pt] = offsets[index];
347 minStart = Math.min(minStart, start);
348 maxStart = Math.max(maxStart, start);
353 private IntervalI[] finalizeAddition(IntervalI[] dest)
361 if (intervalCount > 0 && dest != intervals)
363 System.arraycopy(intervals, 0, dest, 0, intervalCount);
365 capacity = dest.length;
369 // array is [(intervalCount)...null...(added)]
371 int ntotal = intervalCount + added;
372 for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;)
375 while (--pt >= 0 && offsets[pt] == 0)
384 // shift upper intervals right
388 System.arraycopy(intervals, pt, dest, ptShift, nOK);
394 for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
396 dest[--ptShift] = intervals[offset];
401 intervalCount = ntotal;
402 capacity = dest.length;
406 public int binaryIdentitySearch(IntervalI interval)
408 return binaryIdentitySearch(interval, null);
412 * for remove() and contains()
418 * @return index or, if not found, -1 - "would be here"
420 public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore)
423 int r0 = interval.getBegin();
424 int r1 = interval.getEnd();
425 int end = intervalCount - 1;
426 if (end < 0 || r0 < minStart)
432 return -1 - intervalCount;
436 int mid = (start + end) >>> 1;
437 IntervalI r = intervals[mid];
438 switch (compareRange(r, r0, r1))
447 IntervalI iv = intervals[mid];
448 if ((bsIgnore == null || !bsIgnore.get(mid))
449 && sameInterval(interval, iv))
452 // found one; just scan up and down now, first checking the range, but
453 // also checking other possible aspects of equivalence.
456 for (int i = mid; ++i <= end;)
458 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
462 if ((bsIgnore == null || !bsIgnore.get(i))
463 && sameInterval(interval, iv))
468 for (int i = mid; --i >= start;)
470 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
474 if ((bsIgnore == null || !bsIgnore.get(i))
475 && sameInterval(interval, iv))
486 boolean sameInterval(IntervalI i1, IntervalI i2)
489 * do the quick test of begin/end first for speed
491 return i1.equalsInterval(i2) && i1.equals(i2);
497 intervalCount = added = 0;
501 minStart = maxEnd = Integer.MAX_VALUE;
502 maxStart = Integer.MIN_VALUE;
506 * Compare an interval t to a from/to range for insertion purposes
511 * @return 0 if same, 1 if start is after from, or start equals from and
512 * [bigendian: end is before to | littleendian: end is after to], else
515 private int compareRange(IntervalI t, long from, long to)
517 int order = Long.signum(t.getBegin() - from);
519 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
524 public boolean contains(Object entry)
526 if (entry == null || intervalCount == 0)
530 if (!isSorted || deleted > 0)
534 return (findInterval((IntervalI) entry) >= 0);
537 public boolean containsInterval(IntervalI outer, IntervalI inner)
540 int index = binaryIdentitySearch(inner, null);
543 while ((index = index - Math.abs(offsets[index])) >= 0)
545 if (intervals[index] == outer)
554 private void ensureFinalized()
558 if (!isSorted || added > 0 || deleted > 0)
562 if (offsets == null || offsets.length < intervalCount)
564 offsets = new int[intervalCount];
572 * Find all overlaps within the given range, inclusively.
574 * @return a list sorted in ascending order of start position
578 public List<T> findOverlaps(long from, long to)
580 List<T> list = findOverlaps(from, to, null);
581 Collections.reverse(list);
586 * Find all overlaps within the given range, inclusively.
588 * @return a list sorted in descending order of start position
592 @SuppressWarnings("unchecked")
594 public List<T> findOverlaps(long from, long to, List<T> result)
598 result = new ArrayList<>();
600 switch (intervalCount + added)
605 IntervalI sf = intervals[0];
606 if (sf.getBegin() <= to && sf.getEnd() >= from)
615 if (from > maxEnd || to < minStart)
619 int index = binaryLastIntervalSearch(from, to, ret);
626 if (index1 > index + 1)
628 while (--index1 > index)
630 result.add((T) intervals[index1]);
633 boolean isMonotonic = false;
636 IntervalI sf = intervals[index];
637 if (sf.getEnd() >= from)
641 else if (isMonotonic)
645 int offset = offsets[index];
646 isMonotonic = (offset < 0);
647 index -= (isMonotonic ? -offset : offset);
652 private int getContainedBy(int index, int begin)
656 IntervalI sf = intervals[index];
657 if (sf.getEnd() >= begin)
659 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
660 // + "\nFS in " + sf.getIndex1() + ":" + sf);
663 index -= Math.abs(offsets[index]);
665 return NOT_CONTAINED;
669 public int getDepth()
672 if (intervalCount < 2)
674 return intervalCount;
677 IntervalI root = null;
678 for (int i = 0; i < intervalCount; i++)
680 IntervalI element = intervals[i];
681 if (offsets[i] == NOT_CONTAINED)
688 while ((index = index - Math.abs(offset = offsets[index])) >= 0)
690 element = intervals[index];
691 if (++depth > maxDepth && (element == root || offset < 0))
702 * Answers an iterator over the intervals in the store, with no particular
703 * ordering guaranteed. The iterator does not support the optional
704 * <code>remove</code> operation (throws
705 * <code>UnsupportedOperationException</code> if attempted).
708 public Iterator<T> iterator()
711 return new Iterator<T>()
717 public boolean hasNext()
719 return next < intervalCount;
722 @SuppressWarnings("unchecked")
726 if (next >= intervalCount)
728 throw new NoSuchElementException();
730 return (T) intervals[next++];
736 private void linkFeatures()
738 if (intervalCount == 0)
742 maxEnd = intervals[0].getEnd();
743 offsets[0] = NOT_CONTAINED;
744 if (intervalCount == 1)
748 boolean isMonotonic = true;
749 for (int i = 1; i < intervalCount; i++)
751 IntervalI sf = intervals[i];
752 int begin = sf.getBegin();
753 int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
754 // System.out.println(sf + " is contained by "
755 // + (index < 0 ? null : starts[index]));
757 offsets[i] = (index < 0 ? NOT_CONTAINED
758 : isMonotonic ? index - i : i - index);
759 isMonotonic = (sf.getEnd() > maxEnd);
762 maxEnd = sf.getEnd();
769 public String prettyPrint()
771 switch (intervalCount + added)
776 return intervals[0] + "\n";
780 StringBuffer sb = new StringBuffer();
781 for (int i = 0; i < intervalCount; i++)
783 IntervalI range = intervals[i];
785 while ((index = index - Math.abs(offsets[index])) >= 0)
789 sb.append(range.toString()).append('\n');
791 return sb.toString();
795 public synchronized boolean remove(Object o)
799 // throw new NullPointerException();
801 return (o != null && intervalCount > 0
802 && removeInterval((IntervalI) o));
806 * Find the interval or return where it should go, possibly into the add
810 * @return index (nonnegative) or index where it would go (negative)
813 private int findInterval(IntervalI interval)
818 int pt = binaryIdentitySearch(interval, null);
819 // if (addPt == intervalCount || offsets[pt] == 0)
821 if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
826 int start = interval.getBegin();
827 int end = interval.getEnd();
831 while ((pt = offsets[pt]) != 0)
833 IntervalI iv = intervals[pt];
834 switch (compareRange(iv, start, end))
839 if (sameInterval(interval, iv))
853 int i = intervalCount;
854 while (--i >= 0 && !sameInterval(intervals[i], interval))
863 * Uses a binary search to find the entry and removes it if found.
868 protected boolean removeInterval(IntervalI interval)
871 if (!isSorted || added > 0)
875 int i = binaryIdentitySearch(interval, bsDeleted);
882 if (bsDeleted == null)
884 bsDeleted = new BitSet(intervalCount);
893 return (isTainted = true);
896 private void finalizeDeletion()
903 // ......xxx.....xxxx.....xxxxx....
907 for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;)
909 i = bsDeleted.nextClearBit(i + 1);
910 int pt1 = bsDeleted.nextSetBit(i + 1);
916 System.arraycopy(intervals, i, intervals, pt, n);
918 if (pt1 == intervalCount)
920 for (i = pt1; --i >= pt;)
924 intervalCount -= deleted;
937 return intervalCount + added - deleted;
941 public Object[] toArray()
944 return super.toArray();
948 * Sort intervals by start (lowest first) and end (highest first).
954 intervals = finalizeAddition(new IntervalI[intervalCount + added]);
956 else if (deleted > 0)
962 Arrays.sort(intervals, 0, intervalCount, icompare);
968 private void updateMinMaxStart()
970 if (intervalCount > 0)
972 minStart = intervals[0].getBegin();
973 maxStart = intervals[intervalCount - 1].getBegin();
977 minStart = Integer.MAX_VALUE;
978 maxStart = Integer.MIN_VALUE;
983 public String toString()
985 return prettyPrint();