4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import java.util.AbstractCollection;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Comparator;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.NoSuchElementException;
44 import intervalstore.api.IntervalI;
45 import intervalstore.api.IntervalStoreI;
49 * A second idea, doing a double binary sort for the full interval. Seemed like
50 * a good idea, but is 50% slower.
52 * A Collection class to store interval-associated data, with options for "lazy"
53 * sorting so as to speed incremental construction of the data prior to issuing
57 * Accepts duplicate entries but not null values.
61 * @author Bob Hanson 2019.08.06
64 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
65 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
67 public class IntervalStore0<T extends IntervalI>
68 extends AbstractCollection<T> implements IntervalStoreI<T>
72 * Search for the last interval that starts before or at the specified from/to
73 * range and the first interval that starts after it. In the situation that
74 * there are multiple intervals starting at from, this method returns the
83 public int binaryLastIntervalSearch(long from, long to, int[] ret)
85 int start = 0, start2 = 0;
87 int end = intervalCount - 1, end2 = intervalCount;
92 mid = (start + end) >>> 1;
95 switch (Long.signum(begin - from))
116 start = Math.max(start2, end);
121 mid = (start + end) >>> 1;
123 begin = e.getBegin();
139 * My preference is for a bigendian comparison, but you may differ.
141 private Comparator<? super IntervalI> icompare;
144 * bigendian is what NCList does; change icompare to switch to that
146 private boolean bigendian;
148 private final boolean DO_PRESORT;
150 private boolean isSorted;
152 private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
153 maxEnd = Integer.MAX_VALUE;
155 // private Comparator<IntervalI> icompare = new IntervalComparator();
157 private boolean isTainted;
159 private int capacity = 8;
161 protected IntervalI[] intervals = new IntervalI[capacity];
163 private int[] offsets;
165 private int[] ret = new int[1];
167 protected int intervalCount;
173 private BitSet bsDeleted;
178 public IntervalStore0()
183 public IntervalStore0(boolean presort)
189 * Constructor given a list of intervals. Note that the list may get sorted as
190 * a side-effect of calling this constructor.
192 public IntervalStore0(List<T> intervals)
194 this(intervals, true);
198 * Allows a presort option, which can speed up initial loading of individual
199 * features but will delay the first findOverlap if set to true.
204 public IntervalStore0(List<T> intervals, boolean presort)
206 this(intervals, presort, null, false);
212 * intervals to initialize with (others may still be added)
214 * whether or not to presort the list as additions are made
216 * IntervalI.COMPARATOR_LITTLEENDIAN or
217 * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that
218 * sorts by description as well, for example.
220 * true if the comparator sorts [10-30] before [10-20]
222 public IntervalStore0(List<T> intervals, boolean presort,
223 Comparator<? super IntervalI> comparator, boolean bigendian)
225 if (intervals != null)
228 this.intervals = new IntervalI[capacity = intervalCount = intervals
231 DO_PRESORT = presort;
232 icompare = (comparator != null ? comparator
233 : bigendian ? IntervalI.COMPARATOR_BIGENDIAN
234 : IntervalI.COMPARATOR_LITTLEENDIAN);
235 this.bigendian = bigendian;
237 if (DO_PRESORT && intervalCount > 1)
241 isSorted = DO_PRESORT;
246 * Adds one interval to the store, allowing duplicates.
251 public boolean add(T interval)
253 return add(interval, true);
257 * Adds one interval to the store, optionally checking for duplicates.
260 * @param allowDuplicates
263 public boolean add(T interval, boolean allowDuplicates)
265 if (interval == null)
280 synchronized (intervals)
282 int index = intervalCount;
283 int start = interval.getBegin();
285 if (intervalCount + added + 1 >= capacity)
287 intervals = finalizeAddition(
288 new IntervalI[capacity = capacity << 1]);
292 if (DO_PRESORT && isSorted)
294 if (intervalCount == 0)
300 index = findInterval(interval);
301 if (!allowDuplicates && index >= 0)
318 if (!allowDuplicates && findInterval(interval) >= 0)
325 if (index == intervalCount)
327 intervals[intervalCount++] = interval;
328 // System.out.println("added " + intervalCount + " " + interval);
332 int pt = capacity - ++added;
333 intervals[pt] = interval;
334 // System.out.println("stashed " + pt + " " + interval + " for "
335 // + index + " " + intervals[index]);
338 offsets = new int[capacity];
341 offsets[pt] = offsets[index];
346 minStart = Math.min(minStart, start);
347 maxStart = Math.max(maxStart, start);
352 private IntervalI[] finalizeAddition(IntervalI[] dest)
360 if (intervalCount > 0 && dest != intervals)
362 System.arraycopy(intervals, 0, dest, 0, intervalCount);
364 capacity = dest.length;
368 // array is [(intervalCount)...null...(added)]
370 int ntotal = intervalCount + added;
371 for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;)
374 while (--pt >= 0 && offsets[pt] == 0)
383 // shift upper intervals right
387 System.arraycopy(intervals, pt, dest, ptShift, nOK);
393 for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
395 dest[--ptShift] = intervals[offset];
400 intervalCount = ntotal;
401 capacity = dest.length;
405 public int binaryIdentitySearch(IntervalI interval)
407 return binaryIdentitySearch(interval, null);
411 * for remove() and contains()
417 * @return index or, if not found, -1 - "would be here"
419 public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore)
422 int r0 = interval.getBegin();
423 int r1 = interval.getEnd();
424 int end = intervalCount - 1;
425 if (end < 0 || r0 < minStart)
431 return -1 - intervalCount;
435 int mid = (start + end) >>> 1;
436 IntervalI r = intervals[mid];
437 switch (compareRange(r, r0, r1))
446 IntervalI iv = intervals[mid];
447 if ((bsIgnore == null || !bsIgnore.get(mid))
448 && iv.equalsInterval(interval))
451 // found one; just scan up and down now, first checking the range, but
452 // also checking other possible aspects of equivalence.
455 for (int i = mid; ++i <= end;)
457 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
461 if ((bsIgnore == null || !bsIgnore.get(i))
462 && iv.equalsInterval(interval))
467 for (int i = mid; --i >= start;)
469 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
473 if ((bsIgnore == null || !bsIgnore.get(i))
474 && iv.equalsInterval(interval))
485 // private int binaryInsertionSearch(long from, long to)
487 // int matched = intervalCount;
488 // int end = matched - 1;
489 // int start = matched;
490 // if (end < 0 || from > intervals[end].getEnd()
491 // || from < intervals[start = 0].getBegin())
493 // while (start <= end)
495 // int mid = (start + end) >>> 1;
496 // switch (compareRange(intervals[mid], from, to))
516 intervalCount = added = 0;
520 minStart = maxEnd = Integer.MAX_VALUE;
521 maxStart = Integer.MIN_VALUE;
525 * Compare an interval t to a from/to range for insertion purposes
530 * @return 0 if same, 1 if start is after from, or start equals from and
531 * [bigendian: end is before to | littleendian: end is after to], else
534 private int compareRange(IntervalI t, long from, long to)
536 int order = Long.signum(t.getBegin() - from);
538 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
543 public boolean contains(Object entry)
545 if (entry == null || intervalCount == 0)
549 if (!isSorted || deleted > 0)
553 return (findInterval((IntervalI) entry) >= 0);
556 public boolean containsInterval(IntervalI outer, IntervalI inner)
559 int index = binaryIdentitySearch(inner, null);
562 while ((index = index - Math.abs(offsets[index])) >= 0)
564 if (intervals[index] == outer)
573 private void ensureFinalized()
577 if (!isSorted || added > 0 || deleted > 0)
581 if (offsets == null || offsets.length < intervalCount)
583 offsets = new int[intervalCount];
591 * Find all overlaps within the given range, inclusively.
593 * @return a list sorted in ascending order of start position
597 public List<T> findOverlaps(long from, long to)
599 List<T> list = findOverlaps(from, to, null);
600 Collections.reverse(list);
605 * Find all overlaps within the given range, inclusively.
607 * @return a list sorted in descending order of start position
611 @SuppressWarnings("unchecked")
613 public List<T> findOverlaps(long from, long to, List<T> result)
617 result = new ArrayList<>();
619 switch (intervalCount + added)
624 IntervalI sf = intervals[0];
625 if (sf.getBegin() <= to && sf.getEnd() >= from)
634 if (from > maxEnd || to < minStart)
638 int index = binaryLastIntervalSearch(from, to, ret);
645 if (index1 > index + 1)
647 while (--index1 > index)
649 result.add((T) intervals[index1]);
652 boolean isMonotonic = false;
655 IntervalI sf = intervals[index];
656 if (sf.getEnd() >= from)
660 else if (isMonotonic)
664 int offset = offsets[index];
665 isMonotonic = (offset < 0);
666 index -= (isMonotonic ? -offset : offset);
672 public IntervalI get(int i)
674 if (i < 0 || i >= intervalCount + added)
682 private int getContainedBy(int index, int begin)
686 IntervalI sf = intervals[index];
687 if (sf.getEnd() >= begin)
689 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
690 // + "\nFS in " + sf.getIndex1() + ":" + sf);
693 index -= Math.abs(offsets[index]);
695 return IntervalI.NOT_CONTAINED;
699 public int getDepth()
702 if (intervalCount < 2)
704 return intervalCount;
707 IntervalI root = null;
708 for (int i = 0; i < intervalCount; i++)
710 IntervalI element = intervals[i];
711 if (offsets[i] == IntervalI.NOT_CONTAINED)
718 while ((index = index - Math.abs(offset = offsets[index])) >= 0)
720 element = intervals[index];
721 if (++depth > maxDepth && (element == root || offset < 0))
732 public int getWidth()
736 for (int i = offsets.length; --i >= 0;)
747 public boolean isValid()
754 * Answers an iterator over the intervals in the store, with no particular
755 * ordering guaranteed. The iterator does not support the optional
756 * <code>remove</code> operation (throws
757 * <code>UnsupportedOperationException</code> if attempted).
760 public Iterator<T> iterator()
763 return new Iterator<T>()
769 public boolean hasNext()
771 return next < intervalCount;
774 @SuppressWarnings("unchecked")
778 if (next >= intervalCount)
780 throw new NoSuchElementException();
782 return (T) intervals[next++];
788 private void linkFeatures()
790 if (intervalCount == 0)
794 maxEnd = intervals[0].getEnd();
795 offsets[0] = IntervalI.NOT_CONTAINED;
796 if (intervalCount == 1)
800 boolean isMonotonic = true;
801 for (int i = 1; i < intervalCount; i++)
803 IntervalI sf = intervals[i];
804 int begin = sf.getBegin();
805 int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
806 // System.out.println(sf + " is contained by "
807 // + (index < 0 ? null : starts[index]));
809 offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED
810 : isMonotonic ? index - i : i - index);
811 isMonotonic = (sf.getEnd() > maxEnd);
814 maxEnd = sf.getEnd();
821 public String prettyPrint()
823 switch (intervalCount + added)
828 return intervals[0] + "\n";
832 StringBuffer sb = new StringBuffer();
833 for (int i = 0; i < intervalCount; i++)
835 IntervalI range = intervals[i];
837 while ((index = index - Math.abs(offsets[index])) >= 0)
841 sb.append(range.toString()).append('\n');
843 return sb.toString();
847 public synchronized boolean remove(Object o)
851 // throw new NullPointerException();
853 return (o != null && intervalCount > 0
854 && removeInterval((IntervalI) o));
858 * Find the interval or return where it should go, possibly into the add
862 * @return index (nonnegative) or index where it would go (negative)
865 private int findInterval(IntervalI interval)
870 int pt = binaryIdentitySearch(interval, null);
871 // if (addPt == intervalCount || offsets[pt] == 0)
873 if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
878 int start = interval.getBegin();
879 int end = interval.getEnd();
883 while ((pt = offsets[pt]) != 0)
885 IntervalI iv = intervals[pt];
886 switch (compareRange(iv, start, end))
891 if (iv.equalsInterval(interval))
905 int i = intervalCount;
906 while (--i >= 0 && !intervals[i].equalsInterval(interval))
915 * Uses a binary search to find the entry and removes it if found.
920 protected boolean removeInterval(IntervalI interval)
923 if (!isSorted || added > 0)
927 int i = binaryIdentitySearch(interval, bsDeleted);
934 if (bsDeleted == null)
936 bsDeleted = new BitSet(intervalCount);
945 return (isTainted = true);
948 private void finalizeDeletion()
955 // ......xxx.....xxxx.....xxxxx....
959 for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;)
961 i = bsDeleted.nextClearBit(i + 1);
962 int pt1 = bsDeleted.nextSetBit(i + 1);
968 System.arraycopy(intervals, i, intervals, pt, n);
970 if (pt1 == intervalCount)
972 for (i = pt1; --i >= pt;)
976 intervalCount -= deleted;
987 public boolean revalidate()
998 return intervalCount + added - deleted;
1002 public Object[] toArray()
1005 return super.toArray();
1009 * Sort intervals by start (lowest first) and end (highest first).
1015 intervals = finalizeAddition(new IntervalI[intervalCount + added]);
1017 else if (deleted > 0)
1023 Arrays.sort(intervals, 0, intervalCount, icompare);
1025 updateMinMaxStart();
1029 private void updateMinMaxStart()
1031 if (intervalCount > 0)
1033 minStart = intervals[0].getBegin();
1034 maxStart = intervals[intervalCount - 1].getBegin();
1038 minStart = Integer.MAX_VALUE;
1039 maxStart = Integer.MIN_VALUE;
1044 public String toString()
1046 return prettyPrint();
1050 public boolean canCheckForDuplicates()