4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import java.util.AbstractCollection;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Comparator;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.NoSuchElementException;
44 import intervalstore.api.IntervalI;
45 import intervalstore.api.IntervalStoreI;
49 * A second idea, doing a double binary sort for the full interval. Seemed like
50 * a good idea, but is 50% slower.
52 * A Collection class to store interval-associated data, with options for "lazy"
53 * sorting so as to speed incremental construction of the data prior to issuing
57 * Accepts duplicate entries but not null values.
61 * @author Bob Hanson 2019.08.06
64 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
65 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
67 public class IntervalStore<T extends IntervalI>
68 extends AbstractCollection<T> implements IntervalStoreI<T>
72 * Search for the last interval that starts before or at the specified from/to
73 * range and the first interval that starts after it. In the situation that
74 * there are multiple intervals starting at from, this method returns the
83 public int binaryLastIntervalSearch(long from,
86 int start = 0, start2 = 0;
88 int end = intervalCount - 1, end2 = intervalCount;
93 mid = (start + end) >>> 1;
96 switch (Long.signum(begin - from))
117 start = Math.max(start2, end);
122 mid = (start + end) >>> 1;
124 begin = e.getBegin();
140 * My preference is for a bigendian comparison, but you may differ.
142 private Comparator<? super IntervalI> icompare;
145 * bigendian is what NCList does; change icompare to switch to that
147 private boolean bigendian;
149 private final boolean DO_PRESORT;
151 private boolean isSorted;
153 private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
154 maxEnd = Integer.MAX_VALUE;
156 // private Comparator<IntervalI> icompare = new IntervalComparator();
158 private boolean isTainted;
160 private int capacity = 8;
162 private IntervalI[] intervals = new IntervalI[capacity];
164 private int[] offsets;
166 private int[] ret = new int[1];
168 private int intervalCount;
174 private BitSet bsDeleted;
179 public IntervalStore()
184 public IntervalStore(boolean presort)
190 * Constructor given a list of intervals. Note that the list may get sorted as
191 * a side-effect of calling this constructor.
193 public IntervalStore(List<T> intervals)
195 this(intervals, true);
199 * Allows a presort option, which can speed up initial loading of individual
200 * features but will delay the first findOverlap if set to true.
205 public IntervalStore(List<T> intervals, boolean presort)
207 this(intervals, presort, null, false);
213 * intervals to initialize with (others may still be added)
215 * whether or not to presort the list as additions are made
217 * IntervalI.COMPARATOR_LITTLEENDIAN or
218 * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that
219 * sorts by description as well, for example.
221 * true if the comparator sorts [10-30] before [10-20]
223 public IntervalStore(List<T> intervals, boolean presort,
224 Comparator<? super IntervalI> comparator, boolean bigendian)
226 if (intervals != null)
229 this.intervals = new IntervalI[capacity = intervalCount = intervals
232 DO_PRESORT = presort;
233 icompare = (comparator != null ? comparator
234 : bigendian ? IntervalI.COMPARATOR_BIGENDIAN
235 : IntervalI.COMPARATOR_LITTLEENDIAN);
236 this.bigendian = bigendian;
238 if (DO_PRESORT && intervalCount > 1)
242 isSorted = DO_PRESORT;
247 * Adds one interval to the store, allowing duplicates.
252 public boolean add(T interval)
254 return add(interval, true);
258 * Adds one interval to the store, optionally checking for duplicates.
261 * @param allowDuplicates
263 public boolean add(T interval, boolean allowDuplicates)
265 if (interval == null)
280 synchronized (intervals)
282 int index = intervalCount;
283 int start = interval.getBegin();
285 if (intervalCount + added + 1 >= capacity)
287 intervals = finalizeAddition(
288 new IntervalI[capacity = capacity << 1]);
292 if (DO_PRESORT && isSorted)
294 if (intervalCount == 0)
300 index = findInterval(interval);
301 if (!allowDuplicates && index >= 0)
318 if (!allowDuplicates && findInterval(interval) >= 0)
325 if (index == intervalCount)
327 intervals[intervalCount++] = interval;
328 // System.out.println("added " + intervalCount + " " + interval);
332 int pt = capacity - ++added;
333 intervals[pt] = interval;
334 // System.out.println("stashed " + pt + " " + interval + " for "
335 // + index + " " + intervals[index]);
338 offsets = new int[capacity];
341 offsets[pt] = offsets[index];
346 minStart = Math.min(minStart, start);
347 maxStart = Math.max(maxStart, start);
352 private IntervalI[] finalizeAddition(IntervalI[] dest)
360 if (intervalCount > 0 && dest != intervals)
362 System.arraycopy(intervals, 0, dest, 0, intervalCount);
364 capacity = dest.length;
368 // array is [(intervalCount)...null...(added)]
370 int ntotal = intervalCount + added;
371 for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;)
374 while (--pt >= 0 && offsets[pt] == 0)
383 // shift upper intervals right
387 System.arraycopy(intervals, pt, dest, ptShift, nOK);
393 for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
395 dest[--ptShift] = intervals[offset];
400 intervalCount = ntotal;
401 capacity = dest.length;
405 public int binaryIdentitySearch(IntervalI interval)
407 return binaryIdentitySearch(interval, null);
410 * for remove() and contains()
416 * @return index or, if not found, -1 - "would be here"
418 public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore)
421 int r0 = interval.getBegin();
422 int r1 = interval.getEnd();
423 int end = intervalCount - 1;
424 if (end < 0 || r0 < minStart)
430 return -1 - intervalCount;
434 int mid = (start + end) >>> 1;
435 IntervalI r = intervals[mid];
436 switch (compareRange(r, r0, r1))
445 IntervalI iv = intervals[mid];
446 if ((bsIgnore == null || !bsIgnore.get(mid))
447 && iv.equalsInterval(interval))
450 // found one; just scan up and down now, first checking the range, but
451 // also checking other possible aspects of equivalence.
454 for (int i = mid; ++i <= end;)
456 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
460 if ((bsIgnore == null || !bsIgnore.get(i))
461 && iv.equalsInterval(interval))
466 for (int i = mid; --i >= start;)
468 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
472 if ((bsIgnore == null || !bsIgnore.get(i))
473 && iv.equalsInterval(interval))
484 // private int binaryInsertionSearch(long from, long to)
486 // int matched = intervalCount;
487 // int end = matched - 1;
488 // int start = matched;
489 // if (end < 0 || from > intervals[end].getEnd()
490 // || from < intervals[start = 0].getBegin())
492 // while (start <= end)
494 // int mid = (start + end) >>> 1;
495 // switch (compareRange(intervals[mid], from, to))
516 intervalCount = added = 0;
520 minStart = maxEnd = Integer.MAX_VALUE;
521 maxStart = Integer.MIN_VALUE;
524 * Compare an interval t to a from/to range for insertion purposes
529 * @return 0 if same, 1 if start is after from, or start equals from and
530 * [bigendian: end is before to | littleendian: end is after to], else
533 private int compareRange(IntervalI t, long from, long to)
537 System.out.println("???");
539 int order = Long.signum(t.getBegin() - from);
541 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
546 public boolean contains(Object entry)
548 if (entry == null || intervalCount == 0)
552 if (!isSorted || deleted > 0)
556 return (findInterval((IntervalI) entry) >= 0);
559 public boolean containsInterval(IntervalI outer, IntervalI inner)
562 int index = binaryIdentitySearch(inner, null);
565 while ((index = index - Math.abs(offsets[index])) >= 0)
567 if (intervals[index] == outer)
576 private void ensureFinalized()
580 if (!isSorted || added > 0 || deleted > 0)
584 if (offsets == null || offsets.length < intervalCount)
586 offsets = new int[intervalCount];
594 * Find all overlaps within the given range, inclusively.
596 * @return a list sorted in ascending order of start position
600 public List<T> findOverlaps(long from, long to)
602 List<T> list = findOverlaps(from, to, null);
603 Collections.reverse(list);
608 * Find all overlaps within the given range, inclusively.
610 * @return a list sorted in descending order of start position
614 @SuppressWarnings("unchecked")
616 public List<T> findOverlaps(long from, long to, List<T> result)
620 result = new ArrayList<>();
622 switch (intervalCount + added)
627 IntervalI sf = intervals[0];
628 if (sf.getBegin() <= to && sf.getEnd() >= from)
637 if (from > maxEnd || to < minStart)
641 int index = binaryLastIntervalSearch(from, to, ret);
648 if (index1 > index + 1)
650 while (--index1 > index)
652 result.add((T) intervals[index1]);
655 boolean isMonotonic = false;
658 IntervalI sf = intervals[index];
659 if (sf.getEnd() >= from)
663 else if (isMonotonic)
667 int offset = offsets[index];
668 isMonotonic = (offset < 0);
669 index -= (isMonotonic ? -offset : offset);
675 public IntervalI get(int i)
677 if (i < 0 || i >= intervalCount + added)
685 private int getContainedBy(int index, int begin)
689 IntervalI sf = intervals[index];
690 if (sf.getEnd() >= begin)
692 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
693 // + "\nFS in " + sf.getIndex1() + ":" + sf);
696 index -= Math.abs(offsets[index]);
698 return IntervalI.NOT_CONTAINED;
702 public int getDepth()
705 if (intervalCount < 2)
707 return intervalCount;
710 IntervalI root = null;
711 for (int i = 0; i < intervalCount; i++)
713 IntervalI element = intervals[i];
714 if (offsets[i] == IntervalI.NOT_CONTAINED)
721 while ((index = index - Math.abs(offset = offsets[index])) >= 0)
723 element = intervals[index];
724 if (++depth > maxDepth && (element == root || offset < 0))
735 public int getWidth()
739 for (int i = offsets.length; --i >= 0;)
750 public boolean isValid()
757 * Answers an iterator over the intervals in the store, with no particular
758 * ordering guaranteed. The iterator does not support the optional
759 * <code>remove</code> operation (throws
760 * <code>UnsupportedOperationException</code> if attempted).
763 public Iterator<T> iterator()
766 return new Iterator<T>()
772 public boolean hasNext()
774 return next < intervalCount;
777 @SuppressWarnings("unchecked")
781 if (next >= intervalCount)
783 throw new NoSuchElementException();
785 return (T) intervals[next++];
791 private void linkFeatures()
793 if (intervalCount == 0)
797 maxEnd = intervals[0].getEnd();
798 offsets[0] = IntervalI.NOT_CONTAINED;
799 if (intervalCount == 1)
803 boolean isMonotonic = true;
804 for (int i = 1; i < intervalCount; i++)
806 IntervalI sf = intervals[i];
807 int begin = sf.getBegin();
808 int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
809 // System.out.println(sf + " is contained by "
810 // + (index < 0 ? null : starts[index]));
812 offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED
813 : isMonotonic ? index - i : i - index);
814 isMonotonic = (sf.getEnd() > maxEnd);
817 maxEnd = sf.getEnd();
824 public String prettyPrint()
826 switch (intervalCount + added)
831 return intervals[0] + "\n";
835 StringBuffer sb = new StringBuffer();
836 for (int i = 0; i < intervalCount; i++)
838 IntervalI range = intervals[i];
840 while ((index = index - Math.abs(offsets[index])) >= 0)
844 sb.append(range.toString()).append('\n');
846 return sb.toString();
850 public synchronized boolean remove(Object o)
854 // throw new NullPointerException();
856 return (o != null && intervalCount > 0
857 && removeInterval((IntervalI) o));
861 * Find the interval or return where it should go, possibly into the add
865 * @return index (nonnegative) or index where it would go (negative)
868 private int findInterval(IntervalI interval)
873 int pt = binaryIdentitySearch(interval, null);
874 // if (addPt == intervalCount || offsets[pt] == 0)
876 if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
881 int start = interval.getBegin();
882 int end = interval.getEnd();
886 while ((pt = offsets[pt]) != 0)
888 IntervalI iv = intervals[pt];
889 switch (compareRange(iv, start, end))
894 if (iv.equalsInterval(interval))
908 int i = intervalCount;
909 while (--i >= 0 && !intervals[i].equalsInterval(interval))
918 * Uses a binary search to find the entry and removes it if found.
923 protected boolean removeInterval(IntervalI interval)
926 if (!isSorted || added > 0)
930 int i = binaryIdentitySearch(interval, bsDeleted);
937 if (bsDeleted == null)
939 bsDeleted = new BitSet(intervalCount);
948 return (isTainted = true);
951 private void finalizeDeletion()
958 // ......xxx.....xxxx.....xxxxx....
962 for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;)
964 i = bsDeleted.nextClearBit(i + 1);
965 int pt1 = bsDeleted.nextSetBit(i + 1);
971 System.arraycopy(intervals, i, intervals, pt, n);
973 if (pt1 == intervalCount)
975 for (i = pt1; --i >= pt;)
979 intervalCount -= deleted;
991 public boolean revalidate()
1002 return intervalCount + added - deleted;
1006 public Object[] toArray()
1009 return super.toArray();
1013 * Sort intervals by start (lowest first) and end (highest first).
1019 intervals = finalizeAddition(new IntervalI[intervalCount + added]);
1021 else if (deleted > 0)
1027 Arrays.sort(intervals, 0, intervalCount, icompare);
1029 updateMinMaxStart();
1033 private void updateMinMaxStart()
1035 if (intervalCount > 0)
1037 minStart = intervals[0].getBegin();
1038 maxStart = intervals[intervalCount - 1].getBegin();
1042 minStart = Integer.MAX_VALUE;
1043 maxStart = Integer.MIN_VALUE;
1048 public String toString()
1050 return prettyPrint();