4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import java.util.AbstractCollection;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.Comparator;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.NoSuchElementException;
44 import intervalstore.api.IntervalI;
45 import intervalstore.api.IntervalStoreI;
49 * A second idea, doing a double binary sort for the full interval. Seemed like
50 * a good idea, but is 50% slower.
52 * A Collection class to store interval-associated data, with options for "lazy"
53 * sorting so as to speed incremental construction of the data prior to issuing
57 * Accepts duplicate entries but not null values.
61 * @author Bob Hanson 2019.08.06
64 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
65 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
67 public class IntervalStore<T extends IntervalI>
68 extends AbstractCollection<T> implements IntervalStoreI<T>
72 * Search for the last interval that starts before or at the specified from/to
73 * range and the first interval that starts after it. In the situation that
74 * there are multiple intervals starting at from, this method returns the
83 public int binaryLastIntervalSearch(long from,
86 int start = 0, start2 = 0;
88 int end = intervalCount - 1, end2 = intervalCount;
93 mid = (start + end) >>> 1;
96 switch (Long.signum(begin - from))
117 start = Math.max(start2, end);
122 mid = (start + end) >>> 1;
124 begin = e.getBegin();
140 * My preference is for a bigendian comparison, but you may differ.
142 private Comparator<? super IntervalI> icompare;
145 * bigendian is what NCList does; change icompare to switch to that
147 private boolean bigendian;
149 private final boolean DO_PRESORT;
151 private boolean isSorted;
153 private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
154 maxEnd = Integer.MAX_VALUE;
156 // private Comparator<IntervalI> icompare = new IntervalComparator();
158 private boolean isTainted;
160 private int capacity = 8;
162 private IntervalI[] intervals = new IntervalI[capacity];
164 private int[] offsets;
166 private int[] ret = new int[1];
168 private int intervalCount;
174 private BitSet bsDeleted;
179 public IntervalStore()
184 public IntervalStore(boolean presort)
190 * Constructor given a list of intervals. Note that the list may get sorted as
191 * a side-effect of calling this constructor.
193 public IntervalStore(List<T> intervals)
195 this(intervals, true);
199 * Allows a presort option, which can speed up initial loading of individual
200 * features but will delay the first findOverlap if set to true.
205 public IntervalStore(List<T> intervals, boolean presort)
207 this(intervals, presort, null, false);
213 * intervals to initialize with (others may still be added)
215 * whether or not to presort the list as additions are made
217 * IntervalI.COMPARATOR_LITTLEENDIAN or
218 * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that
219 * sorts by description as well, for example.
221 * true if the comparator sorts [10-30] before [10-20]
223 public IntervalStore(List<T> intervals, boolean presort,
224 Comparator<? super IntervalI> comparator, boolean bigendian)
226 if (intervals != null)
229 this.intervals = new IntervalI[capacity = intervalCount = intervals
232 DO_PRESORT = presort;
233 icompare = (comparator != null ? comparator
234 : bigendian ? IntervalI.COMPARATOR_BIGENDIAN
235 : IntervalI.COMPARATOR_LITTLEENDIAN);
236 this.bigendian = bigendian;
238 if (DO_PRESORT && intervalCount > 1)
242 isSorted = DO_PRESORT;
247 * Adds one interval to the store, allowing duplicates.
252 public boolean add(T interval)
254 return add(interval, true);
258 * Adds one interval to the store, optionally checking for duplicates.
261 * @param allowDuplicates
263 public boolean add(T interval, boolean allowDuplicates)
265 if (interval == null)
278 synchronized (intervals)
280 int index = intervalCount;
281 int start = interval.getBegin();
283 if (intervalCount + added + 1 >= capacity)
285 intervals = finalizeAddition(
286 new IntervalI[capacity = capacity << 1]);
290 if (DO_PRESORT && isSorted)
292 if (intervalCount == 0)
298 index = findInterval(interval);
299 if (!allowDuplicates && index >= 0)
312 if (!allowDuplicates && findInterval(interval) >= 0)
319 if (index == intervalCount)
321 intervals[intervalCount++] = interval;
322 // System.out.println("added " + intervalCount + " " + interval);
326 int pt = capacity - ++added;
327 intervals[pt] = interval;
328 // System.out.println("stashed " + pt + " " + interval + " for "
329 // + index + " " + intervals[index]);
331 offsets = new int[capacity];
333 offsets[pt] = offsets[index];
338 minStart = Math.min(minStart, start);
339 maxStart = Math.max(maxStart, start);
344 private IntervalI[] finalizeAddition(IntervalI[] dest)
350 if (intervalCount > 0 && dest != intervals)
352 System.arraycopy(intervals, 0, dest, 0, intervalCount);
354 capacity = dest.length;
358 // array is [(intervalCount)...null...(added)]
360 int ntotal = intervalCount + added;
361 for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;)
364 while (--pt >= 0 && offsets[pt] == 0)
369 // shift upper intervals right
372 System.arraycopy(intervals, pt, dest, ptShift, nOK);
375 for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
377 dest[--ptShift] = intervals[offset];
382 intervalCount = ntotal;
383 capacity = dest.length;
387 public int binaryIdentitySearch(IntervalI interval)
389 return binaryIdentitySearch(interval, null);
392 * for remove() and contains()
398 * @return index or, if not found, -1 - "would be here"
400 public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore)
403 int r0 = interval.getBegin();
404 int r1 = interval.getEnd();
405 int end = intervalCount - 1;
406 if (end < 0 || r0 < minStart)
409 return -1 - intervalCount;
412 int mid = (start + end) >>> 1;
413 IntervalI r = intervals[mid];
414 switch (compareRange(r, r0, r1))
423 IntervalI iv = intervals[mid];
424 if ((bsIgnore == null || !bsIgnore.get(mid))
425 && iv.equalsInterval(interval))
427 // found one; just scan up and down now, first checking the range, but
428 // also checking other possible aspects of equivalence.
430 for (int i = mid; ++i <= end;)
432 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
434 if ((bsIgnore == null || !bsIgnore.get(i))
435 && iv.equalsInterval(interval))
438 for (int i = mid; --i >= start;)
440 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
442 if ((bsIgnore == null || !bsIgnore.get(i))
443 && iv.equalsInterval(interval))
452 // private int binaryInsertionSearch(long from, long to)
454 // int matched = intervalCount;
455 // int end = matched - 1;
456 // int start = matched;
457 // if (end < 0 || from > intervals[end].getEnd()
458 // || from < intervals[start = 0].getBegin())
460 // while (start <= end)
462 // int mid = (start + end) >>> 1;
463 // switch (compareRange(intervals[mid], from, to))
484 intervalCount = added = 0;
488 minStart = maxEnd = Integer.MAX_VALUE;
489 maxStart = Integer.MIN_VALUE;
492 * Compare an interval t to a from/to range for insertion purposes
497 * @return 0 if same, 1 if start is after from, or start equals from and
498 * [bigendian: end is before to | littleendian: end is after to], else
501 private int compareRange(IntervalI t, long from, long to)
504 System.out.println("???");
505 int order = Long.signum(t.getBegin() - from);
507 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
512 public boolean contains(Object entry)
514 if (entry == null || intervalCount == 0)
516 if (!isSorted || deleted > 0)
520 return (findInterval((IntervalI) entry) >= 0);
523 public boolean containsInterval(IntervalI outer, IntervalI inner)
526 int index = binaryIdentitySearch(inner, null);
528 while ((index = index - Math.abs(offsets[index])) >= 0)
530 if (intervals[index] == outer)
538 private void ensureFinalized()
540 if (isTainted && intervalCount + added > 1)
542 if (!isSorted || added > 0 || deleted > 0)
546 if (offsets == null || offsets.length < intervalCount)
547 offsets = new int[intervalCount];
554 * Find all overlaps within the given range, inclusively.
556 * @return a list sorted in ascending order of start position
560 public List<T> findOverlaps(long from, long to)
562 List<T> list = findOverlaps(from, to, null);
563 Collections.reverse(list);
568 * Find all overlaps within the given range, inclusively.
570 * @return a list sorted in descending order of start position
574 @SuppressWarnings("unchecked")
576 public List<T> findOverlaps(long from, long to, List<T> result)
580 result = new ArrayList<>();
582 switch (intervalCount)
587 IntervalI sf = intervals[0];
588 if (sf.getBegin() <= to && sf.getEnd() >= from)
597 if (from > maxEnd || to < minStart)
599 int index = binaryLastIntervalSearch(from, to, ret);
604 if (index1 > index + 1)
606 while (--index1 > index)
608 result.add((T) intervals[index1]);
611 boolean isMonotonic = false;
614 IntervalI sf = intervals[index];
615 if (sf.getEnd() >= from)
619 else if (isMonotonic)
623 int offset = offsets[index];
624 isMonotonic = (offset < 0);
625 index -= (isMonotonic ? -offset : offset);
631 public IntervalI get(int i)
633 if (i < 0 || i >= intervalCount + added)
639 private int getContainedBy(int index, int begin)
643 IntervalI sf = intervals[index];
644 if (sf.getEnd() >= begin)
646 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
647 // + "\nFS in " + sf.getIndex1() + ":" + sf);
650 index -= Math.abs(offsets[index]);
652 return IntervalI.NOT_CONTAINED;
656 public int getDepth()
659 if (intervalCount < 2)
661 return intervalCount;
664 IntervalI root = null;
665 for (int i = 0; i < intervalCount; i++)
667 IntervalI element = intervals[i];
668 if (offsets[i] == IntervalI.NOT_CONTAINED)
675 while ((index = index - Math.abs(offset = offsets[index])) >= 0)
677 element = intervals[index];
678 if (++depth > maxDepth && (element == root || offset < 0))
689 public int getWidth()
693 for (int i = offsets.length; --i >= 0;)
704 public boolean isValid()
711 * Answers an iterator over the intervals in the store, with no particular
712 * ordering guaranteed. The iterator does not support the optional
713 * <code>remove</code> operation (throws
714 * <code>UnsupportedOperationException</code> if attempted).
717 public Iterator<T> iterator()
719 finalizeAddition(null);
720 return new Iterator<T>()
726 public boolean hasNext()
728 return next < intervalCount;
731 @SuppressWarnings("unchecked")
735 if (next >= intervalCount)
736 throw new NoSuchElementException();
737 return (T) intervals[next++];
743 private void linkFeatures()
745 if (intervalCount == 0)
747 maxEnd = intervals[0].getEnd();
748 offsets[0] = IntervalI.NOT_CONTAINED;
749 if (intervalCount == 1)
753 boolean isMonotonic = true;
754 for (int i = 1; i < intervalCount; i++)
756 IntervalI sf = intervals[i];
757 int begin = sf.getBegin();
758 int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
759 // System.out.println(sf + " is contained by "
760 // + (index < 0 ? null : starts[index]));
762 offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED
763 : isMonotonic ? index - i : i - index);
764 isMonotonic = (sf.getEnd() > maxEnd);
767 maxEnd = sf.getEnd();
774 public String prettyPrint()
776 switch (intervalCount)
781 return intervals[0] + "\n";
785 StringBuffer sb = new StringBuffer();
786 for (int i = 0; i < intervalCount; i++)
788 IntervalI range = intervals[i];
790 while ((index = index - Math.abs(offsets[index])) >= 0)
794 sb.append(range.toString()).append('\n');
796 return sb.toString();
800 public synchronized boolean remove(Object o)
804 // throw new NullPointerException();
806 return (o != null && intervalCount > 0
807 && removeInterval((IntervalI) o));
811 * Find the interval or return where it should go, possibly into the add
815 * @return index (nonnegative) or index where it would go (negative)
818 private int findInterval(IntervalI interval)
823 int pt = binaryIdentitySearch(interval, null);
824 // if (addPt == intervalCount || offsets[pt] == 0)
826 if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
829 int start = interval.getBegin();
830 int end = interval.getEnd();
834 while ((pt = offsets[pt]) != 0)
836 IntervalI iv = intervals[pt];
837 switch (compareRange(iv, start, end))
842 if (iv.equalsInterval(interval))
854 int i = intervalCount;
855 while (--i >= 0 && !intervals[i].equalsInterval(interval))
862 * Uses a binary search to find the entry and removes it if found.
867 protected boolean removeInterval(IntervalI interval)
870 if (!isSorted || added > 0)
874 int i = binaryIdentitySearch(interval, bsDeleted);
879 if (bsDeleted == null)
880 bsDeleted = new BitSet(intervalCount);
886 return (isTainted = true);
889 private void finalizeDeletion()
894 // ......xxx.....xxxx.....xxxxx....
898 for (int i = bsDeleted.nextSetBit(0); i >= 0;)
901 i = bsDeleted.nextClearBit(i + 1);
902 int pt1 = bsDeleted.nextSetBit(i + 1);
907 System.arraycopy(intervals, i, intervals, pt, pt1 - i);
910 intervalCount -= deleted;
917 public boolean revalidate()
928 return intervalCount + added - deleted;
932 * Sort intervals by start (lowest first) and end (highest first).
938 intervals = finalizeAddition(new IntervalI[intervalCount + added]);
940 else if (deleted > 0)
946 Arrays.sort(intervals, 0, intervalCount, icompare);
952 private void updateMinMaxStart()
954 if (intervalCount > 0)
956 minStart = intervals[0].getBegin();
957 maxStart = intervals[intervalCount - 1].getBegin();
961 minStart = Integer.MAX_VALUE;
962 maxStart = Integer.MIN_VALUE;
967 public String toString()
969 return prettyPrint();