4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import jalview.datamodel.SequenceFeature;
36 import java.util.AbstractCollection;
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.BitSet;
40 import java.util.Collections;
41 import java.util.Comparator;
42 import java.util.Iterator;
43 import java.util.List;
44 import java.util.NoSuchElementException;
46 import intervalstore.api.IntervalI;
47 import intervalstore.api.IntervalStoreI;
51 * A second idea, doing a double binary sort for the full interval. Seemed like
52 * a good idea, but is 50% slower.
54 * A Collection class to store interval-associated data, with options for "lazy"
55 * sorting so as to speed incremental construction of the data prior to issuing
59 * Accepts duplicate entries but not null values.
63 * @author Bob Hanson 2019.08.06
66 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
67 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
69 public class IntervalStore0<T extends IntervalI>
70 extends AbstractCollection<T> implements IntervalStoreI<T>
73 static int NOT_CONTAINED = Integer.MIN_VALUE;
75 static int CONTAINMENT_UNKNOWN = 0;
78 * Search for the last interval that starts before or at the specified from/to
79 * range and the first interval that starts after it. In the situation that
80 * there are multiple intervals starting at from, this method returns the
89 public int binaryLastIntervalSearch(long from, long to, int[] ret)
91 int start = 0, start2 = 0;
93 int end = intervalCount - 1, end2 = intervalCount;
98 mid = (start + end) >>> 1;
100 begin = e.getBegin();
101 switch (Long.signum(begin - from))
122 start = Math.max(start2, end);
127 mid = (start + end) >>> 1;
129 begin = e.getBegin();
145 * My preference is for a bigendian comparison, but you may differ.
147 private Comparator<? super IntervalI> icompare;
150 * bigendian is what NCList does; change icompare to switch to that
152 private boolean bigendian;
154 private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
155 maxEnd = Integer.MAX_VALUE;
157 // private Comparator<IntervalI> icompare = new IntervalComparator();
159 private boolean isTainted;
161 private int capacity = 8;
163 protected IntervalI[] intervals = new IntervalI[capacity];
165 private int[] offsets;
167 private int[] ret = new int[1];
169 protected int intervalCount;
175 private BitSet bsDeleted;
180 public IntervalStore0()
186 * Constructor given a list of intervals. Note that the list may get sorted as
187 * a side-effect of calling this constructor.
189 public IntervalStore0(List<T> intervals)
191 this(intervals, null, false);
197 * intervals to initialize with (others may still be added)
199 * IntervalI.COMPARATOR_BEGIN_ASC_END_ASC (default) or
200 * IntervalI.COMPARE_BEGIN_ASC_END_DESC, but this could also be one
201 * that sorts by description as well, for example.
203 * true if the comparator sorts [10-30] before [10-20]
205 public IntervalStore0(List<T> intervals,
206 Comparator<? super IntervalI> comparator, boolean bigendian)
208 if (intervals != null)
211 this.intervals = new IntervalI[capacity = intervalCount = intervals
214 // COMPARE_BEGIN_ASC_END_DESC is standard, meaning
215 // the order will be [10,100] before [10,80]
216 // this order doesn't really matter much.
217 icompare = (comparator != null ? comparator
218 : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC
219 : IntervalI.COMPARE_BEGIN_ASC_END_ASC);
220 this.bigendian = bigendian;
231 * Adds one interval to the store, allowing duplicates.
236 public boolean add(T interval)
238 return add(interval, true);
242 * Adds one interval to the store, optionally checking for duplicates.
245 * @param allowDuplicates
248 public boolean add(T interval, boolean allowDuplicates)
250 if (interval == null)
265 synchronized (intervals)
267 int index = intervalCount;
268 int start = interval.getBegin();
270 if (intervalCount + added + 1 >= capacity)
272 intervals = finalizeAddition(
273 new IntervalI[capacity = capacity << 1]);
277 // if (DO_PRESORT && isSorted)
279 if (intervalCount == 0)
285 index = findInterval(interval, allowDuplicates);
286 if (!allowDuplicates && index >= 0)
303 // if (!allowDuplicates && findInterval(interval) >= 0)
310 if (index == intervalCount)
312 intervals[intervalCount++] = interval;
313 // System.out.println("added " + intervalCount + " " + interval);
317 int pt = capacity - ++added;
318 intervals[pt] = interval;
319 // System.out.println("stashed " + pt + " " + interval + " for "
320 // + index + " " + intervals[index]);
323 offsets = new int[capacity];
326 offsets[pt] = offsets[index];
331 minStart = Math.min(minStart, start);
332 maxStart = Math.max(maxStart, start);
337 private IntervalI[] finalizeAddition(IntervalI[] dest)
345 if (intervalCount > 0 && dest != intervals)
347 System.arraycopy(intervals, 0, dest, 0, intervalCount);
349 capacity = dest.length;
353 // array is [(intervalCount)...null...(added)]
355 int ntotal = intervalCount + added;
356 for (int ptShift = ntotal, pt = intervalCount; pt >= 0;)
359 while (--pt >= 0 && offsets[pt] == 0)
368 // shift upper intervals right
372 System.arraycopy(intervals, pt, dest, ptShift, nOK);
378 for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
380 dest[--ptShift] = intervals[offset];
385 intervalCount = ntotal;
386 capacity = dest.length;
391 * A binary search for a duplicate.
396 public int binaryIdentitySearch(IntervalI interval)
398 return binaryIdentitySearch(interval, null, false);
402 * for remove() and contains()
409 * don't do a full identity check, just a range check
410 * @return index or, if not found, -1 - "would be here"
412 private int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore,
416 int r0 = interval.getBegin();
417 int r1 = interval.getEnd();
418 int end = intervalCount - 1;
419 if (end < 0 || r0 < minStart)
425 return -1 - intervalCount;
429 int mid = (start + end) >>> 1;
430 IntervalI r = intervals[mid];
431 switch (compareRange(r, r0, r1))
440 IntervalI iv = intervals[mid];
441 if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(mid))
442 && sameInterval(interval, iv))
445 // found one; just scan up and down now, first checking the range, but
446 // also checking other possible aspects of equivalence.
449 for (int i = mid; ++i <= end;)
451 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
455 if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(i))
456 && sameInterval(interval, iv))
461 for (int i = mid; --i >= start;)
463 if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
467 if ((bsIgnore == null || !bsIgnore.get(i))
468 && sameInterval(interval, iv))
479 // private int binaryInsertionSearch(long from, long to)
481 // int matched = intervalCount;
482 // int end = matched - 1;
483 // int start = matched;
484 // if (end < 0 || from > intervals[end].getEnd()
485 // || from < intervals[start = 0].getBegin())
487 // while (start <= end)
489 // int mid = (start + end) >>> 1;
490 // switch (compareRange(intervals[mid], from, to))
510 intervalCount = added = 0;
514 minStart = maxEnd = Integer.MAX_VALUE;
515 maxStart = Integer.MIN_VALUE;
519 * Compare an interval t to a from/to range for insertion purposes
524 * @return 0 if same, 1 if start is after from, or start equals from and
525 * [bigendian: end is before to | littleendian: end is after to], else
528 private int compareRange(IntervalI t, long from, long to)
530 int order = Long.signum(t.getBegin() - from);
532 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
537 public boolean contains(Object entry)
539 if (entry == null || intervalCount == 0)
548 return (findInterval((IntervalI) entry, false) >= 0);
551 public boolean containsInterval(IntervalI outer, IntervalI inner)
554 int index = binaryIdentitySearch(inner);
557 while ((index = index - Math.abs(offsets[index])) >= 0)
559 if (intervals[index] == outer)
568 private void ensureFinalized()
573 added > 0 || deleted > 0)
577 if (offsets == null || offsets.length < intervalCount)
579 offsets = new int[intervalCount];
587 * Find all overlaps within the given range, inclusively.
589 * @return a list sorted in ascending order of start position
593 public List<T> findOverlaps(long from, long to)
595 List<T> list = findOverlaps(from, to, null);
596 Collections.reverse(list);
601 * Find all overlaps within the given range, inclusively.
603 * @return a list sorted in descending order of start position
607 @SuppressWarnings("unchecked")
609 public List<T> findOverlaps(long from, long to, List<T> result)
613 result = new ArrayList<>();
615 switch (intervalCount + added)
620 IntervalI sf = intervals[0];
621 if (sf.getBegin() <= to && sf.getEnd() >= from)
630 if (from > maxEnd || to < minStart)
634 int index = binaryLastIntervalSearch(from, to, ret);
641 if (index1 > index + 1)
643 while (--index1 > index)
645 result.add((T) intervals[index1]);
648 boolean isMonotonic = false;
651 IntervalI sf = intervals[index];
652 if (sf.getEnd() >= from)
656 else if (isMonotonic)
660 int offset = offsets[index];
661 isMonotonic = (offset < 0);
662 index -= (isMonotonic ? -offset : offset);
667 public IntervalI get(int i)
669 if (i < 0 || i >= intervalCount + added)
677 private int getContainedBy(int index, int begin)
681 IntervalI sf = intervals[index];
682 if (sf.getEnd() >= begin)
684 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
685 // + "\nFS in " + sf.getIndex1() + ":" + sf);
688 index -= Math.abs(offsets[index]);
690 return NOT_CONTAINED;
694 public int getDepth()
697 if (intervalCount < 2)
699 return intervalCount;
702 IntervalI root = null;
703 for (int i = 0; i < intervalCount; i++)
705 IntervalI element = intervals[i];
706 if (offsets[i] == NOT_CONTAINED)
713 while ((index = index - Math.abs(offset = offsets[index])) >= 0)
715 element = intervals[index];
716 if (++depth > maxDepth && (element == root || offset < 0))
726 public int getWidth()
730 for (int i = offsets.length; --i >= 0;)
740 public boolean isValid()
747 * Answers an iterator over the intervals in the store, with no particular
748 * ordering guaranteed. The iterator does not support the optional
749 * <code>remove</code> operation (throws
750 * <code>UnsupportedOperationException</code> if attempted).
753 public Iterator<T> iterator()
756 return new Iterator<T>()
762 public boolean hasNext()
764 return next < intervalCount;
767 @SuppressWarnings("unchecked")
771 if (next >= intervalCount)
773 throw new NoSuchElementException();
775 return (T) intervals[next++];
781 private void linkFeatures()
783 if (intervalCount == 0)
787 maxEnd = intervals[0].getEnd();
788 offsets[0] = NOT_CONTAINED;
789 if (intervalCount == 1)
793 boolean isMonotonic = true;
794 for (int i = 1; i < intervalCount; i++)
796 IntervalI sf = intervals[i];
797 int begin = sf.getBegin();
798 int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
799 // System.out.println(sf + " is contained by "
800 // + (index < 0 ? null : starts[index]));
802 offsets[i] = (index < 0 ? NOT_CONTAINED
803 : isMonotonic ? index - i : i - index);
804 isMonotonic = (sf.getEnd() > maxEnd);
807 maxEnd = sf.getEnd();
814 public String prettyPrint()
816 switch (intervalCount + added)
821 return intervals[0] + "\n";
825 StringBuffer sb = new StringBuffer();
826 for (int i = 0; i < intervalCount; i++)
828 IntervalI range = intervals[i];
830 while ((index = index - Math.abs(offsets[index])) >= 0)
834 sb.append(range.toString()).append('\n');
836 return sb.toString();
840 public synchronized boolean remove(Object o)
844 // throw new NullPointerException();
846 return (o != null && intervalCount > 0
847 && removeInterval((IntervalI) o));
851 * Find the interval or return where it should go, possibly into the add
856 * don't do a full check for identity, just check for range
857 * @return index (nonnegative) or index where it would go (negative)
860 private int findInterval(IntervalI interval, boolean rangeOnly)
865 int pt = binaryIdentitySearch(interval, null, rangeOnly);
866 // if (addPt == intervalCount || offsets[pt] == 0)
868 if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
873 int start = interval.getBegin();
874 int end = interval.getEnd();
878 while ((pt = offsets[pt]) != 0)
880 IntervalI iv = intervals[pt];
881 switch (compareRange(iv, start, end))
886 if (!rangeOnly && sameInterval(iv, interval))
900 // int i = intervalCount;
901 // while (--i >= 0 && !sameRange(intervals[i], interval))
910 * Uses a binary search to find the entry and removes it if found.
915 protected boolean removeInterval(IntervalI interval)
923 int i = binaryIdentitySearch(interval, bsDeleted, false);
930 if (bsDeleted == null)
932 bsDeleted = new BitSet(intervalCount);
941 return (isTainted = true);
944 private void finalizeDeletion()
951 // ......xxx.....xxxx.....xxxxx....
955 for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;)
957 i = bsDeleted.nextClearBit(i + 1);
958 int pt1 = bsDeleted.nextSetBit(i + 1);
964 System.arraycopy(intervals, i, intervals, pt, n);
966 if (pt1 == intervalCount)
968 for (i = pt1; --i >= pt;)
972 intervalCount -= deleted;
982 public boolean revalidate()
993 return intervalCount + added - deleted;
997 public Object[] toArray()
1000 return super.toArray();
1004 * Sort intervals by start (lowest first) and end (highest first).
1010 intervals = finalizeAddition(new IntervalI[intervalCount + added]);
1012 else if (deleted > 0)
1018 Arrays.sort(intervals, 0, intervalCount, icompare);
1020 updateMinMaxStart();
1024 private void updateMinMaxStart()
1026 if (intervalCount > 0)
1028 minStart = intervals[0].getBegin();
1029 maxStart = intervals[intervalCount - 1].getBegin();
1033 minStart = Integer.MAX_VALUE;
1034 maxStart = Integer.MIN_VALUE;
1039 public String toString()
1041 return prettyPrint();
1044 public boolean canCheckForDuplicates()
1050 * Tests for instance equality without using {@code instanceof}
1055 * @throws ClassCastException
1057 boolean sameInterval(IntervalI i1, IntervalI i2)
1059 return ((SequenceFeature) i1).equals((SequenceFeature) i2, false);