4 Copyright (c) 2018, Mungo Carstairs
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
17 * Neither the name of the copyright holder nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 package intervalstore.nonc;
34 import java.util.AbstractCollection;
35 import java.util.ArrayList;
36 import java.util.Collections;
37 import java.util.Comparator;
38 import java.util.Iterator;
39 import java.util.List;
41 import intervalstore.api.IntervalI;
42 import intervalstore.api.IntervalStoreI;
47 * A Collection class to store interval-associated data, with options for "lazy"
48 * sorting so as to speed incremental construction of the data prior to issuing
52 * with O(log N) performance for overlap queries, insertion and deletion (where
53 * N is the size of the store).
55 * Accepts duplicate entries but not null values.
57 * @author Bob Hanson 2019.08.06
60 * any type providing <code>getBegin()</code>, <code>getEnd()</code>
61 * <code>getContainedBy()</code>, and <code>setContainedBy()</code>
63 public class IntervalStore<T extends IntervalI>
64 extends AbstractCollection<T> implements IntervalStoreI<T>
67 private final boolean DO_PRESORT;
69 private boolean isSorted;
71 private List<T> intervals;
73 private boolean isTainted;
75 private IntervalI[] orderedIntervalStarts;
77 private static Comparator<IntervalI> icompare = new IntervalComparator();
81 System.out.println("intervalstore.nonc.IntervalStore initialized");
84 // private Comparator<IntervalI> icompare = new IntervalComparator();
89 public IntervalStore()
92 intervals = new ArrayList<>();
96 public IntervalStore(boolean presort)
98 intervals = new ArrayList<>();
103 * Constructor given a list of intervals. Note that the list may get sorted as
104 * a side-effect of calling this constructor.
106 public IntervalStore(List<T> intervals)
108 this(intervals, true);
112 * Allows a presort option, which can speed up initial loading of individual
113 * features but will delay the first findOverlap if set to true.
118 public IntervalStore(List<T> intervals, boolean presort)
120 this.intervals = intervals;
121 DO_PRESORT = presort;
130 * Adds one interval to the store.
135 public boolean add(T interval)
137 if (interval == null)
142 synchronized (intervals)
146 int insertPosition = findFirstBegin(intervals, interval.getBegin());
147 intervals.add(insertPosition, interval);
152 intervals.add(interval);
164 orderedIntervalStarts = null;
169 public boolean contains(Object entry)
171 return listContains(intervals, entry);
174 private void ensureFinalized()
176 if (isTainted && intervals.size() >= 2)
182 orderedIntervalStarts = intervals.toArray(new IntervalI[0]);
183 linkFeatures(orderedIntervalStarts);
189 * Sort intervals by start (lowest first) and end (highest first).
193 Collections.sort(intervals, icompare);
197 protected int findFirstBegin(List<T> list, long pos)
200 int end = list.size() - 1;
201 int matched = list.size();
205 int mid = (start + end) / 2;
206 if (list.get(mid).getBegin() >= pos)
219 protected int findFirstEnd(List<T> list, long pos)
222 int end = list.size() - 1;
223 int matched = list.size();
227 int mid = (start + end) / 2;
228 if (list.get(mid).getEnd() >= pos)
242 * Adds non-nested intervals to the result list that lie within the target
249 protected void findIntervalOverlaps(long from, long to,
253 int startIndex = findFirstEnd(intervals, from);
254 final int startIndex1 = startIndex;
256 while (i < intervals.size())
258 T sf = intervals.get(i);
259 if (sf.getBegin() > to)
263 if (sf.getBegin() <= to && sf.getEnd() >= from)
273 public List<T> findOverlaps(long start, long end)
275 return findOverlaps(start, end, null);
278 @SuppressWarnings("unchecked")
280 public List<T> findOverlaps(long start, long end, List<T> result)
285 result = new ArrayList<>();
287 int n = intervals.size();
293 T sf = intervals.get(0);
294 if (sf.getBegin() <= end && sf.getEnd() >= start)
303 // (1) Find the closest feature to this position.
305 int index = getClosestFeature(orderedIntervalStarts, start);
306 IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]);
308 // (2) Traverse the containedBy field, checking for overlap.
312 if (sf.getEnd() >= start)
316 sf = sf.getContainedBy();
319 // (3) For an interval, find the last feature that starts in this interval,
320 // and add all features up through that feature.
324 // fill in with all features that start within this interval, fully
326 int index2 = getClosestFeature(orderedIntervalStarts, end);
327 while (++index <= index2)
329 result.add((T) orderedIntervalStarts[index]);
336 private int getClosestFeature(IntervalI[] l, long pos)
339 int high = l.length - 1;
342 int mid = (low + high) >>> 1;
343 IntervalI f = l[mid];
344 switch (Long.signum(f.getBegin() - pos))
354 while (++mid <= high && l[mid].getBegin() == pos)
361 return (high < 0 ? -1 : high);
364 @SuppressWarnings("unchecked")
365 public T getContainedBy(T sf, T sf0)
367 int begin = sf0.getBegin();
370 if (begin <= sf.getEnd())
372 // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
373 // + "\nFS in " + sf.getIndex1() + ":" + sf);
376 sf = (T) sf.getContainedBy();
382 public int getDepth()
384 int n = intervals.size();
392 for (int i = 0; i < n; i++)
394 T element = intervals.get(i);
395 IntervalI container = element;
396 if (element.getContainedBy() == null)
401 while ((container = container.getContainedBy()) != null)
403 if (++depth > maxDepth && container == root)
414 public boolean isValid()
421 * Answers an iterator over the intervals in the store, with no particular
422 * ordering guaranteed. The iterator does not support the optional
423 * <code>remove</code> operation (throws
424 * <code>UnsupportedOperationException</code> if attempted).
427 public Iterator<T> iterator()
429 return intervals.iterator();
433 @SuppressWarnings("unchecked")
434 private void linkFeatures(IntervalI[] features)
436 int n = features.length;
441 int maxEnd = features[0].getEnd();
442 for (int i = 1; i < n; i++)
444 T sf = (T) features[i];
445 if (sf.getBegin() <= maxEnd)
447 sf.setContainedBy(getContainedBy((T) features[i - 1], sf));
449 if (sf.getEnd() > maxEnd)
451 maxEnd = sf.getEnd();
458 * Answers true if the list contains the interval, else false. This method is
459 * optimised for the condition that the list is sorted on interval start
460 * position ascending, and will give unreliable results if this does not hold.
466 protected boolean listContains(List<T> intervals, Object entry)
468 if (intervals == null || entry == null)
475 return intervals.contains(entry);
478 @SuppressWarnings("unchecked")
479 T interval = (T) entry;
482 * locate the first entry in the list which does not precede the interval
484 int pos = findFirstBegin(intervals, interval.getBegin());
485 int len = intervals.size();
488 T sf = intervals.get(pos);
489 if (sf.getBegin() > interval.getBegin())
491 return false; // no match found
493 if (sf.getEnd() == interval.getEnd() && sf.equals(interval))
503 public synchronized boolean remove(Object o)
507 // throw new NullPointerException();
509 return (o != null && intervals.size() > 0
510 && removeInterval((IntervalI) o));
514 * Uses a binary search to find the entry and removes it if found.
519 protected boolean removeInterval(IntervalI entry)
523 return intervals.remove(entry);
527 * find the first interval that might match, i.e. whose
528 * start position is not less than the target range start
529 * (NB inequality test ensures the first match if any is found)
531 int startIndex = findFirstBegin(intervals, entry.getBegin());
534 * traverse intervals to look for a match
536 int from = entry.getBegin();
537 int to = entry.getEnd();
538 for (int i = startIndex, size = intervals.size(); i < size; i++)
540 T sf = intervals.get(i);
541 if (sf.getBegin() > from)
545 if (sf.getEnd() == to && sf.equals(entry))
547 intervals.remove(i).setContainedBy(null);
548 return (isTainted = true);
557 return intervals.size();
561 public String toString()
563 return prettyPrint();
567 public int getWidth()
571 for (int i = intervals.size(); --i >= 0;)
573 if (intervals.get(i).getContainedBy() == null)
582 public String prettyPrint()
584 int n = intervals.size();
591 return intervals.get(0) + "\n";
595 StringBuffer sb = new StringBuffer();
596 for (int i = 0; i < n; i++)
598 IntervalI range = orderedIntervalStarts[i];
599 IntervalI container = range.getContainedBy();
600 while (container != null)
603 container = container.getContainedBy();
605 sb.append(range.toString()).append('\n');
607 return sb.toString();
611 public boolean revalidate()
620 public IntervalI get(int i)
623 return (i < 0 || i >= orderedIntervalStarts.length ? null
624 : orderedIntervalStarts[i]);