From 5bf9d1929f1821170e5052976cf4e7435fb29ab4 Mon Sep 17 00:00:00 2001 From: hansonr Date: Wed, 28 Aug 2019 10:01:03 -0500 Subject: [PATCH 1/1] JAL-3253 JAL-3397 adds (original MC) intervalstore code to implement modified api interfaces --- src/intervalstore/impl/BinarySearcher.java | 91 ++++ src/intervalstore/impl/IntervalStore.java | 543 ++++++++++++++++++++ src/intervalstore/impl/NCList.java | 752 ++++++++++++++++++++++++++++ src/intervalstore/impl/NCListBuilder.java | 143 ++++++ src/intervalstore/impl/NCNode.java | 381 ++++++++++++++ 5 files changed, 1910 insertions(+) create mode 100644 src/intervalstore/impl/BinarySearcher.java create mode 100644 src/intervalstore/impl/IntervalStore.java create mode 100644 src/intervalstore/impl/NCList.java create mode 100644 src/intervalstore/impl/NCListBuilder.java create mode 100644 src/intervalstore/impl/NCNode.java diff --git a/src/intervalstore/impl/BinarySearcher.java b/src/intervalstore/impl/BinarySearcher.java new file mode 100644 index 0000000..1086e91 --- /dev/null +++ b/src/intervalstore/impl/BinarySearcher.java @@ -0,0 +1,91 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import java.util.List; +import java.util.function.Function; + +/** + * Provides a method to perform binary search of an ordered list for the first + * entry that satisfies a supplied condition + * + * @author gmcarstairs + */ +public final class BinarySearcher +{ + private BinarySearcher() + { + } + + /** + * Performs a binary search of the list to find the index of the first entry + * for which the test returns true. Answers the length of the list if there is + * no such entry. + *

+ * For correct behaviour, the provided list must be ordered consistent with + * the test, that is, any entries returning false must precede any entries + * returning true. Note that this means that this method is not + * usable to search for equality (to find a specific entry), as all unequal + * entries will answer false to the test. To do that, use + * Collections.binarySearch instead. + * + * @param list + * @param test + * @return + * @see java.util.Collections#binarySearch(List, Object) + */ + public static int findFirst(List list, + Function test) + { + int start = 0; + int end = list.size() - 1; + int matched = list.size(); + + while (start <= end) + { + int mid = (start + end) / 2; + T entry = list.get(mid); + boolean itsTrue = test.apply(entry); + if (itsTrue) + { + matched = mid; + end = mid - 1; + } + else + { + start = mid + 1; + } + } + + return matched; + } +} diff --git a/src/intervalstore/impl/IntervalStore.java b/src/intervalstore/impl/IntervalStore.java new file mode 100644 index 0000000..8634ad4 --- /dev/null +++ b/src/intervalstore/impl/IntervalStore.java @@ -0,0 +1,543 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import intervalstore.api.IntervalI; +import intervalstore.api.IntervalStoreI; + +/** + * A collection class to store interval-associated data, with O(log N) + * performance for overlap queries, insertion and deletion (where N is the size + * of the store). Accepts duplicate entries but not null values. + * + * @author gmcarstairs + * + * @param + * any type providing getBegin() and getEnd() + */ +public class IntervalStore + extends AbstractCollection implements IntervalStoreI +{ + /** + * An iterator over the intervals held in this store, with no particular + * ordering guaranteed. The iterator does not support the optional + * remove operation (throws + * UnsupportedOperationException if attempted). + * + * @author gmcarstairs + * + * @param + */ + private class IntervalIterator implements Iterator + { + /* + * iterator over top level non-nested intervals + */ + Iterator topLevelIterator; + + /* + * iterator over NCList (if any) + */ + Iterator nestedIterator; + + /** + * Constructor initialises iterators over the top level list and any nested + * NCList + * + * @param intervalStore + */ + public IntervalIterator( + IntervalStore intervalStore) + { + topLevelIterator = nonNested.iterator(); + if (nested != null) + { + nestedIterator = nested.iterator(); + } + } + + @Override + public boolean hasNext() + { + return topLevelIterator.hasNext() ? true + : (nestedIterator != null && nestedIterator.hasNext()); + } + + @SuppressWarnings("unchecked") + @Override + public V next() + { + if (topLevelIterator.hasNext()) + { + return (V) topLevelIterator.next(); + } + if (nestedIterator != null) + { + return (V) nestedIterator.next(); + } + throw new NoSuchElementException(); + } + + } + + private List nonNested; + + private NCList nested; + + /** + * Constructor + */ + public IntervalStore() + { + nonNested = new ArrayList<>(); + } + + /** + * Constructor given a list of intervals. Note that the list may get sorted as + * a side-effect of calling this constructor. + */ + public IntervalStore(List intervals) + { + this(); + + /* + * partition into subranges whose root intervals + * have no mutual containment (if no intervals are nested, + * each subrange is of length 1 i.e. a single interval) + */ + List sublists = new NCListBuilder() + .partitionNestedSublists(intervals); + + /* + * add all 'subrange root intervals' (and any co-located intervals) + * to our top level list of 'non-nested' intervals; + * put aside any left over for our NCList + */ + List nested = new ArrayList<>(); + + for (IntervalI subrange : sublists) + { + int listIndex = subrange.getBegin(); + IntervalI root = intervals.get(listIndex); + while (listIndex <= subrange.getEnd()) + { + T t = intervals.get(listIndex); + if (root.equalsInterval(t)) + { + nonNested.add(t); + } + else + { + nested.add(t); + } + listIndex++; + } + } + + if (!nested.isEmpty()) + { + this.nested = new NCList<>(nested); + } + } + + /** + * Adds one interval to the store. + * + * @param interval + */ + @Override + public boolean add(T interval) + { + if (interval == null) + { + return false; + } + if (!addNonNestedInterval(interval)) + { + /* + * detected a nested interval - put it in the NCList structure + */ + addNestedInterval(interval); + } + return true; + } + + @Override + public boolean contains(Object entry) + { + if (listContains(nonNested, entry)) + { + return true; + } + + return nested == null ? false : nested.contains(entry); + } + + protected boolean addNonNestedInterval(T entry) + { + synchronized (nonNested) + { + /* + * find the first stored interval which doesn't precede the new one + */ + int insertPosition = BinarySearcher.findFirst(nonNested, + val -> val.getBegin() >= entry.getBegin()); + /* + * fail if we detect interval enclosure + * - of the new interval by the one before or after it + * - of the next interval by the new one + */ + if (insertPosition > 0) + { + if (nonNested.get(insertPosition - 1).properlyContainsInterval(entry)) + { + return false; + } + } + if (insertPosition < nonNested.size()) + { + T following = nonNested.get(insertPosition); + if (entry.properlyContainsInterval(following) + || following.properlyContainsInterval(entry)) + { + return false; + } + } + + /* + * checks passed - add the interval + */ + nonNested.add(insertPosition, entry); + + return true; + } + } + + @Override + public List findOverlaps(long from, long to) + { + List result = new ArrayList<>(); + + findNonNestedOverlaps(from, to, result); + + if (nested != null) + { + result.addAll(nested.findOverlaps(from, to)); + } + + return result; + } + + @Override + public String prettyPrint() + { + String pp = nonNested.toString(); + if (nested != null) + { + pp += System.lineSeparator() + nested.prettyPrint(); + } + return pp; + } + + @Override + public boolean isValid() + { + for (int i = 0; i < nonNested.size() - 1; i++) + { + IntervalI i1 = nonNested.get(i); + IntervalI i2 = nonNested.get(i + 1); + + if (i2.getBegin() < i1.getBegin()) + { + System.err.println("nonNested wrong start order : " + i1.toString() + + ", " + i2.toString()); + return false; + } + if (i1.properlyContainsInterval(i2) + || i2.properlyContainsInterval(i1)) + { + System.err.println("nonNested invalid containment!: " + + i1.toString() + + ", " + i2.toString()); + return false; + } + } + return nested == null ? true : nested.isValid(); + } + + @Override + public int size() + { + int i = nonNested.size(); + if (nested != null) + { + i += nested.size(); + } + return i; + } + + @Override + public synchronized boolean remove(Object o) + { + if (o == null) + { + return false; + } + try + { + @SuppressWarnings("unchecked") + T entry = (T) o; + + /* + * try the non-nested positional intervals first + */ + boolean removed = removeNonNested(entry); + + /* + * if not found, try nested intervals + */ + if (!removed && nested != null) + { + removed = nested.remove(entry); + } + + return removed; + } catch (ClassCastException e) + { + return false; + } + } + + /** + * Removes the given entry from the list of non-nested entries, returning true + * if found and removed, or false if not found. Specifically, removes the + * first item in the list for which item.equals(entry). + * + * @param entry + * @return + */ + protected boolean removeNonNested(T entry) + { + /* + * find the first interval that might match, i.e. whose + * start position is not less than the target range start + * (NB inequality test ensures the first match if any is found) + */ + int startIndex = BinarySearcher.findFirst(nonNested, + val -> val.getBegin() >= entry.getBegin()); + + /* + * traverse intervals to look for a match + */ + int from = entry.getBegin(); + int i = startIndex; + int size = nonNested.size(); + while (i < size) + { + T sf = nonNested.get(i); + if (sf.getBegin() > from) + { + break; + } + if (sf.equals(entry)) + { + nonNested.remove(i); + return true; + } + i++; + } + return false; + } + + @Override + public int getDepth() + { + if (size() == 0) + { + return 0; + } + return (nonNested.isEmpty() ? 0 : 1) + + (nested == null ? 0 : nested.getDepth()); + } + + /** + * Adds one interval to the NCList that can manage nested intervals (creating + * the NCList if necessary) + */ + protected synchronized void addNestedInterval(T interval) + { + if (nested == null) + { + nested = new NCList<>(); + } + nested.add(interval); + } + + /** + * Answers true if the list contains the interval, else false. This method is + * optimised for the condition that the list is sorted on interval start + * position ascending, and will give unreliable results if this does not hold. + * + * @param intervals + * @param entry + * @return + */ + protected boolean listContains(List intervals, Object entry) + { + if (intervals == null || entry == null || !(entry instanceof IntervalI)) + { + return false; + } + + IntervalI interval = (IntervalI) entry; + + /* + * locate the first entry in the list which does not precede the interval + */ + int pos = BinarySearcher.findFirst(intervals, + val -> val.getBegin() >= interval.getBegin()); + int len = intervals.size(); + while (pos < len) + { + T sf = intervals.get(pos); + if (sf.getBegin() > interval.getBegin()) + { + return false; // no match found + } + if (sf.equals(interval)) + { + return true; + } + pos++; + } + return false; + } + + /** + * Answers an iterator over the intervals in the store, with no particular + * ordering guaranteed. The iterator does not support the optional + * remove operation (throws + * UnsupportedOperationException if attempted). + */ + @Override + public Iterator iterator() + { + return new IntervalIterator<>(this); + } + + @Override + public void clear() + { + this.nonNested.clear(); + this.nested = new NCList<>(); + } + + /** + * Adds non-nested intervals to the result list that lie within the target + * range + * + * @param from + * @param to + * @param result + */ + protected void findNonNestedOverlaps(long from, long to, + List result) + { + /* + * find the first interval whose end position is + * after the target range start + */ + int startIndex = BinarySearcher.findFirst(nonNested, + val -> val.getEnd() >= from); + + final int startIndex1 = startIndex; + int i = startIndex1; + while (i < nonNested.size()) + { + T sf = nonNested.get(i); + if (sf.getBegin() > to) + { + break; + } + if (sf.getBegin() <= to && sf.getEnd() >= from) + { + result.add(sf); + } + i++; + } + } + + @Override + public String toString() + { + String s = nonNested.toString(); + if (nested != null) + { + s = s + System.lineSeparator() + nested.toString(); + } + return s; + } + + @Override + public int getWidth() + { + // TODO Auto-generated method stub + return 0; + } + + @Override + public List findOverlaps(long start, long end, List result) + { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean revalidate() + { + // TODO Auto-generated method stub + return false; + } + + @Override + public IntervalI get(int i) + { + // TODO Auto-generated method stub + return null; + } +} diff --git a/src/intervalstore/impl/NCList.java b/src/intervalstore/impl/NCList.java new file mode 100644 index 0000000..0bf6e1b --- /dev/null +++ b/src/intervalstore/impl/NCList.java @@ -0,0 +1,752 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import intervalstore.api.IntervalI; +import intervalstore.impl.Range; + +/** + * An adapted implementation of NCList as described in the paper + * + *

+ * Nested Containment List (NCList): a new algorithm for accelerating
+ * interval query of genome alignment and interval databases
+ * - Alexander V. Alekseyenko, Christopher J. Lee
+ * https://doi.org/10.1093/bioinformatics/btl647
+ * 
+ */ +public class NCList extends AbstractCollection +{ + /** + * A depth-first iterator over the elements stored in the NCList + */ + private class NCListIterator implements Iterator + { + int subrangeIndex; + + Iterator nodeIterator; + + /** + * Constructor bootstraps a pointer to an iterator over the first subrange + * (if any) + */ + NCListIterator() + { + subrangeIndex = nextSubrange(-1); + } + + /** + * Moves the subrange iterator to the next subrange, and answers its index + * in the list of subranges. If there are no more, sets the iterator to null + * and answers -1. + * + * @return + */ + private int nextSubrange(int after) + { + int nextIndex = after + 1; + if (nextIndex < subranges.size()) + { + nodeIterator = subranges.get(nextIndex).iterator(); + return nextIndex; + } + nodeIterator = null; + return -1; + } + + @Override + public boolean hasNext() + { + return nodeIterator != null && nodeIterator.hasNext(); + } + + /** + * Answers the next element returned by the current NCNode's iterator, and + * advances the iterator (to the next NCNode if necessary) + */ + @Override + public T next() + { + if (nodeIterator == null || !nodeIterator.hasNext()) + { + throw new NoSuchElementException(); + } + T result = nodeIterator.next(); + + if (!nodeIterator.hasNext()) + { + subrangeIndex = nextSubrange(subrangeIndex); + } + return result; + } + + } + + /* + * the number of interval instances represented + */ + private int size; + + /* + * a list, in start position order, of sublists of ranges ordered so + * that each contains (or is the same as) the one that follows it + */ + private List> subranges; + + /** + * Constructor given a list of things that are each located on a contiguous + * interval. Note that the constructor may reorder the list. + *

+ * We assume here that for each range, start <= end. Behaviour for reverse + * ordered ranges is undefined. + * + * @param ranges + */ + public NCList(List ranges) + { + this(); + build(ranges); + } + + /** + * Sorts and groups ranges into sublists where each sublist represents an + * interval and its contained subintervals + * + * @param ranges + */ + protected void build(List ranges) + { + /* + * sort and partition into subranges + * which have no mutual containment + */ + List sublists = partitionNestedSublists(ranges); + + /* + * convert each subrange to an NCNode consisting of a range and + * (possibly) its contained NCList + */ + for (IntervalI sublist : sublists) + { + subranges.add(new NCNode<>( + ranges.subList(sublist.getBegin(), sublist.getEnd() + 1))); + } + + size = ranges.size(); + } + + /** + * Default constructor + */ + public NCList() + { + subranges = new ArrayList<>(); + } + + /** + * Sorts and traverses the ranges to identify sublists, whose start intervals + * are overlapping or disjoint but not mutually contained. Answers a list of + * start-end indices of the sorted list of ranges. + * + * @param ranges + * @return + */ + protected List partitionNestedSublists(List ranges) + { + List sublists = new ArrayList<>(); + + if (ranges.isEmpty()) + { + return sublists; + } + + /* + * sort by start ascending, length descending, so that + * contained intervals follow their containing interval + */ + Collections.sort(ranges, new NCListBuilder<>().getComparator()); + + int listStartIndex = 0; + + IntervalI lastParent = ranges.get(0); + boolean first = true; + + for (int i = 0; i < ranges.size(); i++) + { + IntervalI nextInterval = ranges.get(i); + if (!first && !lastParent.properlyContainsInterval(nextInterval)) + { + /* + * this interval is not properly contained in the parent; + * close off the last sublist + */ + sublists.add(new Range(listStartIndex, i - 1)); + listStartIndex = i; + lastParent = nextInterval; + } + first = false; + } + + sublists.add(new Range(listStartIndex, ranges.size() - 1)); + return sublists; + } + + /** + * Adds one entry to the stored set + * + * @param entry + */ + @Override + public synchronized boolean add(final T entry) + { + final NCNode newNode = new NCNode<>(entry); + addNode(newNode); + return true; + } + + /** + * Adds one NCNode to this NCList + *

+ * This method does not update the size (interval count) of this + * NCList, as it may be used to rearrange nodes without changing their count. + * Callers should increment the count if needed. + * + * @param newNode + */ + protected void addNode(final NCNode newNode) + { + final long start = newNode.getBegin(); + final long end = newNode.getEnd(); + size += newNode.size(); + + /* + * cases: + * 1) precedes all subranges - add as NCNode on front of list + * 2) follows all subranges - add as NCNode on end of list + * 3) matches a subrange - add as a sibling in the list + * 4) properly enclosed by a subrange - add recursively to subrange + * 5) properly encloses one or more subranges - push them inside it + * 6) spans two subranges - insert between them + */ + + /* + * find the first subrange whose end does not precede entry's start + */ + int candidateIndex = findFirstOverlap(start); + + /* + * search for maximal span of subranges i-k that the new entry + * encloses; or a subrange that encloses the new entry + */ + boolean enclosing = false; + int firstEnclosed = 0; + int lastEnclosed = 0; + + for (int j = candidateIndex; j < subranges.size(); j++) + { + NCNode subrange = subranges.get(j); + + if (subrange.equalsInterval(newNode)) + { + /* + * matching interval - insert adjacent + */ + subranges.add(j, newNode); + return; + } + + if (end < subrange.getBegin() && !enclosing) + { + /* + * new entry lies between subranges j-1 j + */ + subranges.add(j, newNode); + return; + } + + if (subrange.properlyContainsInterval(newNode)) + { + /* + * push new entry inside this subrange as it encloses it + */ + subrange.addNode(newNode); + return; + } + + if (start <= subrange.getBegin()) + { + if (end >= subrange.getEnd()) + { + /* + * new entry encloses this subrange (and possibly preceding ones); + * continue to find the maximal list it encloses + */ + if (!enclosing) + { + firstEnclosed = j; + } + lastEnclosed = j; + enclosing = true; + continue; + } + else + { + /* + * entry spans from before this subrange to inside it + */ + if (enclosing) + { + /* + * entry encloses one or more preceding subranges + */ + push(newNode, firstEnclosed, lastEnclosed); + } + else + { + /* + * entry overlaps two subranges but doesn't enclose either + * so just add it + */ + subranges.add(j, newNode); + } + return; + } + } + } + + /* + * drops through to here if new range encloses all others + * or overlaps the last one + */ + if (enclosing) + { + push(newNode, firstEnclosed, lastEnclosed); + } + else + { + subranges.add(newNode); + } + } + + @Override + public boolean contains(Object entry) + { + if (!(entry instanceof IntervalI)) + { + return false; + } + IntervalI interval = (IntervalI) entry; + + /* + * find the first sublist that might overlap, i.e. + * the first whose end position is >= from + */ + int candidateIndex = findFirstOverlap(interval.getBegin()); + + int to = interval.getEnd(); + + for (int i = candidateIndex; i < subranges.size(); i++) + { + NCNode candidate = subranges.get(i); + if (candidate.getBegin() > to) + { + /* + * we are past the end of our target range + */ + break; + } + if (candidate.contains(interval)) + { + return true; + } + } + return false; + } + + /** + * Update the tree so that the new node encloses current subranges i to j + * (inclusive). That is, replace subranges i-j (inclusive) with a new subrange + * that contains them. + * + * @param node + * @param i + * @param j + * @throws IllegalArgumentException + * if any of the subranges is not contained by the node's start-end + * range + */ + protected synchronized void push(NCNode node, final int i, + final int j) + { + for (int k = i; k <= j; k++) + { + NCNode n = subranges.get(k); + if (!node.containsInterval(n)) { + throw new IllegalArgumentException("Can't push " + n.toString() + + " inside " + node.toString()); + } + node.addNode(n); + } + + for (int k = j; k >= i; k--) + { + subranges.remove(k); + } + subranges.add(i, node); + } + + /** + * Answers a list of contained intervals that overlap the given range + * + * @param from + * @param to + * @return + */ + public List findOverlaps(long from, long to) + { + List result = new ArrayList<>(); + + findOverlaps(from, to, result); + + return result; + } + + /** + * Recursively searches the NCList adding any items that overlap the from-to + * range to the result list + * + * @param from + * @param to + * @param result + */ + protected void findOverlaps(long from, long to, List result) + { + /* + * find the first sublist that might overlap, i.e. + * the first whose end position is >= from + */ + int candidateIndex = findFirstOverlap(from); + + for (int i = candidateIndex; i < subranges.size(); i++) + { + NCNode candidate = subranges.get(i); + if (candidate.getBegin() > to) + { + /* + * we are past the end of our target range + */ + break; + } + candidate.findOverlaps(from, to, result); + } + + } + + /** + * Search subranges for the first one whose end position is not before the + * target range's start position, i.e. the first one that may overlap the + * target range. Returns the index in the list of the first such range found, + * or the length of the list if none found. + * + * @param from + * @return + */ + protected int findFirstOverlap(final long from) + { + return BinarySearcher.findFirst(subranges, + val -> val.getEnd() >= from); + } + + /** + * Formats the tree as a bracketed list e.g. + * + *

+   * [1-100 [10-30 [10-20]], 15-30 [20-20]]
+   * 
+ */ + @Override + public String toString() + { + return subranges.toString(); + } + + /** + * Answers the NCList as an indented list + * + * @return + */ + public String prettyPrint() + { + StringBuilder sb = new StringBuilder(512); + int offset = 0; + int indent = 2; + prettyPrint(sb, offset, indent); + sb.append(System.lineSeparator()); + return sb.toString(); + } + + /** + * @param sb + * @param offset + * @param indent + */ + void prettyPrint(StringBuilder sb, int offset, int indent) + { + boolean first = true; + for (NCNode subrange : subranges) + { + if (!first) + { + sb.append(System.lineSeparator()); + } + first = false; + subrange.prettyPrint(sb, offset, indent); + } + } + + /** + * Answers true if the store's structure is valid (nesting containment rules + * are obeyed), else false. For use in testing and debugging. + * + * @return + */ + public boolean isValid() + { + int count = 0; + for (NCNode subrange : subranges) + { + count += subrange.size(); + } + if (count != size) + { + return false; + } + return isValid(Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + /** + * Answers true if the data held satisfy the rules of construction of an + * NCList bounded within the given start-end range, else false. + *

+ * Each subrange must lie within start-end (inclusive). Subranges must be + * ordered by start position ascending, and within that by end position + * descending. + *

+ * + * @param start + * @param end + * @return + */ + boolean isValid(final int start, final int end) + { + NCNode lastRange = null; + + for (NCNode subrange : subranges) + { + if (subrange.getBegin() < start) + { + System.err.println("error in NCList: range " + subrange.toString() + + " starts before " + end); + return false; + } + if (subrange.getEnd() > end) + { + System.err.println("error in NCList: range " + subrange.toString() + + " ends after " + end); + return false; + } + + if (lastRange != null) + { + if (subrange.getBegin() < lastRange.getBegin()) + { + System.err.println("error in NCList: range " + subrange.toString() + + " starts before " + lastRange.toString()); + return false; + } + if (subrange.properlyContainsInterval(lastRange)) + { + System.err.println("error in NCList: range " + subrange.toString() + + " encloses preceding: " + lastRange.toString()); + return false; + } + if (lastRange.properlyContainsInterval(subrange)) + { + System.err.println("error in NCList: range " + subrange.toString() + + " enclosed by preceding: " + lastRange.toString()); + return false; + } + } + lastRange = subrange; + + if (!subrange.isValid()) + { + return false; + } + } + return true; + } + + /** + * Answers the number of intervals stored + * + * @return + */ + @Override + public int size() + { + return size; + } + + /** + * Answers a list of all entries stored, in no guaranteed order. This method + * is not synchronized, so is not thread-safe. + */ + public List getEntries() + { + List result = new ArrayList<>(); + getEntries(result); + return result; + } + + /** + * Adds all contained entries to the given list + * + * @param result + */ + void getEntries(List result) + { + for (NCNode subrange : subranges) + { + subrange.getEntries(result); + } + } + + /** + * Removes the first interval Ifound that is equal to T + * (I.equals(T)). Answers true if an interval is removed, false + * if no match is found. This method is synchronized so thread-safe. + * + * @param entry + * @return + */ + public synchronized boolean remove(T entry) + { + if (entry == null) + { + return false; + } + int i = findFirstOverlap(entry.getBegin()); + + for (; i < subranges.size(); i++) + { + NCNode subrange = subranges.get(i); + if (subrange.getBegin() > entry.getBegin()) + { + /* + * not found + */ + return false; + } + NCList subRegions = subrange.getSubRegions(); + + if (subrange.getRegion().equals(entry)) + { + /* + * if the subrange is rooted on this entry, remove it, + * and remove and promote its subregions (if any) + */ + subranges.remove(i); + size -= subrange.size(); + if (subRegions != null) + { + for (NCNode r : subRegions.subranges) + { + addNode(r); + } + } + return true; + } + else + { + if (subrange.remove(entry)) + { + size--; + return true; + } + } + } + return false; + } + + /** + * Answers the depth of interval nesting of this object, where 1 means there + * are no nested sub-intervals + * + * @return + */ + public int getDepth() + { + int subDepth = 0; + for (NCNode subrange : subranges) + { + subDepth = Math.max(subDepth, subrange.getDepth()); + } + + return subDepth; + } + + /** + * Answers an iterator over the contained intervals, with no particular order + * guaranteed. The iterator does not support the optional remove + * operation (throws UnsupportedOperationException if attempted). + */ + @Override + public Iterator iterator() + { + return new NCListIterator(); + } + + @Override + public synchronized void clear() + { + subranges.clear(); + size = 0; + } +} diff --git a/src/intervalstore/impl/NCListBuilder.java b/src/intervalstore/impl/NCListBuilder.java new file mode 100644 index 0000000..4c87306 --- /dev/null +++ b/src/intervalstore/impl/NCListBuilder.java @@ -0,0 +1,143 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import intervalstore.api.IntervalI; +import intervalstore.impl.Range; + +/** + * A comparator that orders ranges by either start position ascending. If + * position matches, ordering is by length descending. This provides the + * canonical ordering of intervals into subranges in order to build a nested + * containment list. + * + * @author gmcarstairs + */ +public class NCListBuilder +{ + class NCListComparator implements Comparator + { + /** + * Compares two intervals in a way that will sort a list by start position + * ascending, then by length descending. Answers + *

    + *
  • a negative value if o1.begin < o2.begin
  • + *
  • else a positive value if o1.begin > o2.begin
  • + *
  • else a negative value if o1.end > o2.end
  • + *
  • else a positive value of o1.end < o2.end
  • + *
  • else zero
  • + */ + @Override + public int compare(V o1, V o2) + { + int order = Integer.compare(o1.getBegin(), o2.getBegin()); + if (order == 0) + { + /* + * if tied on start position, longer length sorts to left + * i.e. the negation of normal ordering by length + */ + order = Integer.compare(o2.getEnd(), o1.getEnd()); + } + return order; + } + } + + private Comparator comparator = new NCListComparator<>(); + + /** + * Default constructor + */ + public NCListBuilder() + { + } + + /** + * Answers a comparator which sorts items of type T by start position + * ascending, and within that by end position (length) descending) + * + * @return + */ + Comparator getComparator() + { + return comparator; + } + + /** + * Sorts and traverses the ranges to identify sublists, whose start intervals + * are overlapping or disjoint but not mutually contained. Answers a list of + * start-end indices of the sorted list of ranges. + * + * @param ranges + * @return + */ + List partitionNestedSublists(List ranges) + { + List sublists = new ArrayList<>(); + + /* + * sort by start ascending, length descending, so that + * contained intervals follow their containing interval + */ + Collections.sort(ranges, comparator); + + int listStartIndex = 0; + + IntervalI lastParent = ranges.get(0); + boolean first = true; + + for (int i = 0; i < ranges.size(); i++) + { + IntervalI nextInterval = ranges.get(i); + if (!first && !lastParent.properlyContainsInterval(nextInterval)) + { + /* + * this interval is not properly contained in the parent; + * close off the last sublist + */ + sublists.add(new Range(listStartIndex, i - 1)); + listStartIndex = i; + lastParent = nextInterval; + } + first = false; + } + + sublists.add(new Range(listStartIndex, ranges.size() - 1)); + return sublists; + } +} diff --git a/src/intervalstore/impl/NCNode.java b/src/intervalstore/impl/NCNode.java new file mode 100644 index 0000000..16ae0b7 --- /dev/null +++ b/src/intervalstore/impl/NCNode.java @@ -0,0 +1,381 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import intervalstore.api.IntervalI; + +/** + * Each node of the NCList tree consists of a range, and (optionally) the NCList + * of ranges it encloses + * + * @param + */ +class NCNode implements IntervalI +{ + /** + * A depth-first iterator over the intervals stored in the NCNode. The + * optional remove operation is not supported. + * + * @author gmcarstairs + * + */ + private class NCNodeIterator implements Iterator + { + boolean first = true; + Iterator subregionIterator; + + @Override + public boolean hasNext() + { + return first + || (subregionIterator != null && subregionIterator.hasNext()); + } + + /** + * Answers the next interval - initially the top level interval for this + * node, thereafter the intervals returned by the NCList's iterator + */ + @Override + public T next() + { + if (first) + { + subregionIterator = subregions == null ? null + : subregions.iterator(); + first = false; + return region; + } + if (subregionIterator == null || !subregionIterator.hasNext()) + { + throw new NoSuchElementException(); + } + return subregionIterator.next(); + } + } + + private T region; + + /* + * null, or an object holding contained subregions of this nodes region + */ + private NCList subregions; + + /** + * Constructor given a list of ranges. The list not be empty, and should be + * ordered so that the first range contains all the others. If not, behaviour + * will likely be invalid. + * + * @param ranges + * @throws IllegalArgumentException + * if the list is empty + */ + NCNode(List ranges) + { + if (ranges.isEmpty()) + { + throw new IllegalArgumentException("List may not be empty"); + } + region = ranges.get(0); + + if (ranges.size() > 1) + { + subregions = new NCList<>(ranges.subList(1, ranges.size())); + } + } + + /** + * Constructor given a single range + * + * @param range + */ + NCNode(T range) + { + region = range; + } + + @Override + public int getBegin() + { + return region.getBegin(); + } + + @Override + public int getEnd() + { + return region.getEnd(); + } + + /** + * Formats the node as a bracketed list e.g. + * + *
    +   * [1-100 [10-30 [10-20]], 15-30 [20-20]]
    +   * 
    + * + * where the format for each interval is as given by T.toString() + */ + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(10 * size()); + sb.append(region.toString()); + if (subregions != null) + { + sb.append(" ").append(subregions.toString()); + } + return sb.toString(); + } + + void prettyPrint(StringBuilder sb, int offset, int indent) + { + for (int i = 0; i < offset; i++) + { + sb.append(" "); + } + sb.append(region.toString()); + if (subregions != null) + { + sb.append(System.lineSeparator()); + subregions.prettyPrint(sb, offset + 2, indent); + } + } + + /** + * Add any ranges that overlap the from-to range to the result list + * + * @param from + * @param to + * @param result + */ + void findOverlaps(long from, long to, List result) + { + if (region.getBegin() <= to && region.getEnd() >= from) + { + result.add(region); + if (subregions != null) + { + subregions.findOverlaps(from, to, result); + } + } + } + + /** + * Add one node to this node's subregions. + * + * @param entry + * @throws IllegalArgumentException + * if the added node is not contained by the node's start-end range + */ + synchronized void addNode(NCNode entry) + { + if (!region.containsInterval(entry)) + { + throw new IllegalArgumentException( + String.format("adding improper subrange %d-%d to range %d-%d", + entry.getBegin(), entry.getEnd(), region.getBegin(), + region.getEnd())); + } + if (subregions == null) + { + subregions = new NCList<>(); + } + + subregions.addNode(entry); + } + + /** + * Answers true if the data held satisfy the rules of construction of an + * NCList, else false + * + * @return + */ + boolean isValid() + { + /* + * we don't handle reverse ranges + */ + if (region != null && region.getBegin() > region.getEnd()) + { + return false; + } + if (subregions == null) + { + return true; + } + if (subregions.isEmpty()) + { + /* + * we expect empty subregions to be nulled + */ + return false; + } + return subregions.isValid(getBegin(), getEnd()); + } + + /** + * Adds all contained entries to the given list + * + * @param entries + */ + void getEntries(List entries) + { + entries.add(region); + if (subregions != null) + { + subregions.getEntries(entries); + } + } + + /** + * Answers true if this object contains the given entry (by object equals + * test), else false + * + * @param entry + * @return + */ + boolean contains(IntervalI entry) + { + if (entry == null) + { + return false; + } + if (entry.equals(region)) + { + return true; + } + return subregions == null ? false : subregions.contains(entry); + } + + /** + * Answers the 'root' region modelled by this object + * + * @return + */ + T getRegion() + { + return region; + } + + /** + * Answers the (possibly null) contained regions within this object + * + * @return + */ + NCList getSubRegions() + { + return subregions; + } + + /** + * Answers the (deep) size of this node i.e. the number of intervals it models + * + * @return + */ + int size() + { + return subregions == null ? 1 : 1 + subregions.size(); + } + + /** + * Answers the depth of NCNode / NCList nesting in the data tree + * + * @return + */ + int getDepth() + { + return subregions == null ? 1 : 1 + subregions.getDepth(); + } + + /** + * Answers an iterator over the intervals stored in this node. The iterator + * does not support the optional remove operation (throws + * UnsupportedOperationException if attempted). + * + * @return + */ + public Iterator iterator() + { + return new NCNodeIterator(); + } + + /** + * Removes the first interval found equal to the given entry. Answers true if + * a matching interval is found and removed, else false. + * + * @param entry + * @return + */ + boolean remove(T entry) + { + if (region.equals(entry)) + { + /* + * this case must be handled by NCList, to allow any + * children of a deleted interval to be promoted + */ + throw new IllegalArgumentException("NCNode can't remove self"); + } + if (subregions == null) + { + return false; + } + if (subregions.remove(entry)) + { + if (subregions.isEmpty()) + { + subregions = null; + } + return true; + } + return false; + } + + @SuppressWarnings("unchecked") + @Override + public boolean equals(Object o) + { + return o != null && o instanceof NCNode + && equalsInterval((NCNode) o); + } + + @Override + public boolean equalsInterval(IntervalI i) + { + return getBegin() == i.getBegin() && getEnd() == i.getEnd(); + + } + +} -- 1.7.10.2