/* BSD 3-Clause License Copyright (c) 2018, Mungo Carstairs All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package intervalstore.nonc; import jalview.datamodel.SequenceFeature; import java.util.AbstractCollection; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import intervalstore.api.IntervalI; import intervalstore.api.IntervalStoreI; /** * * A second idea, doing a double binary sort for the full interval. Seemed like * a good idea, but is 50% slower. * * A Collection class to store interval-associated data, with options for "lazy" * sorting so as to speed incremental construction of the data prior to issuing * a findOverlap call. * * * Accepts duplicate entries but not null values. * * * * @author Bob Hanson 2019.08.06 * * @param * any type providing getBegin(), getEnd() * getContainedBy(), and setContainedBy() */ public class IntervalStore0 extends AbstractCollection implements IntervalStoreI { static int NOT_CONTAINED = Integer.MIN_VALUE; static int CONTAINMENT_UNKNOWN = 0; /** * Search for the last interval that starts before or at the specified from/to * range and the first interval that starts after it. In the situation that * there are multiple intervals starting at from, this method returns the * first of those. * * @param a * @param from * @param to * @param ret * @return */ public int binaryLastIntervalSearch(long from, long to, int[] ret) { int start = 0, start2 = 0; int matched = 0; int end = intervalCount - 1, end2 = intervalCount; int mid, begin; IntervalI e; while (start <= end) { mid = (start + end) >>> 1; e = intervals[mid]; begin = e.getBegin(); switch (Long.signum(begin - from)) { case -1: matched = mid; start = mid + 1; break; case 0: case 1: end = mid - 1; if (begin > to) { end2 = mid; } else { start2 = mid; } break; } } ret[0] = end2; start = Math.max(start2, end); end = end2 - 1; while (start <= end) { mid = (start + end) >>> 1; e = intervals[mid]; begin = e.getBegin(); if (begin > to) { ret[0] = mid; end = mid - 1; } else { start = mid + 1; } } return matched; } /** * My preference is for a bigendian comparison, but you may differ. */ private Comparator icompare; /** * bigendian is what NCList does; change icompare to switch to that */ private boolean bigendian; private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, maxEnd = Integer.MAX_VALUE; // private Comparator icompare = new IntervalComparator(); private boolean isTainted; private int capacity = 8; protected IntervalI[] intervals = new IntervalI[capacity]; private int[] offsets; private int[] ret = new int[1]; protected int intervalCount; private int added; private int deleted; private BitSet bsDeleted; /** * Constructor */ public IntervalStore0() { this(null); } /** * Constructor given a list of intervals. Note that the list may get sorted as * a side-effect of calling this constructor. */ public IntervalStore0(List intervals) { this(intervals, null, false); } /** * * @param intervals * intervals to initialize with (others may still be added) * @param comparator * IntervalI.COMPARATOR_BEGIN_ASC_END_ASC (default) or * IntervalI.COMPARE_BEGIN_ASC_END_DESC, but this could also be one * that sorts by description as well, for example. * @param bigendian * true if the comparator sorts [10-30] before [10-20] */ public IntervalStore0(List intervals, Comparator comparator, boolean bigendian) { if (intervals != null) { intervals.toArray( this.intervals = new IntervalI[capacity = intervalCount = intervals .size()]); } // COMPARE_BEGIN_ASC_END_DESC is standard, meaning // the order will be [10,100] before [10,80] // this order doesn't really matter much. icompare = (comparator != null ? comparator : bigendian ? IntervalI.COMPARE_BEGIN_ASC_END_DESC : IntervalI.COMPARE_BEGIN_ASC_END_ASC); this.bigendian = bigendian; if (// DO_PRESORT && intervalCount > 1) { sort(); } isTainted = true; } /** * Adds one interval to the store, allowing duplicates. * * @param interval */ @Override public boolean add(T interval) { return add(interval, true); } /** * Adds one interval to the store, optionally checking for duplicates. * * @param interval * @param allowDuplicates */ @Override public boolean add(T interval, boolean allowDuplicates) { if (interval == null) { return false; } if (deleted > 0) { finalizeDeletion(); } if (!isTainted) { offsets = null; isTainted = true; } synchronized (intervals) { int index = intervalCount; int start = interval.getBegin(); if (intervalCount + added + 1 >= capacity) { intervals = finalizeAddition( new IntervalI[capacity = capacity << 1]); } // if (DO_PRESORT && isSorted) // { if (intervalCount == 0) { // ignore } else { index = findInterval(interval, allowDuplicates); if (!allowDuplicates && index >= 0) { return false; } if (index < 0) { index = -1 - index; } else { index++; } } // } // else // { // if (!allowDuplicates && findInterval(interval) >= 0) // { // return false; // } // isSorted = false; // } if (index == intervalCount) { intervals[intervalCount++] = interval; // System.out.println("added " + intervalCount + " " + interval); } else { int pt = capacity - ++added; intervals[pt] = interval; // System.out.println("stashed " + pt + " " + interval + " for " // + index + " " + intervals[index]); if (offsets == null) { offsets = new int[capacity]; } offsets[pt] = offsets[index]; offsets[index] = pt; } minStart = Math.min(minStart, start); maxStart = Math.max(maxStart, start); return true; } } private IntervalI[] finalizeAddition(IntervalI[] dest) { if (dest == null) { dest = intervals; } if (added == 0) { if (intervalCount > 0 && dest != intervals) { System.arraycopy(intervals, 0, dest, 0, intervalCount); } capacity = dest.length; return dest; } // array is [(intervalCount)...null...(added)] int ntotal = intervalCount + added; for (int ptShift = ntotal, pt = intervalCount; pt >= 0;) { int pt0 = pt; while (--pt >= 0 && offsets[pt] == 0) { ; } if (pt < 0) { pt = 0; } int nOK = pt0 - pt; // shift upper intervals right ptShift -= nOK; if (nOK > 0) { System.arraycopy(intervals, pt, dest, ptShift, nOK); } if (added == 0) { break; } for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) { dest[--ptShift] = intervals[offset]; --added; } } offsets = null; intervalCount = ntotal; capacity = dest.length; return dest; } /** * A binary search for a duplicate. * * @param interval * @return */ public int binaryIdentitySearch(IntervalI interval) { return binaryIdentitySearch(interval, null, false); } /** * for remove() and contains() * * @param list * @param interval * @param bsIgnore * for deleted * @param rangeOnly * don't do a full identity check, just a range check * @return index or, if not found, -1 - "would be here" */ private int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore, boolean rangeOnly) { int start = 0; int r0 = interval.getBegin(); int r1 = interval.getEnd(); int end = intervalCount - 1; if (end < 0 || r0 < minStart) { return -1; } if (r0 > maxStart) { return -1 - intervalCount; } while (start <= end) { int mid = (start + end) >>> 1; IntervalI r = intervals[mid]; switch (compareRange(r, r0, r1)) { case -1: start = mid + 1; continue; case 1: end = mid - 1; continue; case 0: IntervalI iv = intervals[mid]; if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(mid)) && sameInterval(interval, iv)) { return mid; // found one; just scan up and down now, first checking the range, but // also checking other possible aspects of equivalence. } for (int i = mid; ++i <= end;) { if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1) { break; } if (!rangeOnly && (bsIgnore == null || !bsIgnore.get(i)) && sameInterval(interval, iv)) { return i; } } for (int i = mid; --i >= start;) { if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1) { return -1 - ++i; } if ((bsIgnore == null || !bsIgnore.get(i)) && sameInterval(interval, iv)) { return i; } } return -1 - start; } } return -1 - start; } // private int binaryInsertionSearch(long from, long to) // { // int matched = intervalCount; // int end = matched - 1; // int start = matched; // if (end < 0 || from > intervals[end].getEnd() // || from < intervals[start = 0].getBegin()) // return start; // while (start <= end) // { // int mid = (start + end) >>> 1; // switch (compareRange(intervals[mid], from, to)) // { // case 0: // return mid; // case 1: // matched = mid; // end = mid - 1; // continue; // case -1: // start = mid + 1; // continue; // } // // } // return matched; // } @Override public void clear() { intervalCount = added = 0; // isSorted = true; isTainted = true; offsets = null; minStart = maxEnd = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } /** * Compare an interval t to a from/to range for insertion purposes * * @param t * @param from * @param to * @return 0 if same, 1 if start is after from, or start equals from and * [bigendian: end is before to | littleendian: end is after to], else * -1 */ private int compareRange(IntervalI t, long from, long to) { int order = Long.signum(t.getBegin() - from); return (order == 0 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) : order); } @Override public boolean contains(Object entry) { if (entry == null || intervalCount == 0) { return false; } if (// !isSorted || deleted > 0) { sort(); } return (findInterval((IntervalI) entry, false) >= 0); } public boolean containsInterval(IntervalI outer, IntervalI inner) { ensureFinalized(); int index = binaryIdentitySearch(inner); if (index >= 0) { while ((index = index - Math.abs(offsets[index])) >= 0) { if (intervals[index] == outer) { return true; } } } return false; } private void ensureFinalized() { if (isTainted) { if (// !isSorted || added > 0 || deleted > 0) { sort(); } if (offsets == null || offsets.length < intervalCount) { offsets = new int[intervalCount]; } linkFeatures(); isTainted = false; } } /** * Find all overlaps within the given range, inclusively. * * @return a list sorted in ascending order of start position * */ @Override public List findOverlaps(long from, long to) { List list = findOverlaps(from, to, null); Collections.reverse(list); return list; } /** * Find all overlaps within the given range, inclusively. * * @return a list sorted in descending order of start position * */ @SuppressWarnings("unchecked") @Override public List findOverlaps(long from, long to, List result) { if (result == null) { result = new ArrayList<>(); } switch (intervalCount + added) { case 0: return result; case 1: IntervalI sf = intervals[0]; if (sf.getBegin() <= to && sf.getEnd() >= from) { result.add((T) sf); } return result; } ensureFinalized(); if (from > maxEnd || to < minStart) { return result; } int index = binaryLastIntervalSearch(from, to, ret); int index1 = ret[0]; if (index1 < 0) { return result; } if (index1 > index + 1) { while (--index1 > index) { result.add((T) intervals[index1]); } } boolean isMonotonic = false; while (index >= 0) { IntervalI sf = intervals[index]; if (sf.getEnd() >= from) { result.add((T) sf); } else if (isMonotonic) { break; } int offset = offsets[index]; isMonotonic = (offset < 0); index -= (isMonotonic ? -offset : offset); } return result; } public IntervalI get(int i) { if (i < 0 || i >= intervalCount + added) { return null; } ensureFinalized(); return intervals[i]; } private int getContainedBy(int index, int begin) { while (index >= 0) { IntervalI sf = intervals[index]; if (sf.getEnd() >= begin) { // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 // + "\nFS in " + sf.getIndex1() + ":" + sf); return index; } index -= Math.abs(offsets[index]); } return NOT_CONTAINED; } @Override public int getDepth() { ensureFinalized(); if (intervalCount < 2) { return intervalCount; } int maxDepth = 1; IntervalI root = null; for (int i = 0; i < intervalCount; i++) { IntervalI element = intervals[i]; if (offsets[i] == NOT_CONTAINED) { root = element; } int depth = 1; int index = i; int offset; while ((index = index - Math.abs(offset = offsets[index])) >= 0) { element = intervals[index]; if (++depth > maxDepth && (element == root || offset < 0)) { maxDepth = depth; break; } } } return maxDepth; } public int getWidth() { ensureFinalized(); int w = 0; for (int i = offsets.length; --i >= 0;) { if (offsets[i] > 0) { w++; } } return w; } public boolean isValid() { ensureFinalized(); return true; } /** * Answers an iterator over the intervals in the store, with no particular * ordering guaranteed. The iterator does not support the optional * remove operation (throws * UnsupportedOperationException if attempted). */ @Override public Iterator iterator() { ensureFinalized(); return new Iterator() { private int next; @Override public boolean hasNext() { return next < intervalCount; } @SuppressWarnings("unchecked") @Override public T next() { if (next >= intervalCount) { throw new NoSuchElementException(); } return (T) intervals[next++]; } }; } private void linkFeatures() { if (intervalCount == 0) { return; } maxEnd = intervals[0].getEnd(); offsets[0] = NOT_CONTAINED; if (intervalCount == 1) { return; } boolean isMonotonic = true; for (int i = 1; i < intervalCount; i++) { IntervalI sf = intervals[i]; int begin = sf.getBegin(); int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1); // System.out.println(sf + " is contained by " // + (index < 0 ? null : starts[index])); offsets[i] = (index < 0 ? NOT_CONTAINED : isMonotonic ? index - i : i - index); isMonotonic = (sf.getEnd() > maxEnd); if (isMonotonic) { maxEnd = sf.getEnd(); } } } @Override public String prettyPrint() { switch (intervalCount + added) { case 0: return ""; case 1: return intervals[0] + "\n"; } ensureFinalized(); String sep = "\t"; StringBuffer sb = new StringBuffer(); for (int i = 0; i < intervalCount; i++) { IntervalI range = intervals[i]; int index = i; while ((index = index - Math.abs(offsets[index])) >= 0) { sb.append(sep); } sb.append(range.toString()).append('\n'); } return sb.toString(); } @Override public synchronized boolean remove(Object o) { // if (o == null) // { // throw new NullPointerException(); // } return (o != null && intervalCount > 0 && removeInterval((IntervalI) o)); } /** * Find the interval or return where it should go, possibly into the add * buffer * * @param interval * @param rangeOnly * don't do a full check for identity, just check for range * @return index (nonnegative) or index where it would go (negative) */ private int findInterval(IntervalI interval, boolean rangeOnly) { // if (isSorted) // { int pt = binaryIdentitySearch(interval, null, rangeOnly); // if (addPt == intervalCount || offsets[pt] == 0) // return pt; if (pt >= 0 || added == 0 || pt == -1 - intervalCount) { return pt; } pt = -1 - pt; int start = interval.getBegin(); int end = interval.getEnd(); int match = pt; while ((pt = offsets[pt]) != 0) { IntervalI iv = intervals[pt]; switch (compareRange(iv, start, end)) { case -1: break; case 0: if (!rangeOnly && sameInterval(iv, interval)) { return pt; } // fall through case 1: match = pt; continue; } } return -1 - match; // } // else // { // int i = intervalCount; // while (--i >= 0 && !sameRange(intervals[i], interval)) // { // ; // } // return i; // } } /** * Uses a binary search to find the entry and removes it if found. * * @param interval * @return */ protected boolean removeInterval(IntervalI interval) { if (// !isSorted || added > 0) { sort(); } int i = binaryIdentitySearch(interval, bsDeleted, false); if (i < 0) { return false; } if (deleted == 0) { if (bsDeleted == null) { bsDeleted = new BitSet(intervalCount); } else { bsDeleted.clear(); } } bsDeleted.set(i); deleted++; return (isTainted = true); } private void finalizeDeletion() { if (deleted == 0) { return; } // ......xxx.....xxxx.....xxxxx.... // ......^i,pt // ...... ....... // ............ for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;) { i = bsDeleted.nextClearBit(i + 1); int pt1 = bsDeleted.nextSetBit(i + 1); if (pt1 < 0) { pt1 = intervalCount; } int n = pt1 - i; System.arraycopy(intervals, i, intervals, pt, n); pt += n; if (pt1 == intervalCount) { for (i = pt1; --i >= pt;) { intervals[i] = null; } intervalCount -= deleted; deleted = 0; bsDeleted.clear(); break; } i = pt1; } } public boolean revalidate() { isTainted = true; // isSorted = false; ensureFinalized(); return true; } @Override public int size() { return intervalCount + added - deleted; } @Override public Object[] toArray() { ensureFinalized(); return super.toArray(); } /** * Sort intervals by start (lowest first) and end (highest first). */ private void sort() { if (added > 0) { intervals = finalizeAddition(new IntervalI[intervalCount + added]); } else if (deleted > 0) { finalizeDeletion(); } else { Arrays.sort(intervals, 0, intervalCount, icompare); } updateMinMaxStart(); // isSorted = true; } private void updateMinMaxStart() { if (intervalCount > 0) { minStart = intervals[0].getBegin(); maxStart = intervals[intervalCount - 1].getBegin(); } else { minStart = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } } @Override public String toString() { return prettyPrint(); } public boolean canCheckForDuplicates() { return true; } /** * CAUTION! This presumes that equalsInterval does check descriptions. Note * that bartongroup.IntervalStoreJ does NOT do this and * jalview.datamodel.features.SequenceFeature does not, either. But * nonc.SimpleFeature in test DOES, because it overrides equalsInterval. * * The reason we do it this way is to avoid an unnecessary and costly test for * obj instanceof IntervalI. * * Added by Mungo to use equals, but that requires use of instance checking, * which is not significant in Java (apparently), but is a bigger deal in * JavaScript. So here we have to hack * bobhanson.IntervalStoreJ.nonc.InervalStore to bypass equals. * * @param i1 * @param i2 * @return */ boolean sameInterval(IntervalI i1, IntervalI i2) { // avoiding equals() for JavaScript performance return ((SequenceFeature) i1).equals((SequenceFeature) i2, false); } }