import java.util.AbstractCollection;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
+import java.util.NoSuchElementException;
import intervalstore.api.IntervalI;
import intervalstore.api.IntervalStoreI;
-
/**
*
+ * A second idea, doing a double binary sort for the full interval. Seemed like
+ * a good idea, but is 50% slower.
+ *
* A Collection class to store interval-associated data, with options for "lazy"
* sorting so as to speed incremental construction of the data prior to issuing
* a findOverlap call.
*
*
- * with O(log N) performance for overlap queries, insertion and deletion (where
- * N is the size of the store).
- *
* Accepts duplicate entries but not null values.
*
+ *
+ *
* @author Bob Hanson 2019.08.06
*
* @param <T>
extends AbstractCollection<T> implements IntervalStoreI<T>
{
+ /**
+ * Search for the last interval that starts before or at the specified from/to
+ * range and the first interval that starts after it. In the situation that
+ * there are multiple intervals starting at from, this method returns the
+ * first of those.
+ *
+ * @param a
+ * @param from
+ * @param to
+ * @param ret
+ * @return
+ */
+ public int binaryLastIntervalSearch(long from,
+ long to, int[] ret)
+ {
+ int start = 0, start2 = 0;
+ int matched = 0;
+ int end = intervalCount - 1, end2 = intervalCount;
+ int mid, begin;
+ IntervalI e;
+ while (start <= end)
+ {
+ mid = (start + end) >>> 1;
+ e = intervals[mid];
+ begin = e.getBegin();
+ switch (Long.signum(begin - from))
+ {
+ case -1:
+ matched = mid;
+ start = mid + 1;
+ break;
+ case 0:
+ case 1:
+ end = mid - 1;
+ if (begin > to)
+ {
+ end2 = mid;
+ }
+ else
+ {
+ start2 = mid;
+ }
+ break;
+ }
+ }
+ ret[0] = end2;
+ start = Math.max(start2, end);
+ end = end2 - 1;
+
+ while (start <= end)
+ {
+ mid = (start + end) >>> 1;
+ e = intervals[mid];
+ begin = e.getBegin();
+ if (begin > to)
+ {
+ ret[0] = mid;
+ end = mid - 1;
+ }
+ else
+ {
+ start = mid + 1;
+ }
+
+ }
+ return matched;
+ }
+
+ /**
+ * My preference is for a bigendian comparison, but you may differ.
+ */
+ private Comparator<? super IntervalI> icompare;
+
+ /**
+ * bigendian is what NCList does; change icompare to switch to that
+ */
+ private boolean bigendian;
+
private final boolean DO_PRESORT;
private boolean isSorted;
- private List<T> intervals;
+ private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE,
+ maxEnd = Integer.MAX_VALUE;
+
+ // private Comparator<IntervalI> icompare = new IntervalComparator();
private boolean isTainted;
- private IntervalI[] orderedIntervalStarts;
+ private int capacity = 8;
- private static Comparator<IntervalI> icompare = new IntervalComparator();
+ private IntervalI[] intervals = new IntervalI[capacity];
- static
- {
- System.out.println("intervalstore.nonc.IntervalStore initialized");
- }
+ private int[] offsets;
- // private Comparator<IntervalI> icompare = new IntervalComparator();
+ private int[] ret = new int[1];
+
+ private int intervalCount;
+
+ private int added;
+
+ private int deleted;
+
+ private BitSet bsDeleted;
/**
* Constructor
*/
public IntervalStore()
{
+ this(true);
+ }
- intervals = new ArrayList<>();
- DO_PRESORT = true;
- };
-
public IntervalStore(boolean presort)
{
- intervals = new ArrayList<>();
- DO_PRESORT = presort;
- };
+ this(null, presort);
+ }
/**
* Constructor given a list of intervals. Note that the list may get sorted as
*/
public IntervalStore(List<T> intervals, boolean presort)
{
- this.intervals = intervals;
+ this(intervals, presort, null, false);
+ }
+
+ /**
+ *
+ * @param intervals
+ * intervals to initialize with (others may still be added)
+ * @param presort
+ * whether or not to presort the list as additions are made
+ * @param comparator
+ * IntervalI.COMPARATOR_LITTLEENDIAN or
+ * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that
+ * sorts by description as well, for example.
+ * @param bigendian
+ * true if the comparator sorts [10-30] before [10-20]
+ */
+ public IntervalStore(List<T> intervals, boolean presort,
+ Comparator<? super IntervalI> comparator, boolean bigendian)
+ {
+ if (intervals != null)
+ {
+ intervals.toArray(
+ this.intervals = new IntervalI[capacity = intervalCount = intervals
+ .size()]);
+ }
DO_PRESORT = presort;
- if (DO_PRESORT)
+ icompare = (comparator != null ? comparator
+ : bigendian ? IntervalI.COMPARATOR_BIGENDIAN
+ : IntervalI.COMPARATOR_LITTLEENDIAN);
+ this.bigendian = bigendian;
+
+ if (DO_PRESORT && intervalCount > 1)
{
sort();
}
+ isSorted = DO_PRESORT;
isTainted = true;
}
/**
- * Adds one interval to the store.
+ * Adds one interval to the store, allowing duplicates.
*
* @param interval
*/
@Override
public boolean add(T interval)
{
+ return add(interval, true);
+ }
+
+ /**
+ * Adds one interval to the store, optionally checking for duplicates.
+ *
+ * @param interval
+ * @param allowDuplicates
+ */
+ public boolean add(T interval, boolean allowDuplicates)
+ {
if (interval == null)
{
return false;
}
+ if (deleted > 0)
+ finalizeDeletion();
+ if (!isTainted)
+ {
+ offsets = null;
+ isTainted = true;
+ }
+
synchronized (intervals)
{
- if (DO_PRESORT)
+ int index = intervalCount;
+ int start = interval.getBegin();
+
+ if (intervalCount + added + 1 >= capacity)
{
- int insertPosition = findFirstBegin(intervals, interval.getBegin());
- intervals.add(insertPosition, interval);
- isSorted = true;
+ intervals = finalizeAddition(
+ new IntervalI[capacity = capacity << 1]);
+
+ }
+
+ if (DO_PRESORT && isSorted)
+ {
+ if (intervalCount == 0)
+ {
+ // ignore
+ }
+ else
+ {
+ index = findInterval(interval);
+ if (!allowDuplicates && index >= 0)
+ {
+ return false;
+ }
+ if (index < 0)
+ index = -1 - index;
+ else
+ index++;
+ }
+
}
else
{
- intervals.add(interval);
- isSorted = false;
+ if (!allowDuplicates && findInterval(interval) >= 0)
+ {
+ return false;
+ }
+
}
- isTainted = true;
- return true;
- }
- }
- @Override
- public void clear()
- {
- intervals.clear();
- orderedIntervalStarts = null;
- isTainted = true;
- }
+ if (index == intervalCount)
+ {
+ intervals[intervalCount++] = interval;
+ // System.out.println("added " + intervalCount + " " + interval);
+ }
+ else
+ {
+ int pt = capacity - ++added;
+ intervals[pt] = interval;
+ // System.out.println("stashed " + pt + " " + interval + " for "
+ // + index + " " + intervals[index]);
+ if (offsets == null)
+ offsets = new int[capacity];
- @Override
- public boolean contains(Object entry)
- {
- return listContains(intervals, entry);
+ offsets[pt] = offsets[index];
+
+ offsets[index] = pt;
+ }
+
+ minStart = Math.min(minStart, start);
+ maxStart = Math.max(maxStart, start);
+ return true;
+ }
}
- private void ensureFinalized()
+ private IntervalI[] finalizeAddition(IntervalI[] dest)
{
- if (isTainted && intervals.size() >= 2)
+ if (dest == null)
+ dest = intervals;
+ if (added == 0)
{
- if (!isSorted)
+ if (intervalCount > 0 && dest != intervals)
{
- sort();
+ System.arraycopy(intervals, 0, dest, 0, intervalCount);
}
- orderedIntervalStarts = intervals.toArray(new IntervalI[0]);
- linkFeatures(orderedIntervalStarts);
- isTainted = false;
+ capacity = dest.length;
+ return dest;
}
- }
- /**
- * Sort intervals by start (lowest first) and end (highest first).
- */
- private void sort()
- {
- Collections.sort(intervals, icompare);
- isSorted = true;
- }
+ // array is [(intervalCount)...null...(added)]
- protected int findFirstBegin(List<T> list, long pos)
- {
- int start = 0;
- int end = list.size() - 1;
- int matched = list.size();
-
- while (start <= end)
+ int ntotal = intervalCount + added;
+ for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;)
{
- int mid = (start + end) / 2;
- if (list.get(mid).getBegin() >= pos)
- {
- matched = mid;
- end = mid - 1;
- }
- else
- {
- start = mid + 1;
- }
+ int pt0 = pt;
+ while (--pt >= 0 && offsets[pt] == 0)
+ ;
+ if (pt < 0)
+ pt = 0;
+ int nOK = pt0 - pt;
+ // shift upper intervals right
+ ptShift -= nOK;
+ if (nOK > 0)
+ System.arraycopy(intervals, pt, dest, ptShift, nOK);
+ if (added == 0)
+ break;
+ for (int offset = offsets[pt]; offset > 0; offset = offsets[offset])
+ {
+ dest[--ptShift] = intervals[offset];
+ --added;
+ }
}
- return matched;
+ offsets = null;
+ intervalCount = ntotal;
+ capacity = dest.length;
+ return dest;
}
- protected int findFirstEnd(List<T> list, long pos)
+ public int binaryIdentitySearch(IntervalI interval)
+ {
+ return binaryIdentitySearch(interval, null);
+ }
+ /**
+ * for remove() and contains()
+ *
+ * @param list
+ * @param interval
+ * @param bsIgnore
+ * for deleted
+ * @return index or, if not found, -1 - "would be here"
+ */
+ public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore)
{
int start = 0;
- int end = list.size() - 1;
- int matched = list.size();
-
+ int r0 = interval.getBegin();
+ int r1 = interval.getEnd();
+ int end = intervalCount - 1;
+ if (end < 0 || r0 < minStart)
+ return -1;
+ if (r0 > maxStart)
+ return -1 - intervalCount;
while (start <= end)
{
- int mid = (start + end) / 2;
- if (list.get(mid).getEnd() >= pos)
- {
- matched = mid;
- end = mid - 1;
- }
- else
+ int mid = (start + end) >>> 1;
+ IntervalI r = intervals[mid];
+ switch (compareRange(r, r0, r1))
{
+ case -1:
start = mid + 1;
+ continue;
+ case 1:
+ end = mid - 1;
+ continue;
+ case 0:
+ IntervalI iv = intervals[mid];
+ if ((bsIgnore == null || !bsIgnore.get(mid))
+ && iv.equalsInterval(interval))
+ return mid;
+ // found one; just scan up and down now, first checking the range, but
+ // also checking other possible aspects of equivalence.
+
+ for (int i = mid; ++i <= end;)
+ {
+ if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1)
+ break;
+ if ((bsIgnore == null || !bsIgnore.get(i))
+ && iv.equalsInterval(interval))
+ return i;
+ }
+ for (int i = mid; --i >= start;)
+ {
+ if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1)
+ return -1 - ++i;
+ if ((bsIgnore == null || !bsIgnore.get(i))
+ && iv.equalsInterval(interval))
+ return i;
+ }
+ return -1 - start;
}
}
- return matched;
+ return -1 - start;
}
+ // private int binaryInsertionSearch(long from, long to)
+ // {
+ // int matched = intervalCount;
+ // int end = matched - 1;
+ // int start = matched;
+ // if (end < 0 || from > intervals[end].getEnd()
+ // || from < intervals[start = 0].getBegin())
+ // return start;
+ // while (start <= end)
+ // {
+ // int mid = (start + end) >>> 1;
+ // switch (compareRange(intervals[mid], from, to))
+ // {
+ // case 0:
+ // return mid;
+ // case 1:
+ // matched = mid;
+ // end = mid - 1;
+ // continue;
+ // case -1:
+ // start = mid + 1;
+ // continue;
+ // }
+ //
+ // }
+ // return matched;
+ // }
+
+
+ @Override
+ public void clear()
+ {
+ intervalCount = added = 0;
+ isSorted = true;
+ isTainted = true;
+ offsets = null;
+ minStart = maxEnd = Integer.MAX_VALUE;
+ maxStart = Integer.MIN_VALUE;
+ }
/**
- * Adds non-nested intervals to the result list that lie within the target
- * range
+ * Compare an interval t to a from/to range for insertion purposes
*
+ * @param t
* @param from
* @param to
- * @param result
+ * @return 0 if same, 1 if start is after from, or start equals from and
+ * [bigendian: end is before to | littleendian: end is after to], else
+ * -1
*/
- protected void findIntervalOverlaps(long from, long to,
- List<T> result)
+ private int compareRange(IntervalI t, long from, long to)
{
+ if (t == null)
+ System.out.println("???");
+ int order = Long.signum(t.getBegin() - from);
+ return (order == 0
+ ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to)
+ : order);
+ }
- int startIndex = findFirstEnd(intervals, from);
- final int startIndex1 = startIndex;
- int i = startIndex1;
- while (i < intervals.size())
+ @Override
+ public boolean contains(Object entry)
+ {
+ if (entry == null || intervalCount == 0)
+ return false;
+ if (!isSorted || deleted > 0)
{
- T sf = intervals.get(i);
- if (sf.getBegin() > to)
+ sort();
+ }
+ return (findInterval((IntervalI) entry) >= 0);
+ }
+
+ public boolean containsInterval(IntervalI outer, IntervalI inner)
+ {
+ ensureFinalized();
+ int index = binaryIdentitySearch(inner, null);
+ if (index >= 0)
+ while ((index = index - Math.abs(offsets[index])) >= 0)
{
- break;
+ if (intervals[index] == outer)
+ {
+ return true;
+ }
}
- if (sf.getBegin() <= to && sf.getEnd() >= from)
+ return false;
+ }
+
+ private void ensureFinalized()
+ {
+ if (isTainted && intervalCount + added > 1)
+ {
+ if (!isSorted || added > 0 || deleted > 0)
{
- result.add(sf);
+ sort();
}
- i++;
+ if (offsets == null || offsets.length < intervalCount)
+ offsets = new int[intervalCount];
+ linkFeatures();
+ isTainted = false;
}
}
-
+ /**
+ * Find all overlaps within the given range, inclusively.
+ *
+ * @return a list sorted in ascending order of start position
+ *
+ */
@Override
- public List<T> findOverlaps(long start, long end)
+ public List<T> findOverlaps(long from, long to)
{
- return findOverlaps(start, end, null);
+ List<T> list = findOverlaps(from, to, null);
+ Collections.reverse(list);
+ return list;
}
+ /**
+ * Find all overlaps within the given range, inclusively.
+ *
+ * @return a list sorted in descending order of start position
+ *
+ */
+
@SuppressWarnings("unchecked")
@Override
- public List<T> findOverlaps(long start, long end, List<T> result)
+ public List<T> findOverlaps(long from, long to, List<T> result)
{
-
if (result == null)
{
result = new ArrayList<>();
}
- int n = intervals.size();
- switch (n)
+ switch (intervalCount)
{
case 0:
return result;
case 1:
- T sf = intervals.get(0);
- if (sf.getBegin() <= end && sf.getEnd() >= start)
+ IntervalI sf = intervals[0];
+ if (sf.getBegin() <= to && sf.getEnd() >= from)
{
- result.add(sf);
+ result.add((T) sf);
}
return result;
}
ensureFinalized();
- // (1) Find the closest feature to this position.
-
- int index = getClosestFeature(orderedIntervalStarts, start);
- IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]);
-
- // (2) Traverse the containedBy field, checking for overlap.
+ if (from > maxEnd || to < minStart)
+ return result;
+ int index = binaryLastIntervalSearch(from, to, ret);
+ int index1 = ret[0];
+ if (index1 < 0)
+ return result;
- while (sf != null)
+ if (index1 > index + 1)
{
- if (sf.getEnd() >= start)
+ while (--index1 > index)
{
- result.add((T) sf);
+ result.add((T) intervals[index1]);
}
- sf = sf.getContainedBy();
}
-
- // (3) For an interval, find the last feature that starts in this interval,
- // and add all features up through that feature.
-
- if (end >= start)
+ boolean isMonotonic = false;
+ while (index >= 0)
{
- // fill in with all features that start within this interval, fully
- // inclusive
- int index2 = getClosestFeature(orderedIntervalStarts, end);
- while (++index <= index2)
+ IntervalI sf = intervals[index];
+ if (sf.getEnd() >= from)
{
- result.add((T) orderedIntervalStarts[index]);
+ result.add((T) sf);
}
-
+ else if (isMonotonic)
+ {
+ break;
+ }
+ int offset = offsets[index];
+ isMonotonic = (offset < 0);
+ index -= (isMonotonic ? -offset : offset);
}
return result;
}
- private int getClosestFeature(IntervalI[] l, long pos)
+ @Override
+ public IntervalI get(int i)
{
- int low = 0;
- int high = l.length - 1;
- while (low <= high)
- {
- int mid = (low + high) >>> 1;
- IntervalI f = l[mid];
- switch (Long.signum(f.getBegin() - pos))
- {
- case -1:
- low = mid + 1;
- continue;
- case 1:
- high = mid - 1;
- continue;
- case 0:
-
- while (++mid <= high && l[mid].getBegin() == pos)
- {
- ;
- }
- return --mid;
- }
- }
- return (high < 0 ? -1 : high);
+ if (i < 0 || i >= intervalCount + added)
+ return null;
+ ensureFinalized();
+ return intervals[i];
}
- @SuppressWarnings("unchecked")
- public T getContainedBy(T sf, T sf0)
+ private int getContainedBy(int index, int begin)
{
- int begin = sf0.getBegin();
- while (sf != null)
+ while (index >= 0)
{
- if (begin <= sf.getEnd())
+ IntervalI sf = intervals[index];
+ if (sf.getEnd() >= begin)
{
// System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
// + "\nFS in " + sf.getIndex1() + ":" + sf);
- return sf;
+ return index;
}
- sf = (T) sf.getContainedBy();
+ index -= Math.abs(offsets[index]);
}
- return null;
+ return IntervalI.NOT_CONTAINED;
}
@Override
public int getDepth()
{
- int n = intervals.size();
- if (n < 2)
+ ensureFinalized();
+ if (intervalCount < 2)
{
- return n;
+ return intervalCount;
}
- ensureFinalized();
int maxDepth = 1;
- T root = null;
- for (int i = 0; i < n; i++)
+ IntervalI root = null;
+ for (int i = 0; i < intervalCount; i++)
{
- T element = intervals.get(i);
- IntervalI container = element;
- if (element.getContainedBy() == null)
+ IntervalI element = intervals[i];
+ if (offsets[i] == IntervalI.NOT_CONTAINED)
{
root = element;
}
int depth = 1;
- while ((container = container.getContainedBy()) != null)
+ int index = i;
+ int offset;
+ while ((index = index - Math.abs(offset = offsets[index])) >= 0)
{
- if (++depth > maxDepth && container == root)
+ element = intervals[index];
+ if (++depth > maxDepth && (element == root || offset < 0))
{
maxDepth = depth;
break;
}
@Override
+ public int getWidth()
+ {
+ ensureFinalized();
+ int w = 0;
+ for (int i = offsets.length; --i >= 0;)
+ {
+ if (offsets[i] > 0)
+ {
+ w++;
+ }
+ }
+ return w;
+ }
+
+ @Override
public boolean isValid()
{
ensureFinalized();
@Override
public Iterator<T> iterator()
{
- return intervals.iterator();
+ finalizeAddition(null);
+ return new Iterator<T>()
+ {
+
+ private int next;
+ @Override
+ public boolean hasNext()
+ {
+ return next < intervalCount;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T next()
+ {
+ if (next >= intervalCount)
+ throw new NoSuchElementException();
+ return (T) intervals[next++];
+ }
+
+ };
}
- @SuppressWarnings("unchecked")
- private void linkFeatures(IntervalI[] features)
+ private void linkFeatures()
{
- int n = features.length;
- if (n < 2)
+ if (intervalCount == 0)
+ return;
+ maxEnd = intervals[0].getEnd();
+ offsets[0] = IntervalI.NOT_CONTAINED;
+ if (intervalCount == 1)
{
return;
}
- int maxEnd = features[0].getEnd();
- for (int i = 1; i < n; i++)
+ boolean isMonotonic = true;
+ for (int i = 1; i < intervalCount; i++)
{
- T sf = (T) features[i];
- if (sf.getBegin() <= maxEnd)
- {
- sf.setContainedBy(getContainedBy((T) features[i - 1], sf));
- }
- if (sf.getEnd() > maxEnd)
+ IntervalI sf = intervals[i];
+ int begin = sf.getBegin();
+ int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1);
+ // System.out.println(sf + " is contained by "
+ // + (index < 0 ? null : starts[index]));
+
+ offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED
+ : isMonotonic ? index - i : i - index);
+ isMonotonic = (sf.getEnd() > maxEnd);
+ if (isMonotonic)
{
maxEnd = sf.getEnd();
}
}
- /**
- * Answers true if the list contains the interval, else false. This method is
- * optimised for the condition that the list is sorted on interval start
- * position ascending, and will give unreliable results if this does not hold.
- *
- * @param intervals
- * @param entry
- * @return
- */
- protected boolean listContains(List<T> intervals, Object entry)
+ @Override
+ public String prettyPrint()
{
- if (intervals == null || entry == null)
+ switch (intervalCount)
{
- return false;
- }
-
- if (!isSorted)
- {
- return intervals.contains(entry);
+ case 0:
+ return "";
+ case 1:
+ return intervals[0] + "\n";
}
-
- @SuppressWarnings("unchecked")
- T interval = (T) entry;
-
- /*
- * locate the first entry in the list which does not precede the interval
- */
- int pos = findFirstBegin(intervals, interval.getBegin());
- int len = intervals.size();
- while (pos < len)
+ ensureFinalized();
+ String sep = "\t";
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < intervalCount; i++)
{
- T sf = intervals.get(pos);
- if (sf.getBegin() > interval.getBegin())
+ IntervalI range = intervals[i];
+ int index = i;
+ while ((index = index - Math.abs(offsets[index])) >= 0)
{
- return false; // no match found
- }
- if (sf.getEnd() == interval.getEnd() && sf.equals(interval))
- {
- return true;
+ sb.append(sep);
}
- pos++;
+ sb.append(range.toString()).append('\n');
}
- return false;
+ return sb.toString();
}
@Override
// {
// throw new NullPointerException();
// }
- return (o != null && intervals.size() > 0
+ return (o != null && intervalCount > 0
&& removeInterval((IntervalI) o));
}
/**
- * Uses a binary search to find the entry and removes it if found.
+ * Find the interval or return where it should go, possibly into the add
+ * buffer
*
- * @param entry
- * @return
+ * @param interval
+ * @return index (nonnegative) or index where it would go (negative)
*/
- protected boolean removeInterval(IntervalI entry)
+
+ private int findInterval(IntervalI interval)
{
- if (!isSorted)
+
+ if (isSorted)
+ {
+ int pt = binaryIdentitySearch(interval, null);
+ // if (addPt == intervalCount || offsets[pt] == 0)
+ // return pt;
+ if (pt >= 0 || added == 0 || pt == -1 - intervalCount)
+ return pt;
+ pt = -1 - pt;
+ int start = interval.getBegin();
+ int end = interval.getEnd();
+
+ int match = pt;
+
+ while ((pt = offsets[pt]) != 0)
+ {
+ IntervalI iv = intervals[pt];
+ switch (compareRange(iv, start, end))
+ {
+ case -1:
+ break;
+ case 0:
+ if (iv.equalsInterval(interval))
+ return pt;
+ // fall through
+ case 1:
+ match = pt;
+ continue;
+ }
+ }
+ return -1 - match;
+ }
+ else
{
- return intervals.remove(entry);
+ int i = intervalCount;
+ while (--i >= 0 && !intervals[i].equalsInterval(interval))
+ ;
+ return i;
}
+ }
- /*
- * find the first interval that might match, i.e. whose
- * start position is not less than the target range start
- * (NB inequality test ensures the first match if any is found)
- */
- int startIndex = findFirstBegin(intervals, entry.getBegin());
+ /**
+ * Uses a binary search to find the entry and removes it if found.
+ *
+ * @param interval
+ * @return
+ */
+ protected boolean removeInterval(IntervalI interval)
+ {
- /*
- * traverse intervals to look for a match
- */
- int from = entry.getBegin();
- int to = entry.getEnd();
- for (int i = startIndex, size = intervals.size(); i < size; i++)
+ if (!isSorted || added > 0)
{
- T sf = intervals.get(i);
- if (sf.getBegin() > from)
- {
- return false;
- }
- if (sf.getEnd() == to && sf.equals(entry))
- {
- intervals.remove(i).setContainedBy(null);
- return (isTainted = true);
- }
+ sort();
}
- return false;
+ int i = binaryIdentitySearch(interval, bsDeleted);
+ if (i < 0)
+ return false;
+ if (deleted == 0)
+ {
+ if (bsDeleted == null)
+ bsDeleted = new BitSet(intervalCount);
+ else
+ bsDeleted.clear();
+ }
+ bsDeleted.set(i);
+ deleted++;
+ return (isTainted = true);
}
- @Override
- public int size()
+ private void finalizeDeletion()
{
- return intervals.size();
+ if (deleted == 0)
+ return;
+
+ // ......xxx.....xxxx.....xxxxx....
+ // ......^i,pt
+ // ...... .......
+ // ............
+ for (int i = bsDeleted.nextSetBit(0); i >= 0;)
+ {
+ int pt = i;
+ i = bsDeleted.nextClearBit(i + 1);
+ int pt1 = bsDeleted.nextSetBit(i + 1);
+ if (pt1 < 0)
+ pt1 = intervalCount;
+ if (pt1 == pt)
+ break;
+ System.arraycopy(intervals, i, intervals, pt, pt1 - i);
+ i = pt1;
+ }
+ intervalCount -= deleted;
+ deleted = 0;
+ bsDeleted.clear();
+
}
@Override
- public String toString()
+ public boolean revalidate()
{
- return prettyPrint();
+ isTainted = true;
+ isSorted = false;
+ ensureFinalized();
+ return true;
}
@Override
- public int getWidth()
+ public int size()
{
- ensureFinalized();
- int w = 0;
- for (int i = intervals.size(); --i >= 0;)
- {
- if (intervals.get(i).getContainedBy() == null)
- {
- w++;
- }
- }
- return w;
+ return intervalCount + added - deleted;
}
- @Override
- public String prettyPrint()
+ /**
+ * Sort intervals by start (lowest first) and end (highest first).
+ */
+ private void sort()
{
- int n = intervals.size();
- if (n == 0)
+ if (added > 0)
{
- return "";
+ intervals = finalizeAddition(new IntervalI[intervalCount + added]);
}
- if (n == 1)
+ else if (deleted > 0)
{
- return intervals.get(0) + "\n";
+ finalizeDeletion();
}
- ensureFinalized();
- String sep = "\t";
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < n; i++)
+ else
{
- IntervalI range = orderedIntervalStarts[i];
- IntervalI container = range.getContainedBy();
- while (container != null)
- {
- sb.append(sep);
- container = container.getContainedBy();
- }
- sb.append(range.toString()).append('\n');
+ Arrays.sort(intervals, 0, intervalCount, icompare);
}
- return sb.toString();
+ updateMinMaxStart();
+ isSorted = true;
}
- @Override
- public boolean revalidate()
+ private void updateMinMaxStart()
{
- isTainted = true;
- isSorted = true;
- ensureFinalized();
- return true;
+ if (intervalCount > 0)
+ {
+ minStart = intervals[0].getBegin();
+ maxStart = intervals[intervalCount - 1].getBegin();
+ }
+ else
+ {
+ minStart = Integer.MAX_VALUE;
+ maxStart = Integer.MIN_VALUE;
+ }
}
@Override
- public IntervalI get(int i)
+ public String toString()
{
- ensureFinalized();
- return (i < 0 || i >= orderedIntervalStarts.length ? null
- : orderedIntervalStarts[i]);
+ return prettyPrint();
}
}
import java.util.ArrayList;
import java.util.List;
+import intervalstore.nonc.IntervalStore;
+
/**
* An adaption of FeatureStore that is efficient and lightweight, accelerating
* processing speed in JavaScript.
{
boolean contactsTainted = true;
+ private IntervalStore<SequenceFeature> featureStore;
- /**
- * internal reference to features as an ArrayList
- */
- private ArrayList<SequenceFeature> featureList;
+ //
+ // /**
+ // * internal reference to features as an ArrayList
+ // */
+ // private ArrayList<SequenceFeature> featureList;
+ //
+ // /**
+ // * contact features ordered by first contact position
+ // */
+ // private SequenceFeature[] orderedFeatureStarts;
- /**
- * contact features ordered by first contact position
- */
- private SequenceFeature[] orderedFeatureStarts;
-
- /**
- * indicates that we need to rebuild orderedFeatureStarts and reset index
- * fields
- */
- private boolean isTainted = true;
+ // /**
+ // * indicates that we need to rebuild orderedFeatureStarts and reset index
+ // * fields
+ // */
+ // private boolean isTainted = true;
/**
* Constructor
*/
public FeatureStoreJS()
{
- features = featureList = new ArrayList<>();
+ // the only reference to features field in this class -- for the superclass
+
+ features = featureStore = new IntervalStore<>(); // featureList = new
+ // ArrayList<>();
}
/**
/**
* Adds one feature to the IntervalStore that can manage nested features
* (creating the IntervalStore if necessary)
+ *
+ * @return false if could not add it (late check for contained
*/
@Override
- protected synchronized void addPositionalFeature(SequenceFeature feature)
+ protected synchronized boolean addPositionalFeature(
+ SequenceFeature feature)
{
- featureList.add(findFirstBegin(featureList, feature.begin), feature);
- isTainted = true;
+
+ return featureStore.add(feature, false);
+ // features.add(feature);
+ // featureList.add(findFirstBegin(featureList, feature.begin), feature);
+ // isTainted = true;
}
@Override
- protected boolean containsFeature(SequenceFeature feature)
+ protected boolean checkContainsPositionalFeatureForAdd(
+ SequenceFeature feature)
{
+ // the non-NCList IntervalStore does this as part of the add for greater
+ // efficiency (one binary search)
+ return false;
+ }
- return getEquivalentFeatureIndex(featureList, feature) >= 0;
+ @Override
+ protected boolean containsFeature(SequenceFeature feature)
+ {
+ return featureStore.contains(feature);
+ // return getEquivalentFeatureIndex(featureList, feature) >= 0;
}
@Override
protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
{
- int pos = getEquivalentFeatureIndex(featureList, sf);
- if (pos < 0)
- {
- return false;
- }
- featureList.remove(pos);
- return (isTainted = true);
+ return featureStore.remove(sf);
+ // int pos = getEquivalentFeatureIndex(featureList, sf);
+ // if (pos < 0)
+ // {
+ // return false;
+ // }
+ // featureList.remove(pos);
+ // return (isTainted = true);
}
/**
findContactFeatures(start, end, result);
}
}
- if (featureList.size() > 0)
+ if (featureStore.size() > 0)
{
- getOverlaps(start, end, result);
+ featureStore.findOverlaps(start, end, result);
+ // getOverlaps(start, end, result);
}
return result;
}
- /**
- * A binary search identical to the one used for contact start/end, but here
- * we return the feature itself. Unlike Collection.BinarySearch, all we have
- * to be is close, not exact, and we make sure if there is a string of
- * identical starts, then we slide to the end so that we can check all of
- * them.
- *
- * @param l
- * @param pos
- * @return
- */
- private int getClosestFeature(SequenceFeature[] l, long pos)
- {
- int low = 0;
- int high = l.length - 1;
- while (low <= high)
- {
- int mid = (low + high) >>> 1;
- SequenceFeature f = l[mid];
- switch (Long.signum(f.begin - pos))
- {
- case -1:
- low = mid + 1;
- continue;
- case 1:
- high = mid - 1;
- continue;
- case 0:
-
- while (++mid <= high && l[mid].begin == pos)
- {
- ;
- }
- return --mid;
- }
- }
- return (high < 0 ? -1 : high);
- }
+ // /**
+ // * A binary search identical to the one used for contact start/end, but here
+ // * we return the feature itself. Unlike Collection.BinarySearch, all we have
+ // * to be is close, not exact, and we make sure if there is a string of
+ // * identical starts, then we slide to the end so that we can check all of
+ // * them.
+ // *
+ // * @param l
+ // * @param pos
+ // * @return
+ // */
+ // private int getClosestFeature(SequenceFeature[] l, long pos)
+ // {
+ // int low = 0;
+ // int high = l.length - 1;
+ // while (low <= high)
+ // {
+ // int mid = (low + high) >>> 1;
+ // SequenceFeature f = l[mid];
+ // switch (Long.signum(f.begin - pos))
+ // {
+ // case -1:
+ // low = mid + 1;
+ // continue;
+ // case 1:
+ // high = mid - 1;
+ // continue;
+ // case 0:
+ //
+ // while (++mid <= high && l[mid].begin == pos)
+ // {
+ // ;
+ // }
+ // return --mid;
+ // }
+ // }
+ // return (high < 0 ? -1 : high);
+ // }
/**
* Adds to the result list any contact features whose end (second contact
}
}
- /**
- * Since we are traversing the sorted feature array in a forward direction,
- * all elements prior to the one we are working on have been fully linked. All
- * we are doing is following those links until we find the first array feature
- * with a containedBy element that has an end >= our begin point. It is
- * generally a very short list -- maybe one or two depths. But it might be
- * more than that.
- *
- * @param sf
- * @param sf0
- * @return
- */
- private SequenceFeature getContainedBy(SequenceFeature sf,
- SequenceFeature sf0)
- {
- int begin = sf0.begin;
- while (sf != null)
- {
- if (begin <= sf.end)
- {
- // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf);
- return sf;
- }
- sf = sf.containedBy;
- }
- return null;
- }
-
- /**
- * Fast find of known features already on the list; slower removal of
- * equivalent feature, not necessarily identical.
- *
- * @param feature
- * @return 0-based index for this feature in featureList
- */
- @Override
- protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
- SequenceFeature feature)
- {
- int pos = feature.index1 - 1;
- if (!isTainted && pos >= 0)
- {
- return pos;
- }
- return super.getEquivalentFeatureIndex(list, feature);
- }
-
- /**
- * Find all overlaps; special case when there is only one feature. The
- * required array of start-sorted SequenceFeature is created lazily.
- *
- * @param start
- * @param end
- * @param result
- */
- private void getOverlaps(long start, long end,
- List<SequenceFeature> result)
- {
- int n = featureList.size();
- switch (n)
- {
- case 0:
- return;
- case 1:
- justCheckOne(featureList.get(0), start, end, result);
- return;
- default:
- if (isTainted)
- {
- orderedFeatureStarts = featureList
- .toArray(new SequenceFeature[featureList.size()]);
- linkFeatures(orderedFeatureStarts);
- isTainted = false;
- }
- break;
- }
-
- // (1) Find the closest feature to this position.
-
- int index = getClosestFeature(orderedFeatureStarts, start);
- SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
-
- // (2) Traverse the containedBy field, checking for overlap.
-
- while (sf != null)
- {
- if (sf.end >= start)
- {
- result.add(sf);
- }
- sf = sf.containedBy;
- }
-
- // (3) For an interval, find the last feature that starts in this interval,
- // and add all features up through that feature.
-
- if (end > start)
- {
- // fill in with all features that start within this interval, fully
- // inclusive
- int index2 = getClosestFeature(orderedFeatureStarts, end);
- while (++index <= index2)
- {
- result.add(orderedFeatureStarts[index]);
- }
-
- }
- }
-
- /**
- * Quick check when we only have one feature.
- *
- * @param sf
- * @param start
- * @param end
- * @param result
- */
- private void justCheckOne(SequenceFeature sf, long start, long end,
- List<SequenceFeature> result)
- {
- if (sf.begin <= end && sf.end >= start)
- {
- result.add(sf);
- }
- return;
- }
-
- /**
- * Run through the sorted sequence array once, building the containedBy linked
- * list references. Does a check first to make sure there is actually
- * something out there that is overlapping. A null for sf.containedBy means
- * there are no overlaps for this feature.
- *
- * @param features
- */
- private void linkFeatures(SequenceFeature[] features)
- {
- int n = features.length;
- switch (n)
- {
- case 0:
- return;
- case 1:
- features[0].index1 = 1;
- return;
- }
- int maxEnd = features[0].end;
- for (int i = 1; i < n;)
- {
- SequenceFeature sf = features[i];
- if (sf.begin <= maxEnd)
- {
- sf.containedBy = getContainedBy(features[i - 1], sf);
- }
- if (sf.end > maxEnd)
- {
- maxEnd = sf.end;
- }
- sf.index1 = ++i;
- }
- }
+ // /**
+ // * Since we are traversing the sorted feature array in a forward direction,
+ // * all elements prior to the one we are working on have been fully linked.
+ // All
+ // * we are doing is following those links until we find the first array
+ // feature
+ // * with a containedBy element that has an end >= our begin point. It is
+ // * generally a very short list -- maybe one or two depths. But it might be
+ // * more than that.
+ // *
+ // * @param sf
+ // * @param sf0
+ // * @return
+ // */
+ // private SequenceFeature getContainedBy(SequenceFeature sf,
+ // SequenceFeature sf0)
+ // {
+ // int begin = sf0.begin;
+ // while (sf != null)
+ // {
+ // if (begin <= sf.end)
+ // {
+ // // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf);
+ // return sf;
+ // }
+ // sf = sf.containedBy;
+ // }
+ // return null;
+ // }
+
+ // /**
+ // * Fast find of known features already on the list; slower removal of
+ // * equivalent feature, not necessarily identical.
+ // *
+ // * @param feature
+ // * @return 0-based index for this feature in featureList
+ // */
+ // @Override
+ // protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
+ // SequenceFeature feature)
+ // {
+ // int pos = feature.index - 1;
+ // if (!isTainted && pos >= 0)
+ // {
+ // return pos;
+ // }
+ // return super.getEquivalentFeatureIndex(list, feature);
+ // }
+ //
+ // /**
+ // * Find all overlaps; special case when there is only one feature. The
+ // * required array of start-sorted SequenceFeature is created lazily.
+ // *
+ // * @param start
+ // * @param end
+ // * @param result
+ // */
+ // private void getOverlaps(long start, long end,
+ // List<SequenceFeature> result)
+ // {
+ // int n = featureList.size();
+ // switch (n)
+ // {
+ // case 0:
+ // return;
+ // case 1:
+ // justCheckOne(featureList.get(0), start, end, result);
+ // return;
+ // default:
+ // if (isTainted)
+ // {
+ // orderedFeatureStarts = featureList
+ // .toArray(new SequenceFeature[featureList.size()]);
+ // linkFeatures(orderedFeatureStarts);
+ // isTainted = false;
+ // }
+ // break;
+ // }
+ //
+ // // (1) Find the closest feature to this position.
+ //
+ // int index = getClosestFeature(orderedFeatureStarts, start);
+ // SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
+ //
+ // // (2) Traverse the containedBy field, checking for overlap.
+ //
+ // while (sf != null)
+ // {
+ // if (sf.end >= start)
+ // {
+ // result.add(sf);
+ // }
+ // sf = sf.containedBy;
+ // }
+ //
+ // // (3) For an interval, find the last feature that starts in this interval,
+ // // and add all features up through that feature.
+ //
+ // if (end > start)
+ // {
+ // // fill in with all features that start within this interval, fully
+ // // inclusive
+ // int index2 = getClosestFeature(orderedFeatureStarts, end);
+ // while (++index <= index2)
+ // {
+ // result.add(orderedFeatureStarts[index]);
+ // }
+ //
+ // }
+ // }
+ //
+ // /**
+ // * Quick check when we only have one feature.
+ // *
+ // * @param sf
+ // * @param start
+ // * @param end
+ // * @param result
+ // */
+ // private void justCheckOne(SequenceFeature sf, long start, long end,
+ // List<SequenceFeature> result)
+ // {
+ // if (sf.begin <= end && sf.end >= start)
+ // {
+ // result.add(sf);
+ // }
+ // return;
+ // }
+ //
+ // /**
+ // * Run through the sorted sequence array once, building the containedBy
+ // linked
+ // * list references. Does a check first to make sure there is actually
+ // * something out there that is overlapping. A null for sf.containedBy means
+ // * there are no overlaps for this feature.
+ // *
+ // * @param features
+ // */
+ // private void linkFeatures(SequenceFeature[] features)
+ // {
+ // int n = features.length;
+ // switch (n)
+ // {
+ // case 0:
+ // return;
+ // case 1:
+ // features[0].index = 1;
+ // return;
+ // }
+ // int maxEnd = features[0].end;
+ // for (int i = 1; i < n;)
+ // {
+ // SequenceFeature sf = features[i];
+ // if (sf.begin <= maxEnd)
+ // {
+ // sf.containedBy = getContainedBy(features[i - 1], sf);
+ // }
+ // if (sf.end > maxEnd)
+ // {
+ // maxEnd = sf.end;
+ // }
+ // sf.index = ++i;
+ // }
+ // }
}