*/
package intervalstore.api;
-import java.util.Collections;
import java.util.Comparator;
-import java.util.List;
public interface IntervalI
{
/**
- * a comparator for sorting intervals by start position ascending
+ * Compares intervals by start position ascending and end position descending
*/
- static Comparator<? super IntervalI> FORWARD_STRAND = new Comparator<IntervalI>()
+ static Comparator<? super IntervalI> COMPARE_BEGIN_ASC_END_DESC = new Comparator<IntervalI>()
{
@Override
public int compare(IntervalI o1, IntervalI o2)
{
- return Integer.compare(o1.getBegin(), o2.getBegin());
+ int ret = Integer.signum(o1.getBegin() - o2.getBegin());
+ return (ret == 0 ? Integer.signum(o2.getEnd() - o1.getEnd()) : ret);
}
};
/**
- * a comparator for sorting intervals by end position descending
+ * Compares intervals by start position ascending and end position ascending
*/
- static Comparator<? super IntervalI> REVERSE_STRAND = new Comparator<IntervalI>()
+ static Comparator<? super IntervalI> COMPARE_BEGIN_ASC_END_ASC = new Comparator<IntervalI>()
{
@Override
public int compare(IntervalI o1, IntervalI o2)
{
- return Integer.compare(o2.getEnd(), o1.getEnd());
+ int ret = Integer.signum(o1.getBegin() - o2.getBegin());
+ return (ret == 0 ? Integer.signum(o1.getEnd() - o2.getEnd()) : ret);
}
};
+ /**
+ * Compares intervals by start position ascending
+ */
+ static Comparator<? super IntervalI> COMPARE_BEGIN_ASC = new Comparator<IntervalI>()
+ {
+ @Override
+ public int compare(IntervalI o1, IntervalI o2)
+ {
+ return Integer.signum(o1.getBegin() - o2.getBegin());
+ }
+ };
+
+ /**
+ * Compares intervals by end position descending
+ */
+ static Comparator<? super IntervalI> COMPARE_END_DESC = new Comparator<IntervalI>()
+ {
+ @Override
+ public int compare(IntervalI o1, IntervalI o2)
+ {
+ return Integer.signum(o2.getEnd() - o1.getEnd());
+ }
+ };
+
+ /**
+ * Answers the start position of the interval
+ *
+ * @return
+ */
int getBegin();
+ /**
+ * Answers the end position of the interval
+ *
+ * @return
+ */
int getEnd();
/**
&& (i.getBegin() > getBegin() || i.getEnd() < getEnd());
}
+ /**
+ * Answers true if the interval has the same begin and end as this one, else
+ * false
+ *
+ * @param i
+ * @return
+ */
default boolean equalsInterval(IntervalI i)
{
return i != null && i.getBegin() == getBegin()
&& i.getEnd() == getEnd();
}
+ /**
+ * Answers true if interval i overlaps this one (they share at least one
+ * position), else false
+ *
+ * @param i
+ * @return
+ */
default boolean overlapsInterval(IntervalI i)
{
if (i == null)
}
return true; // i internal to this
}
-
- /**
- * Sorts the list by start position ascending (if forwardString==true), or by
- * end position descending
- *
- * @param intervals
- * @param forwardStrand
- */
- static void sortIntervals(List<? extends IntervalI> intervals,
- final boolean forwardStrand)
- {
- Collections.sort(intervals,
- forwardStrand ? FORWARD_STRAND : REVERSE_STRAND);
- }
}
import java.util.Collection;
import java.util.List;
-import intervalstore.impl.NCList;
-
+/**
+ * An interface describing a store of (possibly overlapping) features which may
+ * be queried to find features which overlap a given start-end range
+ *
+ * @author gmcarstairs
+ *
+ * @param <T>
+ */
public interface IntervalStoreI<T extends IntervalI> extends Collection<T>
{
-
/**
* Returns a (possibly empty) list of items whose extent overlaps the given
* range
List<T> findOverlaps(long from, long to);
/**
- * Returns a string representation of the data where containment is shown by
- * indentation on new lines
+ * Returns a (possibly empty) list of items whose extent overlaps the given
+ * range. If the {@code results} parameter is not null, items are appended to
+ * this list and the (possibly extended) list is returned. There is no check
+ * for duplicate entries in the result list.
*
+ * @param from
+ * start of overlap range (inclusive)
+ * @param to
+ * end of overlap range (inclusive)
+ * @param result
* @return
*/
- String prettyPrint();
+ List<T> findOverlaps(long from, long to, List<T> result);
+
+ /**
+ * Adds the entry to the store, unless {@code allowDuplicates} is false and
+ * the entry is already contained in the store. The test for containment
+ * should match that used in the {@code contains} method.
+ */
+ boolean add(T entry, boolean allowDuplicates);
/**
- * Answers true if the data held satisfy the rules of construction of an
- * IntervalStore, else false.
+ * Returns a string representation of the data where containment is shown by
+ * indentation on new lines
*
* @return
*/
- boolean isValid();
+ String prettyPrint();
/**
- * Answers the level of nesting of intervals in the store, as
- * <ul>
- * <li>0 if the store is empty</li>
- * <li>1 if all intervals are 'top level' (non nested)</li>
- * <li>else 1 plus the depth of the enclosed NCList</li>
- * </ul>
+ * Answers the depth of nesting of intervals in the store. The precise
+ * definition depends on the implementation.
*
* @return
- * @see NCList#getDepth()
*/
int getDepth();
package intervalstore.impl;
import java.util.List;
-import java.util.function.Function;
+
+import intervalstore.api.IntervalI;
/**
- * Provides a method to perform binary search of an ordered list for the first
- * entry that satisfies a supplied condition
+ * Provides a method to perform binary search of an ordered list of
+ * {@code IntervalI} objects for the first entry that satisfies a supplied
+ * condition on either the begin or end value of the intervals
*
* @author gmcarstairs
*/
public final class BinarySearcher
{
+ /*
+ * compare using <, <=, =, >=, >
+ */
+ public enum Compare
+ {
+ LT, LE, EQ, GE, GT
+ }
+
private BinarySearcher()
{
}
* <code>Collections.binarySearch</code> instead.
*
* @param list
+ * the list to be searched
+ * @param compareValue
+ * a function that extracts the value to be compared from an entry
+ * @param compareTo
+ * the value to compare to
* @param test
+ * a function that compares (compareValue, compareTo) and returns
+ * true or false
* @return
* @see java.util.Collections#binarySearch(List, Object)
*/
- public static <T> int findFirst(List<? extends T> list,
- Function<T, Boolean> test)
+ public static int findFirst(List<? extends IntervalI> list,
+ boolean compareBegin,
+ Compare comp, int compareto)
{
int start = 0;
- int end = list.size() - 1;
int matched = list.size();
+ int end = matched - 1;
+ /*
+ * shortcut for the case that the last entry in the list
+ * fails the test (this arises when adding non-overlapping
+ * intervals in ascending start position order)
+ */
+ if (end > 0)
+ {
+ IntervalI last = list.get(end);
+ if (!compare(last, compareBegin, comp, compareto))
+ {
+ return matched;
+ }
+ }
+
while (start <= end)
{
int mid = (start + end) / 2;
- T entry = list.get(mid);
- boolean itsTrue = test.apply(entry);
- if (itsTrue)
+ IntervalI entry = list.get(mid);
+ if (compare(entry, compareBegin, comp, compareto))
{
matched = mid;
end = mid - 1;
return matched;
}
+
+ /**
+ * Applies the comparison specified by {@code comp} to either the
+ * {@code begin} value of {@code entry} (if {@code compareBegin} is true) or
+ * the {@code end} value, and the {@code compareTo} value, and returns the
+ * result of the comparison
+ *
+ * @param entry
+ * @param compareBegin
+ * @param comp
+ * @param compareTo
+ * @return
+ */
+ private static boolean compare(IntervalI entry, boolean compareBegin,
+ Compare comp, int compareTo)
+ {
+ int val = compareBegin ? entry.getBegin() : entry.getEnd();
+ boolean result;
+ switch (comp)
+ {
+ case LT:
+ result = val < compareTo;
+ break;
+ case LE:
+ result = val <= compareTo;
+ break;
+ case EQ:
+ result = val == compareTo;
+ break;
+ case GE:
+ result = val >= compareTo;
+ break;
+ case GT:
+ default:
+ result = val > compareTo;
+ }
+ return result;
+ }
}
import intervalstore.api.IntervalI;
import intervalstore.api.IntervalStoreI;
+import intervalstore.impl.BinarySearcher.Compare;
/**
* A collection class to store interval-associated data, with O(log N)
}
/**
- * Adds one interval to the store.
+ * Adds one interval to the store. Answers true unless the feature is a
+ * duplicate (already contained), in which case it is not added.
*
* @param interval
*/
@Override
public boolean add(T interval)
{
+ return add(interval, true);
+ }
+
+ @Override
+ public boolean add(T interval, boolean allowDuplicates)
+ {
if (interval == null)
{
return false;
}
+ if (!allowDuplicates && contains(interval))
+ {
+ return false;
+ }
+
if (!addNonNestedInterval(interval))
{
/*
/*
* find the first stored interval which doesn't precede the new one
*/
- int insertPosition = BinarySearcher.findFirst(nonNested,
- val -> val.getBegin() >= entry.getBegin());
+ int insertPosition = BinarySearcher.findFirst(nonNested, true,
+ Compare.GE, entry.getBegin());
/*
* fail if we detect interval enclosure
* - of the new interval by the one before or after it
@Override
public List<T> findOverlaps(long from, long to)
{
- List<T> result = new ArrayList<>();
+ return findOverlaps(from, to, new ArrayList<>());
+ }
+
+ @Override
+ public List<T> findOverlaps(long from, long to, List<T> result)
+ {
+ if (result == null)
+ {
+ result = new ArrayList<>();
+ }
findNonNestedOverlaps(from, to, result);
if (nested != null)
{
- result.addAll(nested.findOverlaps(from, to));
+ nested.findOverlaps(from, to, result);
}
return result;
return pp;
}
- @Override
+ /**
+ * Inspects the data store and answers true if it is validly constructed, else
+ * false. Provided for use in verification by test classes. *
+ *
+ * @return
+ */
public boolean isValid()
{
for (int i = 0; i < nonNested.size() - 1; i++)
* start position is not less than the target range start
* (NB inequality test ensures the first match if any is found)
*/
- int startIndex = BinarySearcher.findFirst(nonNested,
- val -> val.getBegin() >= entry.getBegin());
+ int startIndex = BinarySearcher.findFirst(nonNested, true, Compare.GE,
+ entry.getBegin());
/*
* traverse intervals to look for a match
return false;
}
+ /**
+ * Answers 0 if the store is empty, 1 if there are only top level intervals,
+ * else 1 plus the depth of the nested intervals (NCList)
+ */
@Override
public int getDepth()
{
/*
* locate the first entry in the list which does not precede the interval
*/
- int pos = BinarySearcher.findFirst(intervals,
- val -> val.getBegin() >= interval.getBegin());
+ int pos = BinarySearcher.findFirst(intervals, true, Compare.GE,
+ interval.getBegin());
int len = intervals.size();
while (pos < len)
{
* find the first interval whose end position is
* after the target range start
*/
- int startIndex = BinarySearcher.findFirst(nonNested,
- val -> val.getEnd() >= from);
+ int startIndex = BinarySearcher.findFirst(nonNested, false, Compare.GE,
+ (int) from);
final int startIndex1 = startIndex;
int i = startIndex1;
import java.util.NoSuchElementException;
import intervalstore.api.IntervalI;
+import intervalstore.impl.BinarySearcher.Compare;
/**
* An adapted implementation of NCList as described in the paper
* sort by start ascending, length descending, so that
* contained intervals follow their containing interval
*/
- Collections.sort(ranges, new NCListBuilder<>().getComparator());
+ Collections.sort(ranges, IntervalI.COMPARE_BEGIN_ASC_END_DESC);
int listStartIndex = 0;
*/
protected int findFirstOverlap(final long from)
{
- return BinarySearcher.findFirst(subranges,
- val -> val.getEnd() >= from);
+ return BinarySearcher.findFirst(subranges, false, Compare.GE,
+ (int) from);
}
/**
*/
public class NCListBuilder<T extends IntervalI>
{
+ /**
+ * Compares two intervals in a way that will sort a list by start position
+ * ascending, then by length descending
+ */
class NCListComparator<V extends IntervalI> implements Comparator<V>
{
- /**
- * Compares two intervals in a way that will sort a list by start position
- * ascending, then by length descending. Answers
- * <ul>
- * <li>a negative value if o1.begin < o2.begin</li>
- * <li>else a positive value if o1.begin > o2.begin</li>
- * <li>else a negative value if o1.end > o2.end</li>
- * <li>else a positive value of o1.end < o2.end</li>
- * <li>else zero</li
- * </ul>
- */
@Override
public int compare(V o1, V o2)
{
int order = Integer.compare(o1.getBegin(), o2.getBegin());
if (order == 0)
{
- /*
- * if tied on start position, longer length sorts to left
- * i.e. the negation of normal ordering by length
- */
order = Integer.compare(o2.getEnd(), o1.getEnd());
}
return order;
}
}
-
- private Comparator<T> comparator = new NCListComparator<>();
/**
* Default constructor
}
/**
- * Answers a comparator which sorts items of type T by start position
- * ascending, and within that by end position (length) descending)
- *
- * @return
- */
- Comparator<T> getComparator()
- {
- return comparator;
- }
-
- /**
* Sorts and traverses the ranges to identify sublists, whose start intervals
* are overlapping or disjoint but not mutually contained. Answers a list of
* start-end indices of the sorted list of ranges.
* sort by start ascending, length descending, so that
* contained intervals follow their containing interval
*/
- Collections.sort(ranges, comparator);
+ Collections.sort(ranges, IntervalI.COMPARE_BEGIN_ASC_END_DESC);
int listStartIndex = 0;
}
@Override
- public boolean equals(Object obj)
+ public boolean equals(Object o)
{
- if (obj instanceof Range)
- {
- Range r = (Range) obj;
- return (start == r.start && end == r.end);
- }
- return false;
+ return (o instanceof Range) && equalsInterval((Range) o);
}
}