JAL-3383 JAL-3397 JAL-3253-applet IntervalStore options
authorhansonr <hansonr@STO24954W.ad.stolaf.edu>
Tue, 6 Aug 2019 21:25:13 +0000 (16:25 -0500)
committerhansonr <hansonr@STO24954W.ad.stolaf.edu>
Tue, 6 Aug 2019 21:25:13 +0000 (16:25 -0500)
In SequenceFeatures:

INTERVAL_STORE_NCLIST  (default for Java)
 - no changes from previous

 INTERVAL_STORE_NONCLIST (Java option)
 - uses intervalstore.nonc.IntervalStore
 - optional "lazy" just-in-time sorting
 - passes the following TestNG tests:

IntervalStoreJ.test.intervalstore.nonc...
NoNCListBuilderTest.java
NoNCListIntervalIteratorTest.java
NoNCListIntervalStoreTest.java
NoNCListIntervalTest.java
NoNCListLoadTest.java
NoNCListRandomisedTest.java
NoNCListTimingTests.java

test.jalview.datamodel.features...
FeatureStoreNoNCTest.java
FeatureStoreJSTest.java
SequenceFeaturesTest.java

17 files changed:
src/intervalstore/api/IntervalI.java [new file with mode: 0644]
src/intervalstore/api/IntervalStoreI.java [new file with mode: 0644]
src/intervalstore/nonc/IntervalComparator.java [new file with mode: 0644]
src/intervalstore/nonc/IntervalStore.java [new file with mode: 0644]
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceFeature.java
src/jalview/datamodel/features/FeatureStore.java
src/jalview/datamodel/features/FeatureStoreImpl.java
src/jalview/datamodel/features/FeatureStoreJS.java
src/jalview/datamodel/features/SequenceFeatures.java
src/jalview/io/FileLoader.java
src/jalview/io/vamsas/Sequencefeature.java
src/jalview/project/Jalview2XML.java
test/jalview/datamodel/features/FeatureStoreNoNCTest.java [new file with mode: 0644]
unused/nonc/IntervalI.java [new file with mode: 0644]
unused/nonc/IntervalStore.java [new file with mode: 0644]
unused/nonc/IntervalStoreI.java [new file with mode: 0644]

diff --git a/src/intervalstore/api/IntervalI.java b/src/intervalstore/api/IntervalI.java
new file mode 100644 (file)
index 0000000..0dfd935
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.api;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+public interface IntervalI
+{
+  /**
+   * a comparator for sorting intervals by start position ascending
+   */
+  static Comparator<? super IntervalI> FORWARD_STRAND = new Comparator<IntervalI>()
+  {
+    @Override
+    public int compare(IntervalI o1, IntervalI o2)
+    {
+      return Integer.compare(o1.getBegin(), o2.getBegin());
+    }
+  };
+
+  /**
+   * a comparator for sorting intervals by end position descending
+   */
+  static Comparator<? super IntervalI> REVERSE_STRAND = new Comparator<IntervalI>()
+  {
+    @Override
+    public int compare(IntervalI o1, IntervalI o2)
+    {
+      return Integer.compare(o2.getEnd(), o1.getEnd());
+    }
+  };
+
+  int getBegin();
+
+  int getEnd();
+
+  /**
+   * Answers true if this interval contains (or matches) the given interval
+   * 
+   * @param i
+   * @return
+   */
+  default boolean containsInterval(IntervalI i)
+  {
+    return i != null
+            && i.getBegin() >= getBegin() && i.getEnd() <= getEnd();
+  }
+
+  /**
+   * Answers true if this interval properly contains the given interval, that
+   * is, it contains it and is larger than it
+   * 
+   * @param i
+   * @return
+   */
+  default boolean properlyContainsInterval(IntervalI i)
+  {
+    return containsInterval(i)
+            && (i.getBegin() > getBegin() || i.getEnd() < getEnd());
+  }
+
+  default boolean equalsInterval(IntervalI i)
+  {
+    return i != null && i.getBegin() == getBegin()
+            && i.getEnd() == getEnd();
+  }
+
+  default boolean overlapsInterval(IntervalI i)
+  {
+    if (i == null)
+    {
+      return false;
+    }
+    if (i.getBegin() < getBegin())
+    {
+      return i.getEnd() >= getBegin();
+    }
+    if (i.getEnd() > getEnd())
+    {
+      return i.getBegin() <= getEnd();
+    }
+    return true; // i internal to this
+  }
+
+  /**
+   * Sorts the list by start position ascending (if forwardString==true), or by
+   * end position descending
+   * 
+   * @param intervals
+   * @param forwardStrand
+   */
+  static void sortIntervals(List<? extends IntervalI> intervals,
+          final boolean forwardStrand)
+  {
+    Collections.sort(intervals,
+            forwardStrand ? FORWARD_STRAND : REVERSE_STRAND);
+  }
+
+  IntervalI getContainedBy();
+
+  void setContainedBy(IntervalI containedBy);
+
+}
diff --git a/src/intervalstore/api/IntervalStoreI.java b/src/intervalstore/api/IntervalStoreI.java
new file mode 100644 (file)
index 0000000..f925a15
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.api;
+
+import java.util.Collection;
+import java.util.List;
+
+import intervalstore.impl.NCList;
+
+public interface IntervalStoreI<T extends IntervalI> extends Collection<T>
+{
+
+  /**
+   * Returns a (possibly empty) list of items whose extent overlaps the given
+   * range
+   * 
+   * @param from
+   *          start of overlap range (inclusive)
+   * @param to
+   *          end of overlap range (inclusive)
+   * @return
+   */
+  List<T> findOverlaps(long from, long to);
+
+  /**
+   * Ensures that the IntervalStore is ready for findOverlap.
+   * 
+   * @return true iff the data held satisfy the rules of construction of an
+   *         IntervalStore.
+   * 
+   */
+  boolean isValid();
+
+  /**
+   * Answers the level of nesting of intervals in the store, as
+   * <ul>
+   * <li>0 if the store is empty</li>
+   * <li>1 if all intervals are 'top level' (non nested)</li>
+   * <li>else 1 plus the depth of the enclosed NCList</li>
+   * </ul>
+   * 
+   * @return
+   * @see NCList#getDepth()
+   */
+
+  int getDepth();
+
+  /**
+   * Return the number of top-level (not-contained) intervals.
+   * 
+   * @return
+   */
+  int getWidth();
+
+  List<T> findOverlaps(long start, long end, List<T> result);
+
+  String prettyPrint();
+
+  /**
+   * Resort and rebuild links.
+   * 
+   * @return
+   */
+  boolean revalidate();
+
+  IntervalI get(int i);
+
+}
\ No newline at end of file
diff --git a/src/intervalstore/nonc/IntervalComparator.java b/src/intervalstore/nonc/IntervalComparator.java
new file mode 100644 (file)
index 0000000..f5909f4
--- /dev/null
@@ -0,0 +1,17 @@
+package intervalstore.nonc;
+
+import java.util.Comparator;
+
+import intervalstore.api.IntervalI;
+
+public class IntervalComparator implements Comparator<IntervalI>
+{
+
+  @Override
+  public int compare(IntervalI o1, IntervalI o2)
+  {
+    int order = Integer.compare(o1.getBegin(), o2.getBegin());
+    return (order == 0 ? Integer.compare(o2.getEnd(), o1.getEnd()) : order);
+  }
+
+}
diff --git a/src/intervalstore/nonc/IntervalStore.java b/src/intervalstore/nonc/IntervalStore.java
new file mode 100644 (file)
index 0000000..91138d3
--- /dev/null
@@ -0,0 +1,627 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.nonc;
+
+import java.util.AbstractCollection;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import intervalstore.api.IntervalI;
+import intervalstore.api.IntervalStoreI;
+
+
+/**
+ * 
+ * A Collection class to store interval-associated data, with options for "lazy"
+ * sorting so as to speed incremental construction of the data prior to issuing
+ * a findOverlap call.
+ * 
+ * 
+ * with O(log N) performance for overlap queries, insertion and deletion (where
+ * N is the size of the store).
+ * 
+ * Accepts duplicate entries but not null values.
+ * 
+ * @author Bob Hanson 2019.08.06
+ *
+ * @param <T>
+ *          any type providing <code>getBegin()</code>, <code>getEnd()</code>
+ *          <code>getContainedBy()</code>, and <code>setContainedBy()</code>
+ */
+public class IntervalStore<T extends IntervalI>
+        extends AbstractCollection<T> implements IntervalStoreI<T>
+{
+
+  private final boolean DO_PRESORT;
+
+  private boolean isSorted;
+
+  private List<T> intervals;
+
+  private boolean isTainted;
+
+  private IntervalI[] orderedIntervalStarts;
+
+  private static Comparator<IntervalI> icompare = new IntervalComparator();
+
+  static
+  {
+    System.out.println("intervalstore.nonc.IntervalStore initialized");
+  }
+
+  // private Comparator<IntervalI> icompare = new IntervalComparator();
+
+  /**
+   * Constructor
+   */
+  public IntervalStore()
+  {
+
+    intervals = new ArrayList<>();
+    DO_PRESORT = true;
+  };
+  
+  public IntervalStore(boolean presort)
+  {
+    intervals = new ArrayList<>();
+    DO_PRESORT = presort;
+  };
+
+  /**
+   * Constructor given a list of intervals. Note that the list may get sorted as
+   * a side-effect of calling this constructor.
+   */
+  public IntervalStore(List<T> intervals)
+  {
+    this(intervals, true);
+  }
+
+  /**
+   * Allows a presort option, which can speed up initial loading of individual
+   * features but will delay the first findOverlap if set to true.
+   * 
+   * @param intervals
+   * @param presort
+   */
+  public IntervalStore(List<T> intervals, boolean presort)
+  {
+    this.intervals = intervals;
+    DO_PRESORT = presort;
+    if (DO_PRESORT)
+    {
+      sort();
+    }
+    isTainted = true;
+  }
+
+  /**
+   * Adds one interval to the store.
+   * 
+   * @param interval
+   */
+  @Override
+  public boolean add(T interval)
+  {
+    if (interval == null)
+    {
+      return false;
+    }
+
+    synchronized (intervals)
+    {
+      if (DO_PRESORT)
+      {
+        int insertPosition = findFirstBegin(intervals, interval.getBegin());
+        intervals.add(insertPosition, interval);
+        isSorted = true;
+      }
+      else
+      {
+        intervals.add(interval);
+        isSorted = false;
+      }
+      isTainted = true;
+      return true;
+    }
+  }
+
+  @Override
+  public void clear()
+  {
+    intervals.clear();
+    orderedIntervalStarts = null;
+    isTainted = true;
+  }
+
+  @Override
+  public boolean contains(Object entry)
+  {
+    return listContains(intervals, entry);
+  }
+
+  private void ensureFinalized()
+  {
+    if (isTainted && intervals.size() >= 2)
+    {
+      if (!isSorted)
+      {
+        sort();
+      }
+      orderedIntervalStarts = intervals.toArray(new IntervalI[0]);
+      linkFeatures(orderedIntervalStarts);
+      isTainted = false;
+    }
+  }
+
+  /**
+   * Sort intervals by start (lowest first) and end (highest first).
+   */
+  private void sort()
+  {
+    Collections.sort(intervals, icompare);
+    isSorted = true;
+  }
+
+  protected int findFirstBegin(List<T> list, long pos)
+  {
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
+
+    while (start <= end)
+    {
+      int mid = (start + end) / 2;
+      if (list.get(mid).getBegin() >= pos)
+      {
+        matched = mid;
+        end = mid - 1;
+      }
+      else
+      {
+        start = mid + 1;
+      }
+    }
+    return matched;
+  }
+
+  protected int findFirstEnd(List<T> list, long pos)
+  {
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
+
+    while (start <= end)
+    {
+      int mid = (start + end) / 2;
+      if (list.get(mid).getEnd() >= pos)
+      {
+        matched = mid;
+        end = mid - 1;
+      }
+      else
+      {
+        start = mid + 1;
+      }
+    }
+    return matched;
+  }
+
+  /**
+   * Adds non-nested intervals to the result list that lie within the target
+   * range
+   * 
+   * @param from
+   * @param to
+   * @param result
+   */
+  protected void findIntervalOverlaps(long from, long to,
+          List<T> result)
+  {
+
+    int startIndex = findFirstEnd(intervals, from);
+    final int startIndex1 = startIndex;
+    int i = startIndex1;
+    while (i < intervals.size())
+    {
+      T sf = intervals.get(i);
+      if (sf.getBegin() > to)
+      {
+        break;
+      }
+      if (sf.getBegin() <= to && sf.getEnd() >= from)
+      {
+        result.add(sf);
+      }
+      i++;
+    }
+  }
+
+
+  @Override
+  public List<T> findOverlaps(long start, long end)
+  {
+    return findOverlaps(start, end, null);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public List<T> findOverlaps(long start, long end, List<T> result)
+  {
+
+    if (result == null)
+    {
+      result = new ArrayList<>();
+    }
+    int n = intervals.size();
+    switch (n)
+    {
+    case 0:
+      return result;
+    case 1:
+      T sf = intervals.get(0);
+      if (sf.getBegin() <= end && sf.getEnd() >= start)
+      {
+        result.add(sf);
+      }
+      return result;
+    }
+
+    ensureFinalized();
+
+    // (1) Find the closest feature to this position.
+
+    int index = getClosestFeature(orderedIntervalStarts, start);
+    IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]);
+
+    // (2) Traverse the containedBy field, checking for overlap.
+
+    while (sf != null)
+    {
+      if (sf.getEnd() >= start)
+      {
+        result.add((T) sf);
+      }
+      sf = sf.getContainedBy();
+    }
+
+    // (3) For an interval, find the last feature that starts in this interval,
+    // and add all features up through that feature.
+
+    if (end >= start)
+    {
+      // fill in with all features that start within this interval, fully
+      // inclusive
+      int index2 = getClosestFeature(orderedIntervalStarts, end);
+      while (++index <= index2)
+      {
+        result.add((T) orderedIntervalStarts[index]);
+      }
+
+    }
+    return result;
+  }
+
+  private int getClosestFeature(IntervalI[] l, long pos)
+  {
+    int low = 0;
+    int high = l.length - 1;
+    while (low <= high)
+    {
+      int mid = (low + high) >>> 1;
+      IntervalI f = l[mid];
+      switch (Long.signum(f.getBegin() - pos))
+      {
+      case -1:
+        low = mid + 1;
+        continue;
+      case 1:
+        high = mid - 1;
+        continue;
+      case 0:
+
+        while (++mid <= high && l[mid].getBegin() == pos)
+        {
+          ;
+        }
+        return --mid;
+      }
+    }
+    return (high < 0 ? -1 : high);
+  }
+
+  @SuppressWarnings("unchecked")
+  public T getContainedBy(T sf, T sf0)
+  {
+    int begin = sf0.getBegin();
+    while (sf != null)
+    {
+      if (begin <= sf.getEnd())
+      {
+        // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
+        // + "\nFS in " + sf.getIndex1() + ":" + sf);
+        return sf;
+      }
+      sf = (T) sf.getContainedBy();
+    }
+    return null;
+  }
+
+  @Override
+  public int getDepth()
+  {
+    int n = intervals.size();
+    if (n < 2)
+    {
+      return n;
+    }
+    ensureFinalized();
+    int maxDepth = 1;
+    T root = null;
+    for (int i = 0; i < n; i++)
+    {
+      T element = intervals.get(i);
+      IntervalI container = element;
+      if (element.getContainedBy() == null)
+      {
+        root = element;
+      }
+      int depth = 1;
+      while ((container = container.getContainedBy()) != null)
+      {
+        if (++depth > maxDepth && container == root)
+        {
+          maxDepth = depth;
+          break;
+        }
+      }
+    }
+    return maxDepth;
+  }
+
+  @Override
+  public boolean isValid()
+  {
+    ensureFinalized();
+    return true;
+  }
+
+  /**
+   * Answers an iterator over the intervals in the store, with no particular
+   * ordering guaranteed. The iterator does not support the optional
+   * <code>remove</code> operation (throws
+   * <code>UnsupportedOperationException</code> if attempted).
+   */
+  @Override
+  public Iterator<T> iterator()
+  {
+    return intervals.iterator();
+
+  }
+
+  @SuppressWarnings("unchecked")
+  private void linkFeatures(IntervalI[] features)
+  {
+    int n = features.length;
+    if (n < 2)
+    {
+      return;
+    }
+    int maxEnd = features[0].getEnd();
+    for (int i = 1; i < n; i++)
+    {
+      T sf = (T) features[i];
+      if (sf.getBegin() <= maxEnd)
+      {
+        sf.setContainedBy(getContainedBy((T) features[i - 1], sf));
+      }
+      if (sf.getEnd() > maxEnd)
+      {
+        maxEnd = sf.getEnd();
+      }
+    }
+
+  }
+
+  /**
+   * Answers true if the list contains the interval, else false. This method is
+   * optimised for the condition that the list is sorted on interval start
+   * position ascending, and will give unreliable results if this does not hold.
+   * 
+   * @param intervals
+   * @param entry
+   * @return
+   */
+  protected boolean listContains(List<T> intervals, Object entry)
+  {
+    if (intervals == null || entry == null)
+    {
+      return false;
+    }
+
+    if (!isSorted)
+    {
+      return intervals.contains(entry);
+    }
+
+    @SuppressWarnings("unchecked")
+    T interval = (T) entry;
+
+    /*
+     * locate the first entry in the list which does not precede the interval
+     */
+    int pos = findFirstBegin(intervals, interval.getBegin());
+    int len = intervals.size();
+    while (pos < len)
+    {
+      T sf = intervals.get(pos);
+      if (sf.getBegin() > interval.getBegin())
+      {
+        return false; // no match found
+      }
+      if (sf.getEnd() == interval.getEnd() && sf.equals(interval))
+      {
+        return true;
+      }
+      pos++;
+    }
+    return false;
+  }
+
+  @Override
+  public synchronized boolean remove(Object o)
+  {
+    // if (o == null)
+    // {
+    // throw new NullPointerException();
+    // }
+    return (o != null && intervals.size() > 0
+            && removeInterval((IntervalI) o));
+  }
+
+  /**
+   * Uses a binary search to find the entry and removes it if found.
+   * 
+   * @param entry
+   * @return
+   */
+  protected boolean removeInterval(IntervalI entry)
+  {
+    if (!isSorted)
+    {
+      return intervals.remove(entry);
+    }
+
+    /*
+     * find the first interval that might match, i.e. whose 
+     * start position is not less than the target range start
+     * (NB inequality test ensures the first match if any is found)
+     */
+    int startIndex = findFirstBegin(intervals, entry.getBegin());
+
+    /*
+     * traverse intervals to look for a match
+     */
+    int from = entry.getBegin();
+    int to = entry.getEnd();
+    for (int i = startIndex, size = intervals.size(); i < size; i++)
+    {
+      T sf = intervals.get(i);
+      if (sf.getBegin() > from)
+      {
+        return false;
+      }
+      if (sf.getEnd() == to && sf.equals(entry))
+      {
+        intervals.remove(i).setContainedBy(null);
+        return (isTainted = true);
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public int size()
+  {
+    return intervals.size();
+  }
+
+  @Override
+  public String toString()
+  {
+    return prettyPrint();
+  }
+
+  @Override
+  public int getWidth()
+  {
+    ensureFinalized();
+    int w = 0;
+    for (int i = intervals.size(); --i >= 0;)
+    {
+      if (intervals.get(i).getContainedBy() == null)
+      {
+        w++;
+      }
+    }
+    return w;
+  }
+
+  @Override
+  public String prettyPrint()
+  {
+    int n = intervals.size();
+    if (n == 0)
+    {
+      return "";
+    }
+    if (n == 1)
+    {
+      return intervals.get(0) + "\n";
+    }
+    ensureFinalized();
+    String sep = "\t";
+    StringBuffer sb = new StringBuffer();
+    for (int i = 0; i < n; i++)
+    {
+      IntervalI range = orderedIntervalStarts[i];
+      IntervalI container = range.getContainedBy();
+      while (container != null)
+      {
+        sb.append(sep);
+        container = container.getContainedBy();
+      }
+      sb.append(range.toString()).append('\n');
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public boolean revalidate()
+  {
+    isTainted = true;
+    isSorted = true;
+    ensureFinalized();
+    return true;
+  }
+
+  @Override
+  public IntervalI get(int i)
+  {
+    ensureFinalized();
+    return (i < 0 || i >= orderedIntervalStarts.length ? null
+            : orderedIntervalStarts[i]);
+  }
+
+}
index 4219c8e..bad33d1 100755 (executable)
@@ -392,6 +392,10 @@ public class Sequence extends ASequence implements SequenceI
     return sequenceFeatureStore.add(sf);
   }
 
+  /**
+   * @param sf
+   *          A known feature of this featureStore
+   */
   @Override
   public void deleteFeature(SequenceFeature sf)
   {
index bf2408c..a71c7b1 100755 (executable)
@@ -35,6 +35,8 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.Vector;
 
+import intervalstore.api.IntervalI;
+
 /**
  * A class that models a single contiguous feature on a sequence. If flag
  * 'contactFeature' is true, the start and end positions are interpreted instead
@@ -99,9 +101,14 @@ public class SequenceFeature implements FeatureLocationI
    */
   private String source;
 
-  // for Overview sort:
-  public int index;
+  /**
+   * 1-based index into the featureList used by FeatureStoreJS
+   */
+  public int index1;
 
+  /**
+   * containment nesting link used by FeatureStoreJS to track starting points
+   */
   public SequenceFeature containedBy;
 
   /**
@@ -741,6 +748,20 @@ public class SequenceFeature implements FeatureLocationI
   {
     source = theSource;
   }
+
+  @Override
+  public IntervalI getContainedBy()
+  {
+    return containedBy;
+  }
+
+  @Override
+  public void setContainedBy(IntervalI containedBy)
+  {
+    this.containedBy = (SequenceFeature) containedBy;
+
+  }
+
 }
 
 class SFSortByEnd implements Comparator<SequenceFeature>
index ccd8b66..8db35a3 100644 (file)
@@ -57,38 +57,52 @@ public abstract class FeatureStore implements FeatureStoreI
    * optimised for the condition that the list is sorted on feature start
    * position ascending, and will give unreliable results if this does not hold.
    * 
-   * @param features
+   * @param list
    * @param feature
    * @return
    */
   @Override
-  public boolean listContains(List<SequenceFeature> features,
+  public boolean listContains(List<SequenceFeature> list,
           SequenceFeature feature)
   {
-    if (features == null || feature == null)
+    if (list == null || feature == null)
     {
       return false;
     }
 
+    return (getEquivalentFeatureIndex(list, feature) >= 0);
+  }
+
+  /**
+   * Binary search for the index (&gt;= 0) of a feature in a list.
+   * 
+   * @param list
+   * @param feature
+   * @return index if found; -1 if not
+   */
+  protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
+          SequenceFeature feature)
+  {
+
     /*
      * locate the first entry in the list which does not precede the feature
      */
-    int pos = findFirstBegin(features, feature.begin);
-    int len = features.size();
+    int pos = findFirstBegin(list, feature.begin);
+    int len = list.size();
     while (pos < len)
     {
-      SequenceFeature sf = features.get(pos);
-      if (sf.getBegin() > feature.getBegin())
+      SequenceFeature sf = list.get(pos);
+      if (sf.begin > feature.begin)
       {
-        return false; // no match found
+        return -1; // no match found
       }
       if (sf.equals(feature))
       {
-        return true;
+        return pos;
       }
       pos++;
     }
-    return false;
+    return -1;
   }
 
   /**
@@ -262,7 +276,7 @@ public abstract class FeatureStore implements FeatureStoreI
     }
     else
     {
-      addNestedFeature(feature);
+      addPositionalFeature(feature);
     }
 
     /*
@@ -316,7 +330,7 @@ public abstract class FeatureStore implements FeatureStoreI
    * Adds one feature to the IntervalStore that can manage nested features
    * (creating the IntervalStore if necessary)
    */
-  abstract protected void addNestedFeature(SequenceFeature feature);
+  abstract protected void addPositionalFeature(SequenceFeature feature);
 
   /**
    * Adds the feature to the list of non-positional features (with lazy
@@ -396,15 +410,12 @@ public abstract class FeatureStore implements FeatureStoreI
       }
     }
 
-    boolean removedNonPositional = false;
-
     /*
      * if not found, try non-positional features
      */
     if (!removed && nonPositionalFeatures != null)
     {
-      removedNonPositional = nonPositionalFeatures.remove(sf);
-      removed = removedNonPositional;
+      removed = nonPositionalFeatures.remove(sf);
     }
 
     /*
@@ -412,7 +423,7 @@ public abstract class FeatureStore implements FeatureStoreI
      */
     if (!removed && features != null)
     {
-      removed = features.remove(sf);
+      removed = findAndRemoveNonContactFeature(sf);
     }
 
     if (removed)
@@ -423,6 +434,8 @@ public abstract class FeatureStore implements FeatureStoreI
     return removed;
   }
 
+  abstract protected boolean findAndRemoveNonContactFeature(SequenceFeature sf);
+
   abstract protected void findContactFeatures(long from, long to,
           List<SequenceFeature> result);
 
@@ -690,8 +703,10 @@ public abstract class FeatureStore implements FeatureStoreI
      */
     if (nonPositionalFeatures != null)
     {
-      for (SequenceFeature sf : nonPositionalFeatures)
+      List<SequenceFeature> list = nonPositionalFeatures;
+      for (int i = 0, n = list.size(); i < n; i++)
       {
+        SequenceFeature sf = list.get(i);
         nonPositionalFeatureGroups.add(sf.getFeatureGroup());
         float score = sf.getScore();
         nonPositionalMinScore = min(nonPositionalMinScore, score);
@@ -742,8 +757,10 @@ public abstract class FeatureStore implements FeatureStoreI
      * (Although a simple shift of all values would preserve data integrity!)
      */
     boolean modified = false;
-    for (SequenceFeature sf : getPositionalFeatures())
+    List<SequenceFeature> list = getPositionalFeatures();
+    for (int i = 0, n = list.size(); i < n; i++)
     {
+      SequenceFeature sf = list.get(i);
       if (sf.getBegin() >= fromPosition)
       {
         modified = true;
index 02394bb..47c1dd4 100644 (file)
@@ -27,7 +27,6 @@ import java.util.List;
 
 import intervalstore.api.IntervalStoreI;
 import intervalstore.impl.BinarySearcher;
-import intervalstore.impl.IntervalStore;
 
 /**
  * A data store for a set of sequence features that supports efficient lookup of
@@ -40,9 +39,18 @@ import intervalstore.impl.IntervalStore;
 public class FeatureStoreImpl extends FeatureStore
 {
 
+  /**
+   * Default constructor uses NCList
+   */
   public FeatureStoreImpl()
   {
-    features = new IntervalStore<>();
+    this(true);
+  }
+
+  public FeatureStoreImpl(boolean useNCList)
+  {
+    features = (useNCList ? new intervalstore.impl.IntervalStore<>()
+            : new intervalstore.nonc.IntervalStore<>(false));
   }
 
   /**
@@ -87,7 +95,7 @@ public class FeatureStoreImpl extends FeatureStore
    * (creating the IntervalStore if necessary)
    */
   @Override
-  protected synchronized void addNestedFeature(SequenceFeature feature)
+  protected synchronized void addPositionalFeature(SequenceFeature feature)
   {
     features.add(feature);
   }
@@ -286,4 +294,10 @@ public class FeatureStoreImpl extends FeatureStore
     return BinarySearcher.findFirst(list, f -> f.getEnd() >= pos);
   }
 
+  @Override
+  protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
+  {
+    return features.remove(sf);
+  }
+
 }
index bd50420..b119aef 100644 (file)
@@ -43,6 +43,17 @@ public class FeatureStoreJS extends FeatureStore
   private ArrayList<SequenceFeature> featureList;
 
   /**
+   * contact features ordered by first contact position
+   */
+  private SequenceFeature[] orderedFeatureStarts;
+
+  /**
+   * indicates that we need to rebuild orderedFeatureStarts and reset index
+   * fields
+   */
+  private boolean isTainted = true;
+
+  /**
    * Constructor
    */
   public FeatureStoreJS()
@@ -74,55 +85,6 @@ public class FeatureStoreJS extends FeatureStore
     return true;
   }
 
-  /**
-   * Adds one feature to the IntervalStore that can manage nested features
-   * (creating the IntervalStore if necessary)
-   */
-  @Override
-  protected synchronized void addNestedFeature(SequenceFeature feature)
-  {
-    featureList.add(findFirstBegin(featureList, feature.begin), feature);
-  }
-
-  /**
-   * Returns a (possibly empty) list of features whose extent overlaps the given
-   * range. The returned list is not ordered. Contact features are included if
-   * either of the contact points lies within the range.
-   * 
-   * @param start
-   *          start position of overlap range (inclusive)
-   * @param end
-   *          end position of overlap range (inclusive)
-   * @return
-   */
-
-  @Override
-  public List<SequenceFeature> findOverlappingFeatures(long start, long end,
-          List<SequenceFeature> result)
-  {
-    if (result == null)
-    {
-      result = new ArrayList<>();
-    }
-    if (contactFeatureStarts != null)
-    {
-      if (start == end)
-      {
-        findContactPoints(contactFeatureStarts, start, result, true);
-        findContactPoints(contactFeatureEnds, start, result, false);
-      }
-      else
-      {
-        findContactFeatures(start, end, result);
-      }
-    }
-    if (featureList.size() > 0)
-    {
-      findOverlaps(start, end, result);
-    }
-    return result;
-  }
-
   // The following methods use a linked list of containment in SequenceFeature
   // rather than IntervalStore.
   //
@@ -146,243 +108,135 @@ public class FeatureStoreJS extends FeatureStore
 
   // Initialization
 
-  /*
-   * contact features ordered by first contact position
+  /**
+   * Adds one feature to the IntervalStore that can manage nested features
+   * (creating the IntervalStore if necessary)
    */
-  private SequenceFeature[] orderedFeatureStarts;
-
-  private void rebuildArrays(int n)
+  @Override
+  protected synchronized void addPositionalFeature(SequenceFeature feature)
   {
-    // Arrays.sort(orderedFeatureStarts, startComp);
-    orderedFeatureStarts = featureList
-            .toArray(new SequenceFeature[featureList.size()]);
-    linkFeatures(orderedFeatureStarts);
+    featureList.add(findFirstBegin(featureList, feature.begin), feature);
+    isTainted = true;
   }
 
-  // /**
-  // * just a standard Comparator
-  // */
-  // private static StartComparator startComp;
-  //
-  // class StartComparator implements Comparator<SequenceFeature>
-  // {
-  //
-  // @Override
-  // public int compare(SequenceFeature o1, SequenceFeature o2)
-  // {
-  // int p1 = o1.begin;
-  // int p2 = o2.begin;
-  // return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0);
-  // }
-  //
-  // }
-
-  /**
-   * Run through the sorted sequence array once, building the containedBy linked
-   * list references. Does a check first to make sure there is actually
-   * something out there that is overlapping. A null for sf.containedBy means
-   * there are no overlaps for this feature.
-   * 
-   * @param intervals
-   */
-  private void linkFeatures(SequenceFeature[] intervals)
+  @Override
+  protected boolean containsFeature(SequenceFeature feature)
   {
-    if (intervals.length < 2)
-    {
-      return;
-    }
-    int maxEnd = intervals[0].end;
-    for (int i = 1, n = intervals.length; i < n; i++)
-    {
-      SequenceFeature sf = intervals[i];
-      if (sf.begin <= maxEnd)
-      {
-        sf.containedBy = getContainedBy(intervals[i - 1], sf);
-      }
-      if (sf.end > maxEnd)
-      {
-        maxEnd = sf.end;
-      }
-    }
+
+    return getEquivalentFeatureIndex(featureList, feature) >= 0;
   }
 
-  /**
-   * Since we are traversing the sorted feature array in a forward direction,
-   * all elements prior to the one we are working on have been fully linked. All
-   * we are doing is following those links until we find the first array feature
-   * with a containedBy element that has an end &gt;= our begin point. It is
-   * generally a very short list -- maybe one or two depths. But it might be
-   * more than that.
-   * 
-   * @param sf
-   * @param sf0
-   * @return
-   */
-  private SequenceFeature getContainedBy(SequenceFeature sf,
-          SequenceFeature sf0)
+  @Override
+  protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
   {
-    int begin = sf0.begin;
-    while (sf != null)
+    int pos = getEquivalentFeatureIndex(featureList, sf);
+    if (pos < 0)
     {
-      if (begin <= sf.end)
-      {
-        // System.out.println("\nFS found " + sf0.index + ":" + sf0
-        // + "\nFS in " + sf.index + ":" + sf);
-        return sf;
-      }
-      sf = sf.containedBy;
+      return false;
     }
-    return null;
+    featureList.remove(pos);
+    return (isTainted = true);
   }
 
-  // search-stage methods
-
   /**
-   * Binary search for contact start or end at a given (Overview) position.
+   * Adds contact features to the result list where either the second or the
+   * first contact position lies within the target range
    * 
-   * @param l
-   * @param pos
+   * @param from
+   * @param to
    * @param result
-   * @param isStart
-   * 
-   * @author Bob Hanson 2019.07.30
    */
-  private static void findContactPoints(List<SequenceFeature> l, long pos,
-          List<SequenceFeature> result, boolean isStart)
+  @Override
+  protected void findContactFeatures(long from, long to,
+          List<SequenceFeature> result)
   {
-    int low = 0;
-    int high = l.size() - 1;
-    while (low <= high)
-    {
-      int mid = (low + high) >>> 1;
-      SequenceFeature f = l.get(mid);
-      switch (Long.signum((isStart ? f.begin : f.end) - pos))
-      {
-      case -1:
-        low = mid + 1;
-        continue;
-      case 1:
-        high = mid - 1;
-        continue;
-      case 0:
-        int m = mid;
-        result.add(f);
-        // could be "5" in 12345556788 ?
-        while (++mid <= high && (f = l.get(mid)) != null
-                && (isStart ? f.begin : f.end) == pos)
-        {
-          result.add(f);
-        }
-        while (--m >= low && (f = l.get(m)) != null
-                && (isStart ? f.begin : f.end) == pos)
-        {
-          result.add(f);
-        }
-        return;
-      }
-    }
+    getContactStartOverlaps(from, to, result);
+    getContactEndOverlaps(from, to, result);
   }
 
-  /**
-   * Find all overlaps; special case when there is only one feature. The
-   * required array of start-sorted SequenceFeature is created lazily.
-   * 
-   * @param start
-   * @param end
-   * @param result
-   */
-  private void findOverlaps(long start, long end,
-          List<SequenceFeature> result)
+  @Override
+  protected int findFirstBegin(List<SequenceFeature> list, long pos)
   {
-    int n = featureList.size();
-    switch (n)
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
+
+    while (start <= end)
     {
-    case 0:
-      return;
-    case 1:
-      checkOne(featureList.get(0), start, end,
-              result);
-      return;
-    default:
-      if (orderedFeatureStarts == null)
+      int mid = (start + end) / 2;
+      if (list.get(mid).begin >= pos)
       {
-        rebuildArrays(n);
+        matched = mid;
+        end = mid - 1;
       }
-      break;
-    }
-
-    // (1) Find the closest feature to this position.
-
-    int index = findClosestFeature(orderedFeatureStarts, start);
-    SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
-
-    // (2) Traverse the containedBy field, checking for overlap.
-
-    while (sf != null)
-    {
-      if (sf.end >= start)
+      else
       {
-        result.add(sf);
+        start = mid + 1;
       }
-      sf = sf.containedBy;
     }
+    return matched;
+  }
 
-    // (3) For an interval, find the last feature that starts in this interval,
-    // and add all features up through that feature.
+  @Override
+  protected int findFirstEnd(List<SequenceFeature> list, long pos)
+  {
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
 
-    if (end > start)
+    while (start <= end)
     {
-      // fill in with all features that start within this interval, fully
-      // inclusive
-      int index2 = findClosestFeature(orderedFeatureStarts, end);
-      while (++index <= index2)
+      int mid = (start + end) / 2;
+      if (list.get(mid).end >= pos)
       {
-        result.add(orderedFeatureStarts[index]);
+        matched = mid;
+        end = mid - 1;
+      }
+      else
+      {
+        start = mid + 1;
       }
-
     }
+    return matched;
   }
 
   /**
-   * Quick check when we only have one feature.
+   * Returns a (possibly empty) list of features whose extent overlaps the given
+   * range. The returned list is not ordered. Contact features are included if
+   * either of the contact points lies within the range.
    * 
-   * @param sf
    * @param start
+   *          start position of overlap range (inclusive)
    * @param end
-   * @param result
+   *          end position of overlap range (inclusive)
+   * @return
    */
-  private void checkOne(SequenceFeature sf, long start, long end,
+
+  @Override
+  public List<SequenceFeature> findOverlappingFeatures(long start, long end,
           List<SequenceFeature> result)
   {
-    if (sf.begin <= end && sf.end >= start)
+    if (result == null)
     {
-      result.add(sf);
+      result = new ArrayList<>();
     }
-    return;
-  }
-
-  @Override
-  protected boolean containsFeature(SequenceFeature feature)
-  {
-
-    int pos = findFirstBegin(featureList,
-            feature.begin);
-    int len = featureList.size();
-    while (pos < len)
+    if (contactFeatureStarts != null)
     {
-      SequenceFeature sf = featureList.get(pos);
-      if (sf.begin > feature.begin)
+      if (start == end)
       {
-        return false; // no match found
+        getContactPoints(contactFeatureStarts, start, result, true);
+        getContactPoints(contactFeatureEnds, start, result, false);
       }
-      if (sf.equals(feature))
+      else
       {
-        return true;
+        findContactFeatures(start, end, result);
       }
-      pos++;
     }
-    return false;
-
+    if (featureList.size() > 0)
+    {
+      getOverlaps(start, end, result);
+    }
+    return result;
   }
 
   /**
@@ -396,7 +250,7 @@ public class FeatureStoreJS extends FeatureStore
    * @param pos
    * @return
    */
-  private int findClosestFeature(SequenceFeature[] l, long pos)
+  private int getClosestFeature(SequenceFeature[] l, long pos)
   {
     int low = 0;
     int high = l.length - 1;
@@ -425,22 +279,6 @@ public class FeatureStoreJS extends FeatureStore
   }
 
   /**
-   * Adds contact features to the result list where either the second or the
-   * first contact position lies within the target range
-   * 
-   * @param from
-   * @param to
-   * @param result
-   */
-  @Override
-  protected void findContactFeatures(long from, long to,
-          List<SequenceFeature> result)
-  {
-    findContactStartOverlaps(from, to, result);
-    findContactEndOverlaps(from, to, result);
-  }
-
-  /**
    * Adds to the result list any contact features whose end (second contact
    * point), but not start (first contact point), lies in the query from-to
    * range
@@ -450,7 +288,7 @@ public class FeatureStoreJS extends FeatureStore
    * @param result
    */
 
-  private void findContactEndOverlaps(long from, long to,
+  private void getContactEndOverlaps(long from, long to,
           List<SequenceFeature> result)
   {
     // find the first contact feature (if any)
@@ -480,6 +318,52 @@ public class FeatureStoreJS extends FeatureStore
   }
 
   /**
+   * Binary search for contact start or end at a given (Overview) position.
+   * 
+   * @param l
+   * @param pos
+   * @param result
+   * @param isStart
+   * 
+   * @author Bob Hanson 2019.07.30
+   */
+  private void getContactPoints(List<SequenceFeature> l, long pos,
+          List<SequenceFeature> result, boolean isStart)
+  {
+    int low = 0;
+    int high = l.size() - 1;
+    while (low <= high)
+    {
+      int mid = (low + high) >>> 1;
+      SequenceFeature f = l.get(mid);
+      switch (Long.signum((isStart ? f.begin : f.end) - pos))
+      {
+      case -1:
+        low = mid + 1;
+        continue;
+      case 1:
+        high = mid - 1;
+        continue;
+      case 0:
+        int m = mid;
+        result.add(f);
+        // could be "5" in 12345556788 ?
+        while (++mid <= high && (f = l.get(mid)) != null
+                && (isStart ? f.begin : f.end) == pos)
+        {
+          result.add(f);
+        }
+        while (--m >= low && (f = l.get(m)) != null
+                && (isStart ? f.begin : f.end) == pos)
+        {
+          result.add(f);
+        }
+        return;
+      }
+    }
+  }
+
+  /**
    * Adds contact features whose start position lies in the from-to range to the
    * result list
    * 
@@ -488,7 +372,7 @@ public class FeatureStoreJS extends FeatureStore
    * @param result
    */
 
-  private void findContactStartOverlaps(long from, long to,
+  private void getContactStartOverlaps(long from, long to,
           List<SequenceFeature> result)
   {
     for (int i = findFirstBegin(contactFeatureStarts,
@@ -503,51 +387,165 @@ public class FeatureStoreJS extends FeatureStore
     }
   }
 
+  /**
+   * Since we are traversing the sorted feature array in a forward direction,
+   * all elements prior to the one we are working on have been fully linked. All
+   * we are doing is following those links until we find the first array feature
+   * with a containedBy element that has an end &gt;= our begin point. It is
+   * generally a very short list -- maybe one or two depths. But it might be
+   * more than that.
+   * 
+   * @param sf
+   * @param sf0
+   * @return
+   */
+  private SequenceFeature getContainedBy(SequenceFeature sf,
+          SequenceFeature sf0)
+  {
+    int begin = sf0.begin;
+    while (sf != null)
+    {
+      if (begin <= sf.end)
+      {
+        System.out.println("\nFS found " + sf0 + "\nFS in " + sf);
+        return sf;
+      }
+      sf = sf.containedBy;
+    }
+    return null;
+  }
+
+  /**
+   * Fast find of known features already on the list; slower removal of
+   * equivalent feature, not necessarily identical.
+   * 
+   * @param feature
+   * @return 0-based index for this feature in featureList
+   */
   @Override
-  protected int findFirstBegin(List<SequenceFeature> list, long pos)
+  protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
+          SequenceFeature feature)
   {
-    int start = 0;
-    int end = list.size() - 1;
-    int matched = list.size();
+    int pos = feature.index1 - 1;
+    if (!isTainted && pos >= 0)
+    {
+      return pos;
+    }
+    return super.getEquivalentFeatureIndex(list, feature);
+  }
 
-    while (start <= end)
+  /**
+   * Find all overlaps; special case when there is only one feature. The
+   * required array of start-sorted SequenceFeature is created lazily.
+   * 
+   * @param start
+   * @param end
+   * @param result
+   */
+  private void getOverlaps(long start, long end,
+          List<SequenceFeature> result)
+  {
+    int n = featureList.size();
+    switch (n)
     {
-      int mid = (start + end) / 2;
-      if (list.get(mid).begin >= pos)
+    case 0:
+      return;
+    case 1:
+      justCheckOne(featureList.get(0), start, end, result);
+      return;
+    default:
+      if (isTainted)
       {
-        matched = mid;
-        end = mid - 1;
+        orderedFeatureStarts = featureList
+                .toArray(new SequenceFeature[featureList.size()]);
+        linkFeatures(orderedFeatureStarts);
+        isTainted = false;
       }
-      else
+      break;
+    }
+
+    // (1) Find the closest feature to this position.
+
+    int index = getClosestFeature(orderedFeatureStarts, start);
+    SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
+
+    // (2) Traverse the containedBy field, checking for overlap.
+
+    while (sf != null)
+    {
+      if (sf.end >= start)
       {
-        start = mid + 1;
+        result.add(sf);
       }
+      sf = sf.containedBy;
+    }
+
+    // (3) For an interval, find the last feature that starts in this interval,
+    // and add all features up through that feature.
+
+    if (end > start)
+    {
+      // fill in with all features that start within this interval, fully
+      // inclusive
+      int index2 = getClosestFeature(orderedFeatureStarts, end);
+      while (++index <= index2)
+      {
+        result.add(orderedFeatureStarts[index]);
+      }
+
     }
-    return matched;
   }
 
-  @Override
-  protected int findFirstEnd(List<SequenceFeature> list, long pos)
+  /**
+   * Quick check when we only have one feature.
+   * 
+   * @param sf
+   * @param start
+   * @param end
+   * @param result
+   */
+  private void justCheckOne(SequenceFeature sf, long start, long end,
+          List<SequenceFeature> result)
   {
-    int start = 0;
-    int end = list.size() - 1;
-    int matched = list.size();
+    if (sf.begin <= end && sf.end >= start)
+    {
+      result.add(sf);
+    }
+    return;
+  }
 
-    while (start <= end)
+  /**
+   * Run through the sorted sequence array once, building the containedBy linked
+   * list references. Does a check first to make sure there is actually
+   * something out there that is overlapping. A null for sf.containedBy means
+   * there are no overlaps for this feature.
+   * 
+   * @param features
+   */
+  private void linkFeatures(SequenceFeature[] features)
+  {
+    int n = features.length;
+    switch (n)
     {
-      int mid = (start + end) / 2;
-      if (list.get(mid).end >= pos)
+    case 0:
+      return;
+    case 1:
+      features[0].index1 = 1;
+      return;
+    }
+    int maxEnd = features[0].end;
+    for (int i = 1; i < n;)
+    {
+      SequenceFeature sf = features[i];
+      if (sf.begin <= maxEnd)
       {
-        matched = mid;
-        end = mid - 1;
+        sf.containedBy = getContainedBy(features[i - 1], sf);
       }
-      else
+      if (sf.end > maxEnd)
       {
-        start = mid + 1;
+        maxEnd = sf.end;
       }
+      sf.index1 = ++i;
     }
-    return matched;
   }
-
-
 }
index 0baeb19..5c6b4a7 100644 (file)
@@ -53,7 +53,28 @@ public class SequenceFeatures implements SequenceFeaturesI
    */
   private Map<String, FeatureStoreI> featureStore;
 
-  private static boolean useIntervalStore = !Platform.isJS();
+  /**
+   * original NCList-based IntervalStore
+   */
+  private final static int INTERVAL_STORE_NCLIST = 0;
+
+  /**
+   * linked-list deferred-sort IntervalStore
+   */
+  private final static int INTERVAL_STORE_NOCLKIST = 1;
+
+  /**
+   * no-IntervalStore option for JavaScript
+   */
+  private final static int INTERVAL_STORE_NONE_JS = -1;
+
+  private final int INTERVAL_STORE_MODE = (
+  // can be set differently for testing, but default is
+  // NONE_JS for JalviewJS and NCLIST for Java
+  Platform.isJS() ? //
+          INTERVAL_STORE_NONE_JS //
+          : INTERVAL_STORE_NCLIST//
+  );
 
   /**
    * Constructor
@@ -106,8 +127,16 @@ public class SequenceFeatures implements SequenceFeaturesI
 
   private FeatureStoreI newFeatureStore()
   {
-    // TODO Auto-generated method stub
-    return (useIntervalStore ? new FeatureStoreImpl() : new FeatureStoreJS());
+    switch (INTERVAL_STORE_MODE)
+    {
+    default:
+    case INTERVAL_STORE_NCLIST:
+      return new FeatureStoreImpl(true);
+    case INTERVAL_STORE_NOCLKIST:
+      return new FeatureStoreImpl(false);
+    case INTERVAL_STORE_NONE_JS:
+      return new FeatureStoreJS();
+    }
   }
 
   /**
@@ -120,10 +149,10 @@ public class SequenceFeatures implements SequenceFeaturesI
     List<SequenceFeature> result = new ArrayList<>();
     for (FeatureStoreI featureSet : varargToTypes(type))
     {
-//      System.err.println("SF findFeature " + System.currentTimeMillis()
-//              + " " + from + " " + to + " "
-//              + featureSet.getPositionalFeatures().get(0).type);
-//
+      // System.err.println("SF findFeature " + System.currentTimeMillis()
+      // + " " + from + " " + to + " "
+      // + featureSet.getPositionalFeatures().get(0).type);
+      //
       result.addAll(featureSet.findOverlappingFeatures(from, to, null));
     }
     return result;
@@ -164,8 +193,8 @@ public class SequenceFeatures implements SequenceFeaturesI
       return new ArrayList<>();
     }
 
-    return getAllFeatures(featureTypes.toArray(new String[featureTypes
-            .size()]));
+    return getAllFeatures(
+            featureTypes.toArray(new String[featureTypes.size()]));
   }
 
   /**
@@ -230,7 +259,6 @@ public class SequenceFeatures implements SequenceFeaturesI
       return featureStore.values();
     }
 
-
     List<FeatureStoreI> types = new ArrayList<>();
     List<String> args = Arrays.asList(type);
     for (Entry<String, FeatureStoreI> featureType : featureStore.entrySet())
@@ -333,8 +361,8 @@ public class SequenceFeatures implements SequenceFeaturesI
 
     for (Entry<String, FeatureStoreI> featureType : featureStore.entrySet())
     {
-      Set<String> featureGroups = featureType.getValue().getFeatureGroups(
-              positionalFeatures);
+      Set<String> featureGroups = featureType.getValue()
+              .getFeatureGroups(positionalFeatures);
       for (String group : groups)
       {
         if (featureGroups.contains(group))
@@ -402,8 +430,9 @@ public class SequenceFeatures implements SequenceFeaturesI
   @Override
   public float getMinimumScore(String type, boolean positional)
   {
-    return featureStore.containsKey(type) ? featureStore.get(type)
-            .getMinimumScore(positional) : Float.NaN;
+    return featureStore.containsKey(type)
+            ? featureStore.get(type).getMinimumScore(positional)
+            : Float.NaN;
   }
 
   /**
@@ -412,8 +441,9 @@ public class SequenceFeatures implements SequenceFeaturesI
   @Override
   public float getMaximumScore(String type, boolean positional)
   {
-    return featureStore.containsKey(type) ? featureStore.get(type)
-            .getMaximumScore(positional) : Float.NaN;
+    return featureStore.containsKey(type)
+            ? featureStore.get(type).getMaximumScore(positional)
+            : Float.NaN;
   }
 
   /**
@@ -527,8 +557,6 @@ public class SequenceFeatures implements SequenceFeaturesI
   // Platform: timer mark 52.355 0.185 overviewrender 16000 pixels row:14
   // Platform: timer mark 53.829 0.186 overviewrender 16000 pixels row:14
 
-
-
   /**
    * @author Bob Hanson 2019.08.01
    */
index 6cbace4..c3afe1f 100755 (executable)
@@ -347,7 +347,10 @@ public class FileLoader implements Runnable
           // We read the data anyway - it might make sense.
         }
         // BH 2018 switch to File object here instead of filename
+        Platform.timeCheck(null, Platform.TIME_MARK);
         alignFrame = new Jalview2XML(raiseGUI).loadJalviewAlign(selectedFile == null ? file : selectedFile);
+        Platform.timeCheck("JVP loaded", Platform.TIME_MARK);
+
       }
       else
       {
index 74f73d4..dab840e 100644 (file)
@@ -232,7 +232,7 @@ public class Sequencefeature extends Rangetype
     if (feature.otherDetails != null)
     {
       Iterator<String> iter = feature.otherDetails.keySet().iterator();
-      Vector props = dsa.getPropertyAsReference();
+      Vector<?> props = dsa.getPropertyAsReference();
       while (iter.hasNext())
       {
         String key = iter.next();
@@ -290,10 +290,11 @@ public class Sequencefeature extends Rangetype
     String featureType = dseta.getType();
     if (dseta.getScoreCount() > 0)
     {
-      Enumeration scr = dseta.enumerateScore();
+      @SuppressWarnings("unchecked")
+      Enumeration<Score> scr = dseta.enumerateScore();
       while (scr.hasMoreElements())
       {
-        Score score = (Score) scr.nextElement();
+        Score score = scr.nextElement();
         if (score.getName().equals(featureType))
         {
           theScore = score.getContent();
@@ -325,10 +326,11 @@ public class Sequencefeature extends Rangetype
     }
     if (dseta.getScoreCount() > 0)
     {
-      Enumeration scr = dseta.enumerateScore();
+      @SuppressWarnings("unchecked")
+      Enumeration<Score> scr = dseta.enumerateScore();
       while (scr.hasMoreElements())
       {
-        Score score = (Score) scr.nextElement();
+        Score score = scr.nextElement();
         if (!score.getName().equals(sf.getType()))
         {
           sf.setValue(score.getName(), "" + score.getContent());
@@ -336,10 +338,11 @@ public class Sequencefeature extends Rangetype
       }
     }
     // other details
-    Enumeration props = dseta.enumerateProperty();
+    @SuppressWarnings("unchecked")
+    Enumeration<Property> props = dseta.enumerateProperty();
     while (props.hasMoreElements())
     {
-      Property p = (Property) props.nextElement();
+      Property p = props.nextElement();
       Object val = null;
       if (Properties.isValid(p))
       {
index 22d7bfd..0291a0a 100644 (file)
@@ -3519,6 +3519,7 @@ public class Jalview2XML
       // now, for 2.10 projects, this is also done if the xml doc includes
       // dataset sequences not actually present in any particular view.
       //
+      Platform.timeCheck("J2XML features0", Platform.TIME_MARK);
       for (int i = 0; i < vamsasSeqs.size(); i++)
       {
         JSeq jseq = jseqs.get(i);
@@ -3576,6 +3577,8 @@ public class Jalview2XML
             al.getSequenceAt(i).addSequenceFeature(sf);
           }
         }
+        Platform.timeCheck("J2XML features done", Platform.TIME_MARK);
+
         if (vamsasSeqs.get(i).getDBRef().size() > 0)
         {
           // adds dbrefs to datasequence's set (since Jalview 2.10)
diff --git a/test/jalview/datamodel/features/FeatureStoreNoNCTest.java b/test/jalview/datamodel/features/FeatureStoreNoNCTest.java
new file mode 100644 (file)
index 0000000..900a7ca
--- /dev/null
@@ -0,0 +1,898 @@
+package jalview.datamodel.features;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertSame;
+import static org.testng.Assert.assertTrue;
+
+import jalview.datamodel.SequenceFeature;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.testng.annotations.Test;
+
+public class FeatureStoreNoNCTest
+{
+
+  private FeatureStoreI newFeatureStore()
+  {
+    return new FeatureStoreImpl(false);
+  }
+
+  @Test(groups = "Functional")
+  public void testFindFeatures_nonNested()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    fs.addFeature(new SequenceFeature("", "", 10, 20, Float.NaN,
+            null));
+    // same range different description
+    fs.addFeature(new SequenceFeature("", "desc", 10, 20, Float.NaN, null));
+    fs.addFeature(new SequenceFeature("", "", 15, 25, Float.NaN, null));
+    fs.addFeature(new SequenceFeature("", "", 20, 35, Float.NaN, null));
+
+    List<SequenceFeature> overlaps = fs.findOverlappingFeatures(1, 9);
+    assertTrue(overlaps.isEmpty());
+
+    overlaps = fs.findOverlappingFeatures(8, 10);
+    assertEquals(overlaps.size(), 2);
+    assertEquals(overlaps.get(0).getEnd(), 20);
+    assertEquals(overlaps.get(1).getEnd(), 20);
+
+    overlaps = fs.findOverlappingFeatures(12, 16);
+    assertEquals(overlaps.size(), 3);
+    assertEquals(overlaps.get(0).getEnd(), 20);
+    assertEquals(overlaps.get(1).getEnd(), 20);
+    assertEquals(overlaps.get(2).getEnd(), 25);
+
+    overlaps = fs.findOverlappingFeatures(33, 33);
+    assertEquals(overlaps.size(), 1);
+    assertEquals(overlaps.get(0).getEnd(), 35);
+  }
+
+  @Test(groups = "Functional")
+  public void testFindFeatures_nested()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    SequenceFeature sf1 = addFeature(fs, 10, 50);
+    SequenceFeature sf2 = addFeature(fs, 10, 40);
+    SequenceFeature sf3 = addFeature(fs, 20, 30);
+    // fudge feature at same location but different group (so is added)
+    SequenceFeature sf4 = new SequenceFeature("", "", 20, 30, Float.NaN,
+            "different group");
+    fs.addFeature(sf4);
+    SequenceFeature sf5 = addFeature(fs, 35, 36);
+
+    List<SequenceFeature> overlaps = fs.findOverlappingFeatures(1, 9);
+    assertTrue(overlaps.isEmpty());
+
+    overlaps = fs.findOverlappingFeatures(10, 15);
+    assertEquals(overlaps.size(), 2);
+    assertTrue(overlaps.contains(sf1));
+    assertTrue(overlaps.contains(sf2));
+
+    overlaps = fs.findOverlappingFeatures(45, 60);
+    assertEquals(overlaps.size(), 1);
+    assertTrue(overlaps.contains(sf1));
+
+    overlaps = fs.findOverlappingFeatures(32, 38);
+    assertEquals(overlaps.size(), 3);
+    assertTrue(overlaps.contains(sf1));
+    assertTrue(overlaps.contains(sf2));
+    assertTrue(overlaps.contains(sf5));
+
+    overlaps = fs.findOverlappingFeatures(15, 25);
+    assertEquals(overlaps.size(), 4);
+    assertTrue(overlaps.contains(sf1));
+    assertTrue(overlaps.contains(sf2));
+    assertTrue(overlaps.contains(sf3));
+    assertTrue(overlaps.contains(sf4));
+  }
+
+  @Test(groups = "Functional")
+  public void testFindFeatures_mixed()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    SequenceFeature sf1 = addFeature(fs, 10, 50);
+    SequenceFeature sf2 = addFeature(fs, 1, 15);
+    SequenceFeature sf3 = addFeature(fs, 20, 30);
+    SequenceFeature sf4 = addFeature(fs, 40, 100);
+    SequenceFeature sf5 = addFeature(fs, 60, 100);
+    SequenceFeature sf6 = addFeature(fs, 70, 70);
+
+    List<SequenceFeature> overlaps = fs.findOverlappingFeatures(200, 200);
+    assertTrue(overlaps.isEmpty());
+
+    overlaps = fs.findOverlappingFeatures(1, 9);
+    assertEquals(overlaps.size(), 1);
+    assertTrue(overlaps.contains(sf2));
+
+    overlaps = fs.findOverlappingFeatures(5, 18);
+    assertEquals(overlaps.size(), 2);
+    assertTrue(overlaps.contains(sf1));
+    assertTrue(overlaps.contains(sf2));
+
+    overlaps = fs.findOverlappingFeatures(30, 40);
+    assertEquals(overlaps.size(), 3);
+    assertTrue(overlaps.contains(sf1));
+    assertTrue(overlaps.contains(sf3));
+    assertTrue(overlaps.contains(sf4));
+
+    overlaps = fs.findOverlappingFeatures(80, 90);
+    assertEquals(overlaps.size(), 2);
+    assertTrue(overlaps.contains(sf4));
+    assertTrue(overlaps.contains(sf5));
+
+    overlaps = fs.findOverlappingFeatures(68, 70);
+    assertEquals(overlaps.size(), 3);
+    assertTrue(overlaps.contains(sf4));
+    assertTrue(overlaps.contains(sf5));
+    assertTrue(overlaps.contains(sf6));
+  }
+
+  /**
+   * Helper method to add a feature of no particular type
+   * 
+   * @param fs
+   * @param from
+   * @param to
+   * @return
+   */
+  SequenceFeature addFeature(FeatureStoreI fs, int from, int to)
+  {
+    SequenceFeature sf1 = new SequenceFeature("", "", from, to, Float.NaN,
+            null);
+    fs.addFeature(sf1);
+    return sf1;
+  }
+
+  @Test(groups = "Functional")
+  public void testFindFeatures_contactFeatures()
+  {
+    FeatureStoreI fs = newFeatureStore();
+
+    SequenceFeature sf = new SequenceFeature("disulphide bond", "bond", 10,
+            20, Float.NaN, null);
+    fs.addFeature(sf);
+
+    /*
+     * neither contact point in range
+     */
+    List<SequenceFeature> overlaps = fs.findOverlappingFeatures(1, 9);
+    assertTrue(overlaps.isEmpty());
+
+    /*
+     * neither contact point in range
+     */
+    overlaps = fs.findOverlappingFeatures(11, 19);
+    assertTrue(overlaps.isEmpty());
+
+    /*
+     * first contact point in range
+     */
+    overlaps = fs.findOverlappingFeatures(5, 15);
+    assertEquals(overlaps.size(), 1);
+    assertTrue(overlaps.contains(sf));
+
+    /*
+     * second contact point in range
+     */
+    overlaps = fs.findOverlappingFeatures(15, 25);
+    assertEquals(overlaps.size(), 1);
+    assertTrue(overlaps.contains(sf));
+
+    /*
+     * both contact points in range
+     */
+    overlaps = fs.findOverlappingFeatures(5, 25);
+    assertEquals(overlaps.size(), 1);
+    assertTrue(overlaps.contains(sf));
+  }
+
+  @Test(groups = "Functional")
+  public void testGetPositionalFeatures()
+  {
+    FeatureStoreI store = newFeatureStore();
+    SequenceFeature sf1 = new SequenceFeature("Metal", "desc", 10, 20,
+            Float.NaN, null);
+    store.addFeature(sf1);
+    // same range, different description
+    SequenceFeature sf2 = new SequenceFeature("Metal", "desc2", 10, 20,
+            Float.NaN, null);
+    store.addFeature(sf2);
+    // discontiguous range
+    SequenceFeature sf3 = new SequenceFeature("Metal", "desc", 30, 40,
+            Float.NaN, null);
+    store.addFeature(sf3);
+    // overlapping range
+    SequenceFeature sf4 = new SequenceFeature("Metal", "desc", 15, 35,
+            Float.NaN, null);
+    store.addFeature(sf4);
+    // enclosing range
+    SequenceFeature sf5 = new SequenceFeature("Metal", "desc", 5, 50,
+            Float.NaN, null);
+    store.addFeature(sf5);
+    // non-positional feature
+    SequenceFeature sf6 = new SequenceFeature("Metal", "desc", 0, 0,
+            Float.NaN, null);
+    store.addFeature(sf6);
+    // contact feature
+    SequenceFeature sf7 = new SequenceFeature("Disulphide bond", "desc",
+            18, 45, Float.NaN, null);
+    store.addFeature(sf7);
+
+    List<SequenceFeature> features = store.getPositionalFeatures();
+    assertEquals(features.size(), 6);
+    assertTrue(features.contains(sf1));
+    assertTrue(features.contains(sf2));
+    assertTrue(features.contains(sf3));
+    assertTrue(features.contains(sf4));
+    assertTrue(features.contains(sf5));
+    assertFalse(features.contains(sf6));
+    assertTrue(features.contains(sf7));
+
+    features = store.getNonPositionalFeatures();
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf6));
+  }
+
+  @Test(groups = "Functional")
+  public void testDelete()
+  {
+    FeatureStoreI store = newFeatureStore();
+    SequenceFeature sf1 = addFeature(store, 10, 20);
+    assertTrue(store.getPositionalFeatures().contains(sf1));
+
+    /*
+     * simple deletion
+     */
+    assertTrue(store.delete(sf1));
+    assertTrue(store.getPositionalFeatures().isEmpty());
+
+    /*
+     * non-positional feature deletion
+     */
+    SequenceFeature sf2 = addFeature(store, 0, 0);
+    assertFalse(store.getPositionalFeatures().contains(sf2));
+    assertTrue(store.getNonPositionalFeatures().contains(sf2));
+    assertTrue(store.delete(sf2));
+    assertTrue(store.getNonPositionalFeatures().isEmpty());
+
+    /*
+     * contact feature deletion
+     */
+    SequenceFeature sf3 = new SequenceFeature("", "Disulphide Bond", 11,
+            23, Float.NaN, null);
+    store.addFeature(sf3);
+    assertEquals(store.getPositionalFeatures().size(), 1);
+    assertTrue(store.getPositionalFeatures().contains(sf3));
+    assertTrue(store.delete(sf3));
+    assertTrue(store.getPositionalFeatures().isEmpty());
+
+    /*
+     * nested feature deletion
+     */
+    SequenceFeature sf4 = addFeature(store, 20, 30);
+    SequenceFeature sf5 = addFeature(store, 22, 26); // to NCList
+    SequenceFeature sf6 = addFeature(store, 23, 24); // child of sf5
+    SequenceFeature sf7 = addFeature(store, 25, 25); // sibling of sf6
+    SequenceFeature sf8 = addFeature(store, 24, 24); // child of sf6
+    SequenceFeature sf9 = addFeature(store, 23, 23); // child of sf6
+
+    // SequenceFeature sf4 = addFeature(store, 20, 30);
+    //// SequenceFeature sf5 = addFeature(store, 22, 26);
+    ////// SequenceFeature sf6 = addFeature(store, 23, 24); // child of sf5
+    //////// SequenceFeature sf9 = addFeature(store, 23, 23); // child of sf6
+    //////// SequenceFeature sf8 = addFeature(store, 24, 24); // child of sf6
+    ////// SequenceFeature sf7 = addFeature(store, 25, 25); // child of sf5
+    //
+    assertEquals(store.getPositionalFeatures().size(), 6);
+
+    // delete a node with children - they take its place
+    assertTrue(store.delete(sf6)); // sf8, sf9 should become children of sf5
+    assertEquals(store.getPositionalFeatures().size(), 5);
+    assertFalse(store.getPositionalFeatures().contains(sf6));
+
+    // delete a node with no children
+    assertTrue(store.delete(sf7));
+    assertEquals(store.getPositionalFeatures().size(), 4);
+    assertFalse(store.getPositionalFeatures().contains(sf7));
+
+    // delete root of NCList
+    assertTrue(store.delete(sf5));
+    assertEquals(store.getPositionalFeatures().size(), 3);
+    assertFalse(store.getPositionalFeatures().contains(sf5));
+
+    // continue the killing fields
+    assertTrue(store.delete(sf4));
+    assertEquals(store.getPositionalFeatures().size(), 2);
+    assertFalse(store.getPositionalFeatures().contains(sf4));
+
+    assertTrue(store.delete(sf9));
+    assertEquals(store.getPositionalFeatures().size(), 1);
+    assertFalse(store.getPositionalFeatures().contains(sf9));
+
+    assertTrue(store.delete(sf8));
+    assertTrue(store.getPositionalFeatures().isEmpty());
+  }
+
+  @Test(groups = "Functional")
+  public void testAddFeature()
+  {
+    FeatureStoreI fs = newFeatureStore();
+
+    SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20,
+            Float.NaN, null);
+    SequenceFeature sf2 = new SequenceFeature("Cath", "", 10, 20,
+            Float.NaN, null);
+
+    assertTrue(fs.addFeature(sf1));
+    assertEquals(fs.getFeatureCount(true), 1); // positional
+    assertEquals(fs.getFeatureCount(false), 0); // non-positional
+
+    /*
+     * re-adding the same or an identical feature should fail
+     */
+    assertFalse(fs.addFeature(sf1));
+    assertEquals(fs.getFeatureCount(true), 1);
+    assertFalse(fs.addFeature(sf2));
+    assertEquals(fs.getFeatureCount(true), 1);
+
+    /*
+     * add non-positional
+     */
+    SequenceFeature sf3 = new SequenceFeature("Cath", "", 0, 0, Float.NaN,
+            null);
+    assertTrue(fs.addFeature(sf3));
+    assertEquals(fs.getFeatureCount(true), 1); // positional
+    assertEquals(fs.getFeatureCount(false), 1); // non-positional
+    SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, Float.NaN,
+            null);
+    assertFalse(fs.addFeature(sf4)); // already stored
+    assertEquals(fs.getFeatureCount(true), 1); // positional
+    assertEquals(fs.getFeatureCount(false), 1); // non-positional
+
+    /*
+     * add contact
+     */
+    SequenceFeature sf5 = new SequenceFeature("Disulfide bond", "", 10, 20,
+            Float.NaN, null);
+    assertTrue(fs.addFeature(sf5));
+    assertEquals(fs.getFeatureCount(true), 2); // positional - add 1 for contact
+    assertEquals(fs.getFeatureCount(false), 1); // non-positional
+    SequenceFeature sf6 = new SequenceFeature("Disulfide bond", "", 10, 20,
+            Float.NaN, null);
+    assertFalse(fs.addFeature(sf6)); // already stored
+    assertEquals(fs.getFeatureCount(true), 2); // no change
+    assertEquals(fs.getFeatureCount(false), 1); // no change
+  }
+
+  @Test(groups = "Functional")
+  public void testIsEmpty()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    assertTrue(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(true), 0);
+
+    /*
+     * non-nested feature
+     */
+    SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20,
+            Float.NaN, null);
+    fs.addFeature(sf1);
+    assertFalse(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(true), 1);
+    fs.delete(sf1);
+    assertTrue(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(true), 0);
+
+    /*
+     * non-positional feature
+     */
+    sf1 = new SequenceFeature("Cath", "", 0, 0, Float.NaN, null);
+    fs.addFeature(sf1);
+    assertFalse(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(false), 1); // non-positional
+    assertEquals(fs.getFeatureCount(true), 0); // positional
+    fs.delete(sf1);
+    assertTrue(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(false), 0);
+
+    /*
+     * contact feature
+     */
+    sf1 = new SequenceFeature("Disulfide bond", "", 19, 49, Float.NaN, null);
+    fs.addFeature(sf1);
+    assertFalse(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(true), 1);
+    fs.delete(sf1);
+    assertTrue(fs.isEmpty());
+    assertEquals(fs.getFeatureCount(true), 0);
+
+    /*
+     * sf2, sf3 added as nested features
+     */
+    sf1 = new SequenceFeature("Cath", "", 19, 49, Float.NaN, null);
+    SequenceFeature sf2 = new SequenceFeature("Cath", "", 20, 40,
+            Float.NaN, null);
+    SequenceFeature sf3 = new SequenceFeature("Cath", "", 25, 35,
+            Float.NaN, null);
+    fs.addFeature(sf1);
+    fs.addFeature(sf2);
+    fs.addFeature(sf3);
+    assertEquals(fs.getFeatureCount(true), 3);
+    assertTrue(fs.delete(sf1));
+    assertEquals(fs.getFeatureCount(true), 2);
+    assertEquals(fs.getFeatures().size(), 2);
+    assertFalse(fs.isEmpty());
+    assertTrue(fs.delete(sf2));
+    assertEquals(fs.getFeatureCount(true), 1);
+    assertFalse(fs.isEmpty());
+    assertTrue(fs.delete(sf3));
+    assertEquals(fs.getFeatureCount(true), 0);
+    assertTrue(fs.isEmpty()); // all gone
+  }
+
+  @Test(groups = "Functional")
+  public void testGetFeatureGroups()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    assertTrue(fs.getFeatureGroups(true).isEmpty());
+    assertTrue(fs.getFeatureGroups(false).isEmpty());
+
+    SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 10, 20, 1f, "group1");
+    fs.addFeature(sf1);
+    Set<String> groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 1);
+    assertTrue(groups.contains("group1"));
+
+    /*
+     * add another feature of the same group, delete one, delete both
+     */
+    SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 20, 30, 1f, "group1");
+    fs.addFeature(sf2);
+    groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 1);
+    assertTrue(groups.contains("group1"));
+    fs.delete(sf2);
+    groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 1);
+    assertTrue(groups.contains("group1"));
+    fs.delete(sf1);
+    groups = fs.getFeatureGroups(true);
+    assertTrue(fs.getFeatureGroups(true).isEmpty());
+
+    SequenceFeature sf3 = new SequenceFeature("Cath", "desc", 20, 30, 1f, "group2");
+    fs.addFeature(sf3);
+    SequenceFeature sf4 = new SequenceFeature("Cath", "desc", 20, 30, 1f, "Group2");
+    fs.addFeature(sf4);
+    SequenceFeature sf5 = new SequenceFeature("Cath", "desc", 20, 30, 1f, null);
+    fs.addFeature(sf5);
+    groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 3);
+    assertTrue(groups.contains("group2"));
+    assertTrue(groups.contains("Group2")); // case sensitive
+    assertTrue(groups.contains(null)); // null allowed
+    assertTrue(fs.getFeatureGroups(false).isEmpty()); // non-positional
+
+    fs.delete(sf3);
+    groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 2);
+    assertFalse(groups.contains("group2"));
+    fs.delete(sf4);
+    groups = fs.getFeatureGroups(true);
+    assertEquals(groups.size(), 1);
+    assertFalse(groups.contains("Group2"));
+    fs.delete(sf5);
+    groups = fs.getFeatureGroups(true);
+    assertTrue(groups.isEmpty());
+
+    /*
+     * add non-positional feature
+     */
+    SequenceFeature sf6 = new SequenceFeature("Cath", "desc", 0, 0, 1f,
+            "CathGroup");
+    fs.addFeature(sf6);
+    groups = fs.getFeatureGroups(false);
+    assertEquals(groups.size(), 1);
+    assertTrue(groups.contains("CathGroup"));
+    assertTrue(fs.delete(sf6));
+    assertTrue(fs.getFeatureGroups(false).isEmpty());
+  }
+
+  @Test(groups = "Functional")
+  public void testGetTotalFeatureLength()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    assertEquals(fs.getTotalFeatureLength(), 0);
+
+    addFeature(fs, 10, 20); // 11
+    assertEquals(fs.getTotalFeatureLength(), 11);
+    addFeature(fs, 17, 37); // 21
+    SequenceFeature sf1 = addFeature(fs, 14, 74); // 61
+    assertEquals(fs.getTotalFeatureLength(), 93);
+
+    // non-positional features don't count
+    SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 0, 0, 1f,
+            "group1");
+    fs.addFeature(sf2);
+    assertEquals(fs.getTotalFeatureLength(), 93);
+
+    // contact features count 1
+    SequenceFeature sf3 = new SequenceFeature("disulphide bond", "desc",
+            15, 35, 1f, "group1");
+    fs.addFeature(sf3);
+    assertEquals(fs.getTotalFeatureLength(), 94);
+
+    assertTrue(fs.delete(sf1));
+    assertEquals(fs.getTotalFeatureLength(), 33);
+    assertFalse(fs.delete(sf1));
+    assertEquals(fs.getTotalFeatureLength(), 33);
+    assertTrue(fs.delete(sf2));
+    assertEquals(fs.getTotalFeatureLength(), 33);
+    assertTrue(fs.delete(sf3));
+    assertEquals(fs.getTotalFeatureLength(), 32);
+  }
+
+  @Test(groups = "Functional")
+  public void testGetFeatureLength()
+  {
+    /*
+     * positional feature
+     */
+    SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 10, 20, 1f, "group1");
+    assertEquals(FeatureStore.getFeatureLength(sf1), 11);
+  
+    /*
+     * non-positional feature
+     */
+    SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 0, 0, 1f,
+            "CathGroup");
+    assertEquals(FeatureStore.getFeatureLength(sf2), 0);
+
+    /*
+     * contact feature counts 1
+     */
+    SequenceFeature sf3 = new SequenceFeature("Disulphide Bond", "desc",
+            14, 28, 1f, "AGroup");
+    assertEquals(FeatureStore.getFeatureLength(sf3), 1);
+  }
+
+  @Test(groups = "Functional")
+  public void testMin()
+  {
+    assertEquals(FeatureStore.min(Float.NaN, Float.NaN), Float.NaN);
+    assertEquals(FeatureStore.min(Float.NaN, 2f), 2f);
+    assertEquals(FeatureStore.min(-2f, Float.NaN), -2f);
+    assertEquals(FeatureStore.min(2f, -3f), -3f);
+  }
+
+  @Test(groups = "Functional")
+  public void testMax()
+  {
+    assertEquals(FeatureStore.max(Float.NaN, Float.NaN), Float.NaN);
+    assertEquals(FeatureStore.max(Float.NaN, 2f), 2f);
+    assertEquals(FeatureStore.max(-2f, Float.NaN), -2f);
+    assertEquals(FeatureStore.max(2f, -3f), 2f);
+  }
+
+  @Test(groups = "Functional")
+  public void testGetMinimumScore_getMaximumScore()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    assertEquals(fs.getMinimumScore(true), Float.NaN); // positional
+    assertEquals(fs.getMaximumScore(true), Float.NaN);
+    assertEquals(fs.getMinimumScore(false), Float.NaN); // non-positional
+    assertEquals(fs.getMaximumScore(false), Float.NaN);
+
+    // add features with no score
+    SequenceFeature sf1 = new SequenceFeature("type", "desc", 0, 0,
+            Float.NaN, "group");
+    fs.addFeature(sf1);
+    SequenceFeature sf2 = new SequenceFeature("type", "desc", 10, 20,
+            Float.NaN, "group");
+    fs.addFeature(sf2);
+    assertEquals(fs.getMinimumScore(true), Float.NaN);
+    assertEquals(fs.getMaximumScore(true), Float.NaN);
+    assertEquals(fs.getMinimumScore(false), Float.NaN);
+    assertEquals(fs.getMaximumScore(false), Float.NaN);
+
+    // add positional features with score
+    SequenceFeature sf3 = new SequenceFeature("type", "desc", 10, 20, 1f,
+            "group");
+    fs.addFeature(sf3);
+    SequenceFeature sf4 = new SequenceFeature("type", "desc", 12, 16, 4f,
+            "group");
+    fs.addFeature(sf4);
+    assertEquals(fs.getMinimumScore(true), 1f);
+    assertEquals(fs.getMaximumScore(true), 4f);
+    assertEquals(fs.getMinimumScore(false), Float.NaN);
+    assertEquals(fs.getMaximumScore(false), Float.NaN);
+
+    // add non-positional features with score
+    SequenceFeature sf5 = new SequenceFeature("type", "desc", 0, 0, 11f,
+            "group");
+    fs.addFeature(sf5);
+    SequenceFeature sf6 = new SequenceFeature("type", "desc", 0, 0, -7f,
+            "group");
+    fs.addFeature(sf6);
+    assertEquals(fs.getMinimumScore(true), 1f);
+    assertEquals(fs.getMaximumScore(true), 4f);
+    assertEquals(fs.getMinimumScore(false), -7f);
+    assertEquals(fs.getMaximumScore(false), 11f);
+
+    // delete one positional and one non-positional
+    // min-max should be recomputed
+    assertTrue(fs.delete(sf6));
+    assertTrue(fs.delete(sf3));
+    assertEquals(fs.getMinimumScore(true), 4f);
+    assertEquals(fs.getMaximumScore(true), 4f);
+    assertEquals(fs.getMinimumScore(false), 11f);
+    assertEquals(fs.getMaximumScore(false), 11f);
+
+    // delete remaining features with score
+    assertTrue(fs.delete(sf4));
+    assertTrue(fs.delete(sf5));
+    assertEquals(fs.getMinimumScore(true), Float.NaN);
+    assertEquals(fs.getMaximumScore(true), Float.NaN);
+    assertEquals(fs.getMinimumScore(false), Float.NaN);
+    assertEquals(fs.getMaximumScore(false), Float.NaN);
+
+    // delete all features
+    assertTrue(fs.delete(sf1));
+    assertTrue(fs.delete(sf2));
+    assertTrue(fs.isEmpty());
+    assertEquals(fs.getMinimumScore(true), Float.NaN);
+    assertEquals(fs.getMaximumScore(true), Float.NaN);
+    assertEquals(fs.getMinimumScore(false), Float.NaN);
+    assertEquals(fs.getMaximumScore(false), Float.NaN);
+  }
+
+  @Test(groups = "Functional")
+  public void testListContains()
+  {
+    FeatureStoreI featureStore = newFeatureStore();
+    assertFalse(featureStore.listContains(null, null));
+    List<SequenceFeature> features = new ArrayList<>();
+    assertFalse(featureStore.listContains(features, null));
+
+    SequenceFeature sf1 = new SequenceFeature("type1", "desc1", 20, 30, 3f,
+            "group1");
+    assertFalse(featureStore.listContains(null, sf1));
+    assertFalse(featureStore.listContains(features, sf1));
+
+    features.add(sf1);
+    SequenceFeature sf2 = new SequenceFeature("type1", "desc1", 20, 30, 3f,
+            "group1");
+    SequenceFeature sf3 = new SequenceFeature("type1", "desc1", 20, 40, 3f,
+            "group1");
+
+    // sf2.equals(sf1) so contains should return true
+    assertTrue(featureStore.listContains(features, sf2));
+    assertFalse(featureStore.listContains(features, sf3));
+  }
+
+  @Test(groups = "Functional")
+  public void testGetFeaturesForGroup()
+  {
+    FeatureStoreI fs = newFeatureStore();
+
+    /*
+     * with no features
+     */
+    assertTrue(fs.getFeaturesForGroup(true, null).isEmpty());
+    assertTrue(fs.getFeaturesForGroup(false, null).isEmpty());
+    assertTrue(fs.getFeaturesForGroup(true, "uniprot").isEmpty());
+    assertTrue(fs.getFeaturesForGroup(false, "uniprot").isEmpty());
+
+    /*
+     * sf1: positional feature in the null group
+     */
+    SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 4, 10, 0f,
+            null);
+    fs.addFeature(sf1);
+    assertTrue(fs.getFeaturesForGroup(true, "uniprot").isEmpty());
+    assertTrue(fs.getFeaturesForGroup(false, "uniprot").isEmpty());
+    assertTrue(fs.getFeaturesForGroup(false, null).isEmpty());
+    List<SequenceFeature> features = fs.getFeaturesForGroup(true, null);
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf1));
+
+    /*
+     * sf2: non-positional feature in the null group
+     * sf3: positional feature in a non-null group
+     * sf4: non-positional feature in a non-null group
+     */
+    SequenceFeature sf2 = new SequenceFeature("Pfam", "desc", 0, 0, 0f,
+            null);
+    SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 4, 10, 0f,
+            "Uniprot");
+    SequenceFeature sf4 = new SequenceFeature("Pfam", "desc", 0, 0, 0f,
+            "Rfam");
+    fs.addFeature(sf2);
+    fs.addFeature(sf3);
+    fs.addFeature(sf4);
+
+    features = fs.getFeaturesForGroup(true, null);
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf1));
+
+    features = fs.getFeaturesForGroup(false, null);
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf2));
+
+    features = fs.getFeaturesForGroup(true, "Uniprot");
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf3));
+
+    features = fs.getFeaturesForGroup(false, "Rfam");
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf4));
+  }
+
+  @Test(groups = "Functional")
+  public void testShiftFeatures()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    assertFalse(fs.shiftFeatures(0, 1)); // nothing to do
+
+    SequenceFeature sf1 = new SequenceFeature("Cath", "", 2, 5, 0f, null);
+    fs.addFeature(sf1);
+    // nested feature:
+    SequenceFeature sf2 = new SequenceFeature("Cath", "", 8, 14, 0f, null);
+    fs.addFeature(sf2);
+    // contact feature:
+    SequenceFeature sf3 = new SequenceFeature("Disulfide bond", "", 23, 32,
+            0f, null);
+    fs.addFeature(sf3);
+    // non-positional feature:
+    SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, 0f, null);
+    fs.addFeature(sf4);
+
+    /*
+     * shift all features right by 5
+     */
+    assertTrue(fs.shiftFeatures(0, 5));
+
+    // non-positional features untouched:
+    List<SequenceFeature> nonPos = fs.getNonPositionalFeatures();
+    assertEquals(nonPos.size(), 1);
+    assertTrue(nonPos.contains(sf4));
+
+    // positional features are replaced
+    List<SequenceFeature> pos = fs.getPositionalFeatures();
+    assertEquals(pos.size(), 3);
+    assertFalse(pos.contains(sf1));
+    assertFalse(pos.contains(sf2));
+    assertFalse(pos.contains(sf3));
+    SequenceFeatures.sortFeatures(pos, true); // ascending start pos
+    assertEquals(pos.get(0).getBegin(), 7);
+    assertEquals(pos.get(0).getEnd(), 10);
+    assertEquals(pos.get(1).getBegin(), 13);
+    assertEquals(pos.get(1).getEnd(), 19);
+    assertEquals(pos.get(2).getBegin(), 28);
+    assertEquals(pos.get(2).getEnd(), 37);
+
+    /*
+     * now shift left by 15
+     * feature at [7-10] should be removed
+     * feature at [13-19] should become [1-4] 
+     */
+    assertTrue(fs.shiftFeatures(0, -15));
+    pos = fs.getPositionalFeatures();
+    assertEquals(pos.size(), 2);
+    SequenceFeatures.sortFeatures(pos, true);
+    assertEquals(pos.get(0).getBegin(), 1);
+    assertEquals(pos.get(0).getEnd(), 4);
+    assertEquals(pos.get(1).getBegin(), 13);
+    assertEquals(pos.get(1).getEnd(), 22);
+
+    /*
+     * shift right by 4 from position 2 onwards
+     * feature at [1-4] unchanged, feature at [13-22] shifts
+     */
+    assertTrue(fs.shiftFeatures(2, 4));
+    pos = fs.getPositionalFeatures();
+    assertEquals(pos.size(), 2);
+    SequenceFeatures.sortFeatures(pos, true);
+    assertEquals(pos.get(0).getBegin(), 1);
+    assertEquals(pos.get(0).getEnd(), 4);
+    assertEquals(pos.get(1).getBegin(), 17);
+    assertEquals(pos.get(1).getEnd(), 26);
+
+    /*
+     * shift right by 4 from position 18 onwards
+     * should be no change
+     */
+    SequenceFeature f1 = pos.get(0);
+    SequenceFeature f2 = pos.get(1);
+    assertFalse(fs.shiftFeatures(18, 4)); // no update
+    pos = fs.getPositionalFeatures();
+    assertEquals(pos.size(), 2);
+    SequenceFeatures.sortFeatures(pos, true);
+    assertSame(pos.get(0), f1);
+    assertSame(pos.get(1), f2);
+  }
+
+  @Test(groups = "Functional")
+  public void testDelete_readd()
+  {
+    /*
+     * add a feature and a nested feature
+     */
+    FeatureStoreI store = newFeatureStore();
+    SequenceFeature sf1 = addFeature(store, 10, 20);
+    // sf2 is nested in sf1 so will be stored in nestedFeatures
+    SequenceFeature sf2 = addFeature(store, 12, 14);
+    List<SequenceFeature> features = store.getPositionalFeatures();
+    assertEquals(features.size(), 2);
+    assertTrue(features.contains(sf1));
+    assertTrue(features.contains(sf2));
+    assertTrue(store.getFeatures().contains(sf1));
+    assertTrue(store.getFeatures().contains(sf2));
+  
+    /*
+     * delete the first feature
+     */
+    assertTrue(store.delete(sf1));
+    features = store.getPositionalFeatures();
+    assertFalse(features.contains(sf1));
+    assertTrue(features.contains(sf2));
+
+    /*
+     * re-add the 'nested' feature; is it now duplicated?
+     */
+    store.addFeature(sf2);
+    features = store.getPositionalFeatures();
+    assertEquals(features.size(), 1);
+    assertTrue(features.contains(sf2));
+  }
+
+  @Test(groups = "Functional")
+  public void testContains()
+  {
+    FeatureStoreI fs = newFeatureStore();
+    SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20,
+            Float.NaN, "group1");
+    SequenceFeature sf2 = new SequenceFeature("Cath", "", 10, 20,
+            Float.NaN, "group2");
+    SequenceFeature sf3 = new SequenceFeature("Cath", "", 0, 0, Float.NaN,
+            "group1");
+    SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, 0f,
+            "group1");
+    SequenceFeature sf5 = new SequenceFeature("Disulphide Bond", "", 5, 15,
+            Float.NaN, "group1");
+    SequenceFeature sf6 = new SequenceFeature("Disulphide Bond", "", 5, 15,
+            Float.NaN, "group2");
+
+    fs.addFeature(sf1);
+    fs.addFeature(sf3);
+    fs.addFeature(sf5);
+    assertTrue(fs.contains(sf1)); // positional feature
+    assertTrue(fs.contains(new SequenceFeature(sf1))); // identical feature
+    assertFalse(fs.contains(sf2)); // different group
+    assertTrue(fs.contains(sf3)); // non-positional
+    assertTrue(fs.contains(new SequenceFeature(sf3)));
+    assertFalse(fs.contains(sf4)); // different score
+    assertTrue(fs.contains(sf5)); // contact feature
+    assertTrue(fs.contains(new SequenceFeature(sf5)));
+    assertFalse(fs.contains(sf6)); // different group
+
+    /*
+     * add a nested feature
+     */
+    SequenceFeature sf7 = new SequenceFeature("Cath", "", 12, 16,
+            Float.NaN, "group1");
+    fs.addFeature(sf7);
+    assertTrue(fs.contains(sf7));
+    assertTrue(fs.contains(new SequenceFeature(sf7)));
+
+    /*
+     * delete the outer (enclosing, non-nested) feature
+     */
+    fs.delete(sf1);
+    assertFalse(fs.contains(sf1));
+    assertTrue(fs.contains(sf7));
+  }
+}
diff --git a/unused/nonc/IntervalI.java b/unused/nonc/IntervalI.java
new file mode 100644 (file)
index 0000000..0dfd935
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.api;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+public interface IntervalI
+{
+  /**
+   * a comparator for sorting intervals by start position ascending
+   */
+  static Comparator<? super IntervalI> FORWARD_STRAND = new Comparator<IntervalI>()
+  {
+    @Override
+    public int compare(IntervalI o1, IntervalI o2)
+    {
+      return Integer.compare(o1.getBegin(), o2.getBegin());
+    }
+  };
+
+  /**
+   * a comparator for sorting intervals by end position descending
+   */
+  static Comparator<? super IntervalI> REVERSE_STRAND = new Comparator<IntervalI>()
+  {
+    @Override
+    public int compare(IntervalI o1, IntervalI o2)
+    {
+      return Integer.compare(o2.getEnd(), o1.getEnd());
+    }
+  };
+
+  int getBegin();
+
+  int getEnd();
+
+  /**
+   * Answers true if this interval contains (or matches) the given interval
+   * 
+   * @param i
+   * @return
+   */
+  default boolean containsInterval(IntervalI i)
+  {
+    return i != null
+            && i.getBegin() >= getBegin() && i.getEnd() <= getEnd();
+  }
+
+  /**
+   * Answers true if this interval properly contains the given interval, that
+   * is, it contains it and is larger than it
+   * 
+   * @param i
+   * @return
+   */
+  default boolean properlyContainsInterval(IntervalI i)
+  {
+    return containsInterval(i)
+            && (i.getBegin() > getBegin() || i.getEnd() < getEnd());
+  }
+
+  default boolean equalsInterval(IntervalI i)
+  {
+    return i != null && i.getBegin() == getBegin()
+            && i.getEnd() == getEnd();
+  }
+
+  default boolean overlapsInterval(IntervalI i)
+  {
+    if (i == null)
+    {
+      return false;
+    }
+    if (i.getBegin() < getBegin())
+    {
+      return i.getEnd() >= getBegin();
+    }
+    if (i.getEnd() > getEnd())
+    {
+      return i.getBegin() <= getEnd();
+    }
+    return true; // i internal to this
+  }
+
+  /**
+   * Sorts the list by start position ascending (if forwardString==true), or by
+   * end position descending
+   * 
+   * @param intervals
+   * @param forwardStrand
+   */
+  static void sortIntervals(List<? extends IntervalI> intervals,
+          final boolean forwardStrand)
+  {
+    Collections.sort(intervals,
+            forwardStrand ? FORWARD_STRAND : REVERSE_STRAND);
+  }
+
+  IntervalI getContainedBy();
+
+  void setContainedBy(IntervalI containedBy);
+
+}
diff --git a/unused/nonc/IntervalStore.java b/unused/nonc/IntervalStore.java
new file mode 100644 (file)
index 0000000..4e308b5
--- /dev/null
@@ -0,0 +1,540 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.nonc;
+
+import java.util.AbstractCollection;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import intervalstore.api.IntervalI;
+
+/**
+ * A collection class to store interval-associated data, with O(log N)
+ * performance for overlap queries, insertion and deletion (where N is the size
+ * of the store). Accepts duplicate entries but not null values.
+ * 
+ * @author gmcarstairs
+ *
+ * @param <T>
+ *          any type providing <code>getBegin()</code> and <code>getEnd()</code>
+ */
+public class IntervalStore<T extends IntervalI>
+        extends AbstractCollection<T>
+        implements intervalstore.api.IntervalStoreI<T>
+{
+  private List<T> intervals;
+
+  private boolean isTainted;
+
+  private IntervalI[] orderedIntervalStarts;
+
+  /**
+   * Constructor
+   */
+  public IntervalStore()
+  {
+    intervals = new ArrayList<>();
+  }
+
+  class IntervalComparator implements Comparator<T>
+  {
+
+    @Override
+    public int compare(IntervalI o1, IntervalI o2)
+    {
+          int order = Integer.compare(o1.getBegin(), o2.getBegin());
+          if (order == 0)
+          {
+            /*
+             * if tied on start position, longer length sorts to left
+             * i.e. the negation of normal ordering by length
+             */
+            order = Integer.compare(o2.getEnd(), o1.getEnd());
+          }
+          return order;
+
+    }
+
+  };
+  
+  /**
+   * Constructor given a list of intervals. Note that the list may get sorted as
+   * a side-effect of calling this constructor.
+   */
+  public IntervalStore(List<T> intervals)
+  {
+    Collections.sort(this.intervals = intervals,
+              new IntervalComparator());
+      isTainted = true;
+    findOverlaps(0, -1); // ensure this is ordered
+  }
+
+  /**
+   * Adds one interval to the store.
+   * 
+   * @param interval
+   */
+  @Override
+  public boolean add(T interval)
+  {
+    if (interval == null)
+    {
+      return false;
+    }
+
+    synchronized (intervals)
+    {
+      int insertPosition = findFirstBegin(intervals, interval.getBegin());
+      intervals.add(insertPosition, interval);
+      isTainted = true;
+      return true;
+    }
+  }
+
+  @Override
+  public void clear()
+  {
+    intervals.clear();
+    orderedIntervalStarts = null;
+    isTainted = true;
+  }
+
+  @Override
+  public boolean contains(Object entry)
+  {
+    return listContains(intervals, entry);
+  }
+
+  protected int findFirstBegin(List<T> list, long pos)
+  {
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
+
+    while (start <= end)
+    {
+      int mid = (start + end) / 2;
+      if (list.get(mid).getBegin() >= pos)
+      {
+        matched = mid;
+        end = mid - 1;
+      }
+      else
+      {
+        start = mid + 1;
+      }
+    }
+    return matched;
+  }
+
+  protected int findFirstEnd(List<T> list, long pos)
+  {
+    int start = 0;
+    int end = list.size() - 1;
+    int matched = list.size();
+
+    while (start <= end)
+    {
+      int mid = (start + end) / 2;
+      if (list.get(mid).getEnd() >= pos)
+      {
+        matched = mid;
+        end = mid - 1;
+      }
+      else
+      {
+        start = mid + 1;
+      }
+    }
+    return matched;
+  }
+
+  /**
+   * Adds non-nested intervals to the result list that lie within the target
+   * range
+   * 
+   * @param from
+   * @param to
+   * @param result
+   */
+  protected void findIntervalOverlaps(long from, long to,
+          List<T> result)
+  {
+
+    int startIndex = findFirstEnd(intervals, from);
+    final int startIndex1 = startIndex;
+    int i = startIndex1;
+    while (i < intervals.size())
+    {
+      T sf = intervals.get(i);
+      if (sf.getBegin() > to)
+      {
+        break;
+      }
+      if (sf.getBegin() <= to && sf.getEnd() >= from)
+      {
+        result.add(sf);
+      }
+      i++;
+    }
+  }
+
+  @Override
+  public List<T> findOverlaps(long start, long end)
+  {
+
+    List<T> result = new ArrayList<>();
+
+    int n = intervals.size();
+    switch (n)
+    {
+    case 0:
+      return result;
+    case 1:
+      justCheckOne(intervals.get(0), start, end, result);
+      return result;
+    default:
+
+      if (isTainted)
+      {
+        orderedIntervalStarts = intervals.toArray(new IntervalI[0]);
+        linkFeatures(orderedIntervalStarts);
+        isTainted = false;
+      }
+      break;
+    }
+
+    if (end < start)
+    {
+      // just ensuring not tainted -- fully loaded
+      return result;
+    }
+
+    // (1) Find the closest feature to this position.
+
+    int index = getClosestFeature(orderedIntervalStarts, start);
+    IntervalI sf = (index < 0 ? null : orderedIntervalStarts[index]);
+
+    // (2) Traverse the containedBy field, checking for overlap.
+
+    while (sf != null)
+    {
+      if (sf.getEnd() >= start)
+      {
+        result.add((T) sf);
+      }
+      sf = sf.getContainedBy();
+    }
+
+    // (3) For an interval, find the last feature that starts in this interval,
+    // and add all features up through that feature.
+
+    if (end >= start)
+    {
+      // fill in with all features that start within this interval, fully
+      // inclusive
+      int index2 = getClosestFeature(orderedIntervalStarts, end);
+      while (++index <= index2)
+      {
+        result.add((T) orderedIntervalStarts[index]);
+      }
+
+    }
+    return result;
+  }
+
+  private int getClosestFeature(IntervalI[] l, long pos)
+  {
+    int low = 0;
+    int high = l.length - 1;
+    while (low <= high)
+    {
+      int mid = (low + high) >>> 1;
+      IntervalI f = l[mid];
+      switch (Long.signum(f.getBegin() - pos))
+      {
+      case -1:
+        low = mid + 1;
+        continue;
+      case 1:
+        high = mid - 1;
+        continue;
+      case 0:
+
+        while (++mid <= high && l[mid].getBegin() == pos)
+        {
+          ;
+        }
+        return --mid;
+      }
+    }
+    return (high < 0 ? -1 : high);
+  }
+
+  @SuppressWarnings("unchecked")
+  public T getContainedBy(T sf, T sf0)
+  {
+    int begin = sf0.getBegin();
+    while (sf != null)
+    {
+      if (begin <= sf.getEnd())
+      {
+        // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0
+        // + "\nFS in " + sf.getIndex1() + ":" + sf);
+        return sf;
+      }
+      sf = (T) sf.getContainedBy();
+    }
+    return null;
+  }
+
+  @Override
+  public int getDepth()
+  {
+    if (size() == 0)
+    {
+      return 0;
+    }
+    int maxDepth = 0;
+    for (int i = intervals.size(); --i >= 0;)
+    {
+      T element = intervals.get(i);
+      T container = element;
+      int depth = 0;
+      while ((container = (T) container.getContainedBy()) != null)
+      {
+        if (++depth > maxDepth && container == this)
+        {
+          maxDepth = depth;
+          break;
+        }
+      }
+    }
+    return maxDepth;
+  }
+
+  @Override
+  public boolean isValid()
+  {
+    for (int i = 0; i < intervals.size() - 1; i++)
+    {
+      T i1 = intervals.get(i);
+      T i2 = intervals.get(i + 1);
+
+      if (i2.getBegin() < i1.getBegin())
+      {
+        System.err.println("nonNested wrong start order : " + i1.toString()
+                + ", " + i2.toString());
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Answers an iterator over the intervals in the store, with no particular
+   * ordering guaranteed. The iterator does not support the optional
+   * <code>remove</code> operation (throws
+   * <code>UnsupportedOperationException</code> if attempted).
+   */
+  @Override
+  public Iterator<T> iterator()
+  {
+    return intervals.iterator();
+
+  }
+
+  private void justCheckOne(T sf, long start, long end, List<T> result)
+  {
+    if (sf.getBegin() <= end && sf.getEnd() >= start)
+    {
+      result.add(sf);
+    }
+    return;
+  }
+
+  private void linkFeatures(IntervalI[] features)
+  {
+    int n = features.length;
+    switch (n)
+    {
+    case 0:
+      return;
+    case 1:
+      features[0].setIndex1(1);
+      return;
+    }
+    int maxEnd = features[0].getEnd();
+    for (int i = 1; i < n;)
+    {
+      T sf = (T) features[i];
+      sf.setIndex1(++i);
+      if (sf.getBegin() <= maxEnd)
+      {
+        sf.setContainedBy(getContainedBy((T) features[i - 2], sf));
+      }
+      if (sf.getEnd() > maxEnd)
+      {
+        maxEnd = sf.getEnd();
+      }
+    }
+
+  }
+
+  /**
+   * Answers true if the list contains the interval, else false. This method is
+   * optimised for the condition that the list is sorted on interval start
+   * position ascending, and will give unreliable results if this does not hold.
+   * 
+   * @param intervals
+   * @param entry
+   * @return
+   */
+  protected boolean listContains(List<T> intervals, Object entry)
+  {
+    if (intervals == null || entry == null)
+    {
+      return false;
+    }
+
+    T interval = (T) entry;
+
+    /*
+     * locate the first entry in the list which does not precede the interval
+     */
+    int pos = findFirstBegin(intervals, interval.getBegin());
+    int len = intervals.size();
+    while (pos < len)
+    {
+      T sf = intervals.get(pos);
+      if (sf.getBegin() > interval.getBegin())
+      {
+        return false; // no match found
+      }
+      if (sf.getEnd() == interval.getEnd() && sf.equals(interval))
+      {
+        return true;
+      }
+      pos++;
+    }
+    return false;
+  }
+
+  @Override
+  public String prettyPrint()
+  {
+    return toString();
+  }
+
+  @Override
+  public synchronized boolean remove(Object o)
+  {
+    if (o == null || !(o instanceof IntervalI))
+    {
+      return false;
+    }
+
+    T entry = (T) o;
+
+    /*
+     * try the non-nested positional intervals first
+     */
+    boolean removed = removeInterval(entry);
+    if (removed)
+    {
+      isTainted = true;
+    }
+
+    return removed;
+
+  }
+
+  /**
+   * Removes the given entry from the list of non-nested entries, returning true
+   * if found and removed, or false if not found. Specifically, removes the
+   * first item in the list for which <code>item.equals(entry)</code>.
+   * 
+   * @param entry
+   * @return
+   */
+  protected boolean removeInterval(T entry)
+  {
+    /*
+     * find the first interval that might match, i.e. whose 
+     * start position is not less than the target range start
+     * (NB inequality test ensures the first match if any is found)
+     */
+    int startIndex = findFirstBegin(intervals, entry.getBegin());
+
+    /*
+     * traverse intervals to look for a match
+     */
+    int from = entry.getBegin();
+    int i = startIndex;
+    int size = intervals.size();
+    while (i < size)
+    {
+      T sf = intervals.get(i);
+      if (sf.getBegin() > from)
+      {
+        break;
+      }
+      if (sf.equals(entry))
+      {
+        intervals.remove(i);
+        return true;
+      }
+      i++;
+    }
+    return false;
+  }
+
+  @Override
+  public int size()
+  {
+    return intervals.size();
+  }
+
+  @Override
+  public String toString()
+  {
+    ensureFinalized();
+    StringBuffer sb = new StringBuffer();
+
+    return sb.toString();
+  }
+
+}
diff --git a/unused/nonc/IntervalStoreI.java b/unused/nonc/IntervalStoreI.java
new file mode 100644 (file)
index 0000000..f925a15
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2018, Mungo Carstairs
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package intervalstore.api;
+
+import java.util.Collection;
+import java.util.List;
+
+import intervalstore.impl.NCList;
+
+public interface IntervalStoreI<T extends IntervalI> extends Collection<T>
+{
+
+  /**
+   * Returns a (possibly empty) list of items whose extent overlaps the given
+   * range
+   * 
+   * @param from
+   *          start of overlap range (inclusive)
+   * @param to
+   *          end of overlap range (inclusive)
+   * @return
+   */
+  List<T> findOverlaps(long from, long to);
+
+  /**
+   * Ensures that the IntervalStore is ready for findOverlap.
+   * 
+   * @return true iff the data held satisfy the rules of construction of an
+   *         IntervalStore.
+   * 
+   */
+  boolean isValid();
+
+  /**
+   * Answers the level of nesting of intervals in the store, as
+   * <ul>
+   * <li>0 if the store is empty</li>
+   * <li>1 if all intervals are 'top level' (non nested)</li>
+   * <li>else 1 plus the depth of the enclosed NCList</li>
+   * </ul>
+   * 
+   * @return
+   * @see NCList#getDepth()
+   */
+
+  int getDepth();
+
+  /**
+   * Return the number of top-level (not-contained) intervals.
+   * 
+   * @return
+   */
+  int getWidth();
+
+  List<T> findOverlaps(long start, long end, List<T> result);
+
+  String prettyPrint();
+
+  /**
+   * Resort and rebuild links.
+   * 
+   * @return
+   */
+  boolean revalidate();
+
+  IntervalI get(int i);
+
+}
\ No newline at end of file