JAL-3210 Improvements to eclipse detection. New src tree and SwingJS updated from...
[jalview.git] / src / jalview / datamodel / features / FeatureStoreImpl.java
diff --git a/src/jalview/datamodel/features/FeatureStoreImpl.java b/src/jalview/datamodel/features/FeatureStoreImpl.java
new file mode 100644 (file)
index 0000000..a90755d
--- /dev/null
@@ -0,0 +1,304 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.datamodel.features;
+
+import jalview.datamodel.SequenceFeature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import intervalstore.api.IntervalStoreI;
+import intervalstore.impl.BinarySearcher;
+
+/**
+ * A data store for a set of sequence features that supports efficient lookup of
+ * features overlapping a given range. Intended for (but not limited to) storage
+ * of features for one sequence and feature type.
+ * 
+ * @author gmcarstairs
+ *
+ */
+public class FeatureStoreImpl extends FeatureStore
+{
+
+  /**
+   * Default constructor uses NCList
+   */
+  public FeatureStoreImpl()
+  {
+    this(true);
+  }
+
+  public FeatureStoreImpl(boolean useNCList)
+  {
+    features = (useNCList ? new intervalstore.impl.IntervalStore<>()
+            : new intervalstore.nonc.IntervalStore<>(false));
+  }
+
+  /**
+   * Add a contact feature to the lists that hold them ordered by start (first
+   * contact) and by end (second contact) position, ensuring the lists remain
+   * ordered, and returns true. This method allows duplicate features to be
+   * added, so test before calling to avoid this.
+   * 
+   * @param feature
+   * @return
+   */
+  @Override
+  protected synchronized boolean addContactFeature(SequenceFeature feature)
+  {
+    if (contactFeatureStarts == null)
+    {
+      contactFeatureStarts = new ArrayList<>();
+      contactFeatureEnds = new ArrayList<>();
+    }
+
+    /*
+     * insert into list sorted by start (first contact position):
+     * binary search the sorted list to find the insertion point
+     */
+    int insertPosition = findFirstBeginStatic(contactFeatureStarts,
+            feature.getBegin());
+    contactFeatureStarts.add(insertPosition, feature);
+
+    /*
+     * insert into list sorted by end (second contact position):
+     * binary search the sorted list to find the insertion point
+     */
+    insertPosition = findFirstEndStatic(contactFeatureEnds,
+            feature.getEnd());
+    contactFeatureEnds.add(insertPosition, feature);
+
+    return true;
+  }
+
+  /**
+   * Adds one feature to the IntervalStore that can manage nested features
+   * (creating the IntervalStore if necessary)
+   */
+  @Override
+  protected synchronized boolean addPositionalFeature(
+          SequenceFeature feature)
+  {
+    return features.add(feature);
+  }
+
+  /**
+   * Adds contact features to the result list where either the second or the
+   * first contact position lies within the target range
+   * 
+   * @param from
+   * @param to
+   * @param result
+   */
+  @Override
+  protected void findContactFeatures(long from, long to,
+          List<SequenceFeature> result)
+  {
+    if (contactFeatureStarts != null)
+    {
+      findContactStartOverlaps(from, to, result);
+      findContactEndOverlaps(from, to, result);
+    }
+  }
+
+  @Override
+  protected boolean containsFeature(SequenceFeature feature)
+  {
+    return features.contains(feature);
+  }
+
+  /**
+   * Adds to the result list any contact features whose end (second contact
+   * point), but not start (first contact point), lies in the query from-to
+   * range
+   * 
+   * @param from
+   * @param to
+   * @param result
+   */
+
+  private void findContactEndOverlaps(long from, long to,
+          List<SequenceFeature> result)
+  {
+    /*
+     * find the first contact feature (if any) 
+     * whose end point is not before the target range
+     */
+    int index = findFirstEndStatic(contactFeatureEnds, from);
+
+    int n = contactFeatureEnds.size();
+    while (index < n)
+    {
+      SequenceFeature sf = contactFeatureEnds.get(index);
+      if (!sf.isContactFeature())
+      {
+        System.err.println("Error! non-contact feature type " + sf.getType()
+                + " in contact features list");
+        index++;
+        continue;
+      }
+
+      int begin = sf.getBegin();
+      if (begin >= from && begin <= to)
+      {
+        /*
+         * this feature's first contact position lies in the search range
+         * so we don't include it in results a second time
+         */
+        index++;
+        continue;
+      }
+
+      if (sf.getEnd() > to)
+      {
+        /*
+         * this feature (and all following) has end point after the target range
+         */
+        break;
+      }
+
+      /*
+       * feature has end >= from and end <= to
+       * i.e. contact end point lies within overlap search range
+       */
+      result.add(sf);
+      index++;
+    }
+  }
+
+  /**
+   * Adds contact features whose start position lies in the from-to range to the
+   * result list
+   * 
+   * @param from
+   * @param to
+   * @param result
+   */
+
+  private void findContactStartOverlaps(long from, long to,
+          List<SequenceFeature> result)
+  {
+    int index = findFirstBegin(contactFeatureStarts, from);
+
+    while (index < contactFeatureStarts.size())
+    {
+      SequenceFeature sf = contactFeatureStarts.get(index);
+      if (!sf.isContactFeature())
+      {
+        System.err.println("Error! non-contact feature " + sf.toString()
+                + " in contact features list");
+        index++;
+        continue;
+      }
+      if (sf.getBegin() > to)
+      {
+        /*
+         * this feature's start (and all following) follows the target range
+         */
+        break;
+      }
+
+      /*
+       * feature has begin >= from and begin <= to
+       * i.e. contact start point lies within overlap search range
+       */
+      result.add(sf);
+      index++;
+    }
+  }
+
+  /**
+   * Returns a (possibly empty) list of features whose extent overlaps the given
+   * range. The returned list is not ordered. Contact features are included if
+   * either of the contact points lies within the range.
+   * 
+   * @param start
+   *          start position of overlap range (inclusive)
+   * @param end
+   *          end position of overlap range (inclusive)
+   * @param result
+   *          ignored
+   * @return
+   */
+
+  @Override
+  public List<SequenceFeature> findOverlappingFeatures(long start, long end,
+          List<SequenceFeature> result)
+  {
+    result = new ArrayList<>();
+    findContactFeatures(start, end, result);
+    findOverlaps(start, end, result);
+    return result;
+  }
+
+  private void findOverlaps(long start, long end,
+          List<SequenceFeature> result)
+  {
+    result.addAll(((IntervalStoreI<SequenceFeature>) features)
+            .findOverlaps(start, end));
+  }
+
+  @Override
+  protected int findFirstBegin(List<SequenceFeature> list, long pos)
+  {
+    return findFirstBeginStatic(list, pos);
+  }
+
+  /**
+   * Possibly a bit faster using a static method.
+   * 
+   * @param list
+   * @param pos
+   * @return
+   */
+  private static int findFirstBeginStatic(List<SequenceFeature> list,
+          long pos)
+  {
+    return BinarySearcher.findFirst(list, f -> f.getBegin() >= pos);
+  }
+
+  @Override
+  protected int findFirstEnd(List<SequenceFeature> list, long pos)
+  {
+    return findFirstEndStatic(list, pos);
+  }
+
+  /**
+   * Possibly a bit faster using a static method.
+   * 
+   * @param list
+   * @param pos
+   * @return
+   */
+  private static int findFirstEndStatic(List<SequenceFeature> list,
+          long pos)
+  {
+    return BinarySearcher.findFirst(list, f -> f.getEnd() >= pos);
+  }
+
+  @Override
+  protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
+  {
+    return features.remove(sf);
+  }
+
+}