--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.datamodel.features;
+
+import jalview.datamodel.SequenceFeature;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * An adaption of FeatureStore that is efficient and lightweight, accelerating
+ * processing speed in JavaScript.
+ *
+ * @author gmcarstairs
+ * @author Bob Hanson 2019.08.03
+ *
+ */
+public class FeatureStoreJS extends FeatureStore
+{
+ boolean contactsTainted = true;
+
+ /**
+ * Constructor
+ */
+ public FeatureStoreJS()
+ {
+ features = new ArrayList<>();
+ }
+
+ /**
+ * Returns a (possibly empty) list of features whose extent overlaps the given
+ * range. The returned list is not ordered. Contact features are included if
+ * either of the contact points lies within the range.
+ *
+ * @param start
+ * start position of overlap range (inclusive)
+ * @param end
+ * end position of overlap range (inclusive)
+ * @return
+ */
+
+ @Override
+ public List<SequenceFeature> findOverlappingFeatures(long start, long end,
+ List<SequenceFeature> result)
+ {
+ if (result == null)
+ {
+ result = new ArrayList<>();
+ }
+ if (contactFeatureStarts != null)
+ {
+ if (start == end)
+ {
+ findContactPoints(contactFeatureStarts, start, result, true);
+ findContactPoints(contactFeatureEnds, start, result, false);
+ }
+ else
+ {
+ findContactFeatures(start, end, result);
+ }
+ }
+ if (features.size() > 0)
+ {
+ findOverlaps(start, end, result);
+ }
+ return result;
+ }
+
+ // The following methods use a linked list of containment in SequenceFeature
+ // rather than IntervalStore.
+ //
+ // There are two parts --- initialization, and overlap searching.
+ //
+ // Initialization involves two steps:
+ //
+ // (1) sorting of features by start position using a standard Array.sort with
+ // Comparator.
+ // (2) linking of features, effectively nesting them.
+ //
+ // Searching involves three steps:
+ //
+ // (1) binary search for a starting point within the sorted features array.
+ // (2) traverse the linked lists with an end check to read out the
+ // overlapped features at this position.
+ // (3) For an interval, find the last feature that starts in this interval,
+ // and add all features up through that feature.
+ //
+ // All of this is done with very simple standard methods.
+
+ // Initialization
+
+ /*
+ * contact features ordered by first contact position
+ */
+ private SequenceFeature[] orderedFeatureStarts;
+
+ private void rebuildArrays(int n)
+ {
+ if (startComp == null)
+ {
+ startComp = new StartComparator();
+ }
+ orderedFeatureStarts = new SequenceFeature[n];
+ for (int i = n; --i >= 0;)
+ {
+ SequenceFeature sf = ((ArrayList<SequenceFeature>) features).get(i);
+ sf.index = i; // for debugging only
+ orderedFeatureStarts[i] = sf;
+ }
+ Arrays.sort(orderedFeatureStarts, startComp);
+ linkFeatures(orderedFeatureStarts);
+ }
+
+ /**
+ * just a standard Comparator
+ */
+ private static StartComparator startComp;
+
+ class StartComparator implements Comparator<SequenceFeature>
+ {
+
+ @Override
+ public int compare(SequenceFeature o1, SequenceFeature o2)
+ {
+ int p1 = o1.begin;
+ int p2 = o2.begin;
+ return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0);
+ }
+
+ }
+
+ /**
+ * Run through the sorted sequence array once, building the containedBy linked
+ * list references. Does a check first to make sure there is actually
+ * something out there that is overlapping. A null for sf.containedBy means
+ * there are no overlaps for this feature.
+ *
+ * @param intervals
+ */
+ private void linkFeatures(SequenceFeature[] intervals)
+ {
+ if (intervals.length < 2)
+ {
+ return;
+ }
+ int maxEnd = intervals[0].end;
+ for (int i = 1, n = intervals.length; i < n; i++)
+ {
+ SequenceFeature sf = intervals[i];
+ if (sf.begin <= maxEnd)
+ {
+ sf.containedBy = getContainedBy(intervals[i - 1], sf);
+ }
+ if (sf.end > maxEnd)
+ {
+ maxEnd = sf.end;
+ }
+ }
+ }
+
+ /**
+ * Since we are traversing the sorted feature array in a forward direction,
+ * all elements prior to the one we are working on have been fully linked. All
+ * we are doing is following those links until we find the first array feature
+ * with a containedBy element that has an end >= our begin point. It is
+ * generally a very short list -- maybe one or two depths. But it might be
+ * more than that.
+ *
+ * @param sf
+ * @param sf0
+ * @return
+ */
+ private SequenceFeature getContainedBy(SequenceFeature sf,
+ SequenceFeature sf0)
+ {
+ int begin = sf0.begin;
+ while (sf != null)
+ {
+ if (begin <= sf.end)
+ {
+ // System.out.println("\nFS found " + sf0.index + ":" + sf0
+ // + "\nFS in " + sf.index + ":" + sf);
+ return sf;
+ }
+ sf = sf.containedBy;
+ }
+ return null;
+ }
+
+ // search-stage methods
+
+ /**
+ * Binary search for contact start or end at a given (Overview) position.
+ *
+ * @param l
+ * @param pos
+ * @param result
+ * @param isStart
+ *
+ * @author Bob Hanson 2019.07.30
+ */
+ private static void findContactPoints(List<SequenceFeature> l, long pos,
+ List<SequenceFeature> result, boolean isStart)
+ {
+ int low = 0;
+ int high = l.size() - 1;
+ while (low <= high)
+ {
+ int mid = (low + high) >>> 1;
+ SequenceFeature f = l.get(mid);
+ switch (Long.signum((isStart ? f.begin : f.end) - pos))
+ {
+ case -1:
+ low = mid + 1;
+ continue;
+ case 1:
+ high = mid - 1;
+ continue;
+ case 0:
+ int m = mid;
+ result.add(f);
+ // could be "5" in 12345556788 ?
+ while (++mid <= high && (f = l.get(mid)) != null
+ && (isStart ? f.begin : f.end) == pos)
+ {
+ result.add(f);
+ }
+ while (--m >= low && (f = l.get(m)) != null
+ && (isStart ? f.begin : f.end) == pos)
+ {
+ result.add(f);
+ }
+ return;
+ }
+ }
+ }
+
+ /**
+ * Find all overlaps; special case when there is only one feature. The
+ * required array of start-sorted SequenceFeature is created lazily.
+ *
+ * @param features
+ * @param pos
+ * @param result
+ */
+ private void findOverlaps(long start, long end,
+ List<SequenceFeature> result)
+ {
+ int n = features.size();
+ switch (n)
+ {
+ case 0:
+ return;
+ case 1:
+ checkOne(((ArrayList<SequenceFeature>) features).get(0), start, end,
+ result);
+ return;
+ default:
+ if (orderedFeatureStarts == null)
+ {
+ rebuildArrays(n);
+ }
+ break;
+ }
+
+ // (1) Find the closest feature to this position.
+
+ int index = findClosestFeature(orderedFeatureStarts, start);
+ SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
+
+ // (2) Traverse the containedBy field, checking for overlap.
+
+ while (sf != null)
+ {
+ if (sf.end >= start)
+ {
+ result.add(sf);
+ }
+ sf = sf.containedBy;
+ }
+
+ // (3) For an interval, find the last feature that starts in this interval,
+ // and add all features up through that feature.
+
+ if (end > start)
+ {
+ // fill in with all features that start within this interval, fully
+ // inclusive
+ int index2 = findClosestFeature(orderedFeatureStarts, end);
+ while (++index <= index2)
+ {
+ result.add(orderedFeatureStarts[index]);
+ }
+
+ }
+ }
+
+ /**
+ * Quick check when we only have one feature.
+ *
+ * @param sf
+ * @param start
+ * @param end
+ * @param result
+ */
+ private void checkOne(SequenceFeature sf, long start, long end,
+ List<SequenceFeature> result)
+ {
+ if (sf.begin <= end && sf.end >= start)
+ {
+ result.add(sf);
+ }
+ return;
+ }
+
+ /**
+ * A binary search identical to the one used for contact start/end, but here
+ * we return the feature itself. Unlike Collection.BinarySearch, all we have
+ * to be is close, not exact, and we make sure if there is a string of
+ * identical starts, then we slide to the end so that we can check all of
+ * them.
+ *
+ * @param l
+ * @param pos
+ * @return
+ */
+ private int findClosestFeature(SequenceFeature[] l, long pos)
+ {
+ int low = 0;
+ int high = l.length - 1;
+ while (low <= high)
+ {
+ int mid = (low + high) >>> 1;
+ SequenceFeature f = l[mid];
+ switch (Long.signum(f.begin - pos))
+ {
+ case -1:
+ low = mid + 1;
+ continue;
+ case 1:
+ high = mid - 1;
+ continue;
+ case 0:
+
+ while (++mid <= high && l[mid].begin == pos)
+ {
+ ;
+ }
+ return --mid;
+ }
+ }
+ return (high < 0 ? -1 : high);
+ }
+
+ /**
+ * Adds contact features to the result list where either the second or the
+ * first contact position lies within the target range
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+ @Override
+ protected void findContactFeatures(long from, long to,
+ List<SequenceFeature> result)
+ {
+ findContactStartOverlaps(from, to, result);
+ findContactEndOverlaps(from, to, result);
+ }
+
+ /**
+ * Adds to the result list any contact features whose end (second contact
+ * point), but not start (first contact point), lies in the query from-to
+ * range
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+
+ private void findContactEndOverlaps(long from, long to,
+ List<SequenceFeature> result)
+ {
+ // find the first contact feature (if any)
+ // with end point not before the target range
+
+ for (int i = findFirstEnd(contactFeatureEnds,
+ from), n = contactFeatureEnds.size(); i < n; i++)
+ {
+ SequenceFeature sf = contactFeatureEnds.get(i);
+ if (sf.begin >= from && sf.begin <= to)
+ {
+ // this feature's first contact position lies in the search range
+ // so we don't include it in results a second time
+ continue;
+ }
+
+ if (sf.end > to)
+ {
+ // this feature (and all following) has end point after the target range
+ break;
+ }
+
+ // feature has end >= from and end <= to
+ // i.e. contact end point lies within overlap search range
+ result.add(sf);
+ }
+ }
+
+ /**
+ * Adds contact features whose start position lies in the from-to range to the
+ * result list
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+
+ private void findContactStartOverlaps(long from, long to,
+ List<SequenceFeature> result)
+ {
+ for (int i = findFirstBegin(contactFeatureStarts,
+ from), n = contactFeatureStarts.size(); i < n; i++)
+ {
+ SequenceFeature sf = contactFeatureStarts.get(i);
+ if (sf.begin > to)
+ {
+ break;
+ }
+ result.add(sf);
+ }
+ }
+
+ public static int findFirstBegin(List<SequenceFeature> list, long pos)
+ {
+ int start = 0;
+ int end = list.size() - 1;
+ int matched = list.size();
+
+ while (start <= end)
+ {
+ int mid = (start + end) / 2;
+ if (list.get(mid).begin >= pos)
+ {
+ matched = mid;
+ end = mid - 1;
+ }
+ else
+ {
+ start = mid + 1;
+ }
+ }
+ return matched;
+ }
+
+ public static int findFirstEnd(List<SequenceFeature> list, long pos)
+ {
+ int start = 0;
+ int end = list.size() - 1;
+ int matched = list.size();
+
+ while (start <= end)
+ {
+ int mid = (start + end) / 2;
+ if (list.get(mid).end >= pos)
+ {
+ matched = mid;
+ end = mid - 1;
+ }
+ else
+ {
+ start = mid + 1;
+ }
+ }
+ return matched;
+ }
+
+
+}