2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel.features;
23 import jalview.datamodel.SequenceFeature;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Comparator;
28 import java.util.List;
31 * An adaption of FeatureStore that is efficient and lightweight, accelerating
32 * processing speed in JavaScript.
35 * @author Bob Hanson 2019.08.03
38 public class FeatureStoreJS extends FeatureStore
40 boolean contactsTainted = true;
45 public FeatureStoreJS()
47 features = new ArrayList<>();
51 * Add a contact feature to the lists that hold them ordered by start (first
52 * contact) and by end (second contact) position, ensuring the lists remain
53 * ordered, and returns true. This method allows duplicate features to be
54 * added, so test before calling to avoid this.
60 protected synchronized boolean addContactFeature(SequenceFeature feature)
62 if (contactFeatureStarts == null)
64 contactFeatureStarts = new ArrayList<>();
65 contactFeatureEnds = new ArrayList<>();
67 contactFeatureStarts.add(
68 findFirstBegin(contactFeatureStarts, feature.begin), feature);
69 contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end),
75 * Returns a (possibly empty) list of features whose extent overlaps the given
76 * range. The returned list is not ordered. Contact features are included if
77 * either of the contact points lies within the range.
80 * start position of overlap range (inclusive)
82 * end position of overlap range (inclusive)
87 public List<SequenceFeature> findOverlappingFeatures(long start, long end,
88 List<SequenceFeature> result)
92 result = new ArrayList<>();
94 if (contactFeatureStarts != null)
98 findContactPoints(contactFeatureStarts, start, result, true);
99 findContactPoints(contactFeatureEnds, start, result, false);
103 findContactFeatures(start, end, result);
106 if (features.size() > 0)
108 findOverlaps(start, end, result);
113 // The following methods use a linked list of containment in SequenceFeature
114 // rather than IntervalStore.
116 // There are two parts --- initialization, and overlap searching.
118 // Initialization involves two steps:
120 // (1) sorting of features by start position using a standard Array.sort with
122 // (2) linking of features, effectively nesting them.
124 // Searching involves three steps:
126 // (1) binary search for a starting point within the sorted features array.
127 // (2) traverse the linked lists with an end check to read out the
128 // overlapped features at this position.
129 // (3) For an interval, find the last feature that starts in this interval,
130 // and add all features up through that feature.
132 // All of this is done with very simple standard methods.
137 * contact features ordered by first contact position
139 private SequenceFeature[] orderedFeatureStarts;
141 private void rebuildArrays(int n)
143 if (startComp == null)
145 startComp = new StartComparator();
147 orderedFeatureStarts = new SequenceFeature[n];
148 for (int i = n; --i >= 0;)
150 SequenceFeature sf = ((ArrayList<SequenceFeature>) features).get(i);
151 sf.index = i; // for debugging only
152 orderedFeatureStarts[i] = sf;
154 Arrays.sort(orderedFeatureStarts, startComp);
155 linkFeatures(orderedFeatureStarts);
159 * just a standard Comparator
161 private static StartComparator startComp;
163 class StartComparator implements Comparator<SequenceFeature>
167 public int compare(SequenceFeature o1, SequenceFeature o2)
171 return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0);
177 * Run through the sorted sequence array once, building the containedBy linked
178 * list references. Does a check first to make sure there is actually
179 * something out there that is overlapping. A null for sf.containedBy means
180 * there are no overlaps for this feature.
184 private void linkFeatures(SequenceFeature[] intervals)
186 if (intervals.length < 2)
190 int maxEnd = intervals[0].end;
191 for (int i = 1, n = intervals.length; i < n; i++)
193 SequenceFeature sf = intervals[i];
194 if (sf.begin <= maxEnd)
196 sf.containedBy = getContainedBy(intervals[i - 1], sf);
206 * Since we are traversing the sorted feature array in a forward direction,
207 * all elements prior to the one we are working on have been fully linked. All
208 * we are doing is following those links until we find the first array feature
209 * with a containedBy element that has an end >= our begin point. It is
210 * generally a very short list -- maybe one or two depths. But it might be
217 private SequenceFeature getContainedBy(SequenceFeature sf,
220 int begin = sf0.begin;
225 // System.out.println("\nFS found " + sf0.index + ":" + sf0
226 // + "\nFS in " + sf.index + ":" + sf);
234 // search-stage methods
237 * Binary search for contact start or end at a given (Overview) position.
244 * @author Bob Hanson 2019.07.30
246 private static void findContactPoints(List<SequenceFeature> l, long pos,
247 List<SequenceFeature> result, boolean isStart)
250 int high = l.size() - 1;
253 int mid = (low + high) >>> 1;
254 SequenceFeature f = l.get(mid);
255 switch (Long.signum((isStart ? f.begin : f.end) - pos))
266 // could be "5" in 12345556788 ?
267 while (++mid <= high && (f = l.get(mid)) != null
268 && (isStart ? f.begin : f.end) == pos)
272 while (--m >= low && (f = l.get(m)) != null
273 && (isStart ? f.begin : f.end) == pos)
283 * Find all overlaps; special case when there is only one feature. The
284 * required array of start-sorted SequenceFeature is created lazily.
290 private void findOverlaps(long start, long end,
291 List<SequenceFeature> result)
293 int n = features.size();
299 checkOne(((ArrayList<SequenceFeature>) features).get(0), start, end,
303 if (orderedFeatureStarts == null)
310 // (1) Find the closest feature to this position.
312 int index = findClosestFeature(orderedFeatureStarts, start);
313 SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
315 // (2) Traverse the containedBy field, checking for overlap.
326 // (3) For an interval, find the last feature that starts in this interval,
327 // and add all features up through that feature.
331 // fill in with all features that start within this interval, fully
333 int index2 = findClosestFeature(orderedFeatureStarts, end);
334 while (++index <= index2)
336 result.add(orderedFeatureStarts[index]);
343 * Quick check when we only have one feature.
350 private void checkOne(SequenceFeature sf, long start, long end,
351 List<SequenceFeature> result)
353 if (sf.begin <= end && sf.end >= start)
361 * A binary search identical to the one used for contact start/end, but here
362 * we return the feature itself. Unlike Collection.BinarySearch, all we have
363 * to be is close, not exact, and we make sure if there is a string of
364 * identical starts, then we slide to the end so that we can check all of
371 private int findClosestFeature(SequenceFeature[] l, long pos)
374 int high = l.length - 1;
377 int mid = (low + high) >>> 1;
378 SequenceFeature f = l[mid];
379 switch (Long.signum(f.begin - pos))
389 while (++mid <= high && l[mid].begin == pos)
396 return (high < 0 ? -1 : high);
400 * Adds contact features to the result list where either the second or the
401 * first contact position lies within the target range
408 protected void findContactFeatures(long from, long to,
409 List<SequenceFeature> result)
411 findContactStartOverlaps(from, to, result);
412 findContactEndOverlaps(from, to, result);
416 * Adds to the result list any contact features whose end (second contact
417 * point), but not start (first contact point), lies in the query from-to
425 private void findContactEndOverlaps(long from, long to,
426 List<SequenceFeature> result)
428 // find the first contact feature (if any)
429 // with end point not before the target range
431 for (int i = findFirstEnd(contactFeatureEnds,
432 from), n = contactFeatureEnds.size(); i < n; i++)
434 SequenceFeature sf = contactFeatureEnds.get(i);
435 if (sf.begin >= from && sf.begin <= to)
437 // this feature's first contact position lies in the search range
438 // so we don't include it in results a second time
444 // this feature (and all following) has end point after the target range
448 // feature has end >= from and end <= to
449 // i.e. contact end point lies within overlap search range
455 * Adds contact features whose start position lies in the from-to range to the
463 private void findContactStartOverlaps(long from, long to,
464 List<SequenceFeature> result)
466 for (int i = findFirstBegin(contactFeatureStarts,
467 from), n = contactFeatureStarts.size(); i < n; i++)
469 SequenceFeature sf = contactFeatureStarts.get(i);
479 protected int findFirstBegin(List<SequenceFeature> list, long pos)
482 int end = list.size() - 1;
483 int matched = list.size();
487 int mid = (start + end) / 2;
488 if (list.get(mid).begin >= pos)
502 protected int findFirstEnd(List<SequenceFeature> list, long pos)
505 int end = list.size() - 1;
506 int matched = list.size();
510 int mid = (start + end) / 2;
511 if (list.get(mid).end >= pos)