2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel.features;
23 import jalview.datamodel.SequenceFeature;
25 import java.util.ArrayList;
26 import java.util.List;
29 * An adaption of FeatureStore that is efficient and lightweight, accelerating
30 * processing speed in JavaScript.
33 * @author Bob Hanson 2019.08.03
36 public class FeatureStoreJS extends FeatureStore
38 boolean contactsTainted = true;
41 * internal reference to features as an ArrayList
43 private ArrayList<SequenceFeature> featureList;
46 * contact features ordered by first contact position
48 private SequenceFeature[] orderedFeatureStarts;
51 * indicates that we need to rebuild orderedFeatureStarts and reset index
54 private boolean isTainted = true;
59 public FeatureStoreJS()
61 features = featureList = new ArrayList<>();
65 * Add a contact feature to the lists that hold them ordered by start (first
66 * contact) and by end (second contact) position, ensuring the lists remain
67 * ordered, and returns true. This method allows duplicate features to be
68 * added, so test before calling to avoid this.
74 protected synchronized boolean addContactFeature(SequenceFeature feature)
76 if (contactFeatureStarts == null)
78 contactFeatureStarts = new ArrayList<>();
79 contactFeatureEnds = new ArrayList<>();
81 contactFeatureStarts.add(
82 findFirstBegin(contactFeatureStarts, feature.begin), feature);
83 contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end),
88 // The following methods use a linked list of containment in SequenceFeature
89 // rather than IntervalStore.
91 // There are two parts --- initialization, and overlap searching.
93 // Initialization involves two steps:
95 // (1) sorting of features by start position using a standard Array.sort with
97 // (2) linking of features, effectively nesting them.
99 // Searching involves three steps:
101 // (1) binary search for a starting point within the sorted features array.
102 // (2) traverse the linked lists with an end check to read out the
103 // overlapped features at this position.
104 // (3) For an interval, find the last feature that starts in this interval,
105 // and add all features up through that feature.
107 // All of this is done with very simple standard methods.
112 * Adds one feature to the IntervalStore that can manage nested features
113 * (creating the IntervalStore if necessary)
116 protected synchronized void addPositionalFeature(SequenceFeature feature)
118 featureList.add(findFirstBegin(featureList, feature.begin), feature);
123 protected boolean containsFeature(SequenceFeature feature)
126 return getEquivalentFeatureIndex(featureList, feature) >= 0;
130 protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
132 int pos = getEquivalentFeatureIndex(featureList, sf);
137 featureList.remove(pos);
138 return (isTainted = true);
142 * Adds contact features to the result list where either the second or the
143 * first contact position lies within the target range
150 protected void findContactFeatures(long from, long to,
151 List<SequenceFeature> result)
153 getContactStartOverlaps(from, to, result);
154 getContactEndOverlaps(from, to, result);
158 protected int findFirstBegin(List<SequenceFeature> list, long pos)
161 int end = list.size() - 1;
162 int matched = list.size();
166 int mid = (start + end) / 2;
167 if (list.get(mid).begin >= pos)
181 protected int findFirstEnd(List<SequenceFeature> list, long pos)
184 int end = list.size() - 1;
185 int matched = list.size();
189 int mid = (start + end) / 2;
190 if (list.get(mid).end >= pos)
204 * Returns a (possibly empty) list of features whose extent overlaps the given
205 * range. The returned list is not ordered. Contact features are included if
206 * either of the contact points lies within the range.
209 * start position of overlap range (inclusive)
211 * end position of overlap range (inclusive)
216 public List<SequenceFeature> findOverlappingFeatures(long start, long end,
217 List<SequenceFeature> result)
221 result = new ArrayList<>();
223 if (contactFeatureStarts != null)
227 getContactPoints(contactFeatureStarts, start, result, true);
228 getContactPoints(contactFeatureEnds, start, result, false);
232 findContactFeatures(start, end, result);
235 if (featureList.size() > 0)
237 getOverlaps(start, end, result);
243 * A binary search identical to the one used for contact start/end, but here
244 * we return the feature itself. Unlike Collection.BinarySearch, all we have
245 * to be is close, not exact, and we make sure if there is a string of
246 * identical starts, then we slide to the end so that we can check all of
253 private int getClosestFeature(SequenceFeature[] l, long pos)
256 int high = l.length - 1;
259 int mid = (low + high) >>> 1;
260 SequenceFeature f = l[mid];
261 switch (Long.signum(f.begin - pos))
271 while (++mid <= high && l[mid].begin == pos)
278 return (high < 0 ? -1 : high);
282 * Adds to the result list any contact features whose end (second contact
283 * point), but not start (first contact point), lies in the query from-to
291 private void getContactEndOverlaps(long from, long to,
292 List<SequenceFeature> result)
294 // find the first contact feature (if any)
295 // with end point not before the target range
297 for (int i = findFirstEnd(contactFeatureEnds,
298 from), n = contactFeatureEnds.size(); i < n; i++)
300 SequenceFeature sf = contactFeatureEnds.get(i);
301 if (sf.begin >= from && sf.begin <= to)
303 // this feature's first contact position lies in the search range
304 // so we don't include it in results a second time
310 // this feature (and all following) has end point after the target range
314 // feature has end >= from and end <= to
315 // i.e. contact end point lies within overlap search range
321 * Binary search for contact start or end at a given (Overview) position.
328 * @author Bob Hanson 2019.07.30
330 private void getContactPoints(List<SequenceFeature> l, long pos,
331 List<SequenceFeature> result, boolean isStart)
334 int high = l.size() - 1;
337 int mid = (low + high) >>> 1;
338 SequenceFeature f = l.get(mid);
339 switch (Long.signum((isStart ? f.begin : f.end) - pos))
350 // could be "5" in 12345556788 ?
351 while (++mid <= high && (f = l.get(mid)) != null
352 && (isStart ? f.begin : f.end) == pos)
356 while (--m >= low && (f = l.get(m)) != null
357 && (isStart ? f.begin : f.end) == pos)
367 * Adds contact features whose start position lies in the from-to range to the
375 private void getContactStartOverlaps(long from, long to,
376 List<SequenceFeature> result)
378 for (int i = findFirstBegin(contactFeatureStarts,
379 from), n = contactFeatureStarts.size(); i < n; i++)
381 SequenceFeature sf = contactFeatureStarts.get(i);
391 * Since we are traversing the sorted feature array in a forward direction,
392 * all elements prior to the one we are working on have been fully linked. All
393 * we are doing is following those links until we find the first array feature
394 * with a containedBy element that has an end >= our begin point. It is
395 * generally a very short list -- maybe one or two depths. But it might be
402 private SequenceFeature getContainedBy(SequenceFeature sf,
405 int begin = sf0.begin;
410 System.out.println("\nFS found " + sf0 + "\nFS in " + sf);
419 * Fast find of known features already on the list; slower removal of
420 * equivalent feature, not necessarily identical.
423 * @return 0-based index for this feature in featureList
426 protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
427 SequenceFeature feature)
429 int pos = feature.index1 - 1;
430 if (!isTainted && pos >= 0)
434 return super.getEquivalentFeatureIndex(list, feature);
438 * Find all overlaps; special case when there is only one feature. The
439 * required array of start-sorted SequenceFeature is created lazily.
445 private void getOverlaps(long start, long end,
446 List<SequenceFeature> result)
448 int n = featureList.size();
454 justCheckOne(featureList.get(0), start, end, result);
459 orderedFeatureStarts = featureList
460 .toArray(new SequenceFeature[featureList.size()]);
461 linkFeatures(orderedFeatureStarts);
467 // (1) Find the closest feature to this position.
469 int index = getClosestFeature(orderedFeatureStarts, start);
470 SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
472 // (2) Traverse the containedBy field, checking for overlap.
483 // (3) For an interval, find the last feature that starts in this interval,
484 // and add all features up through that feature.
488 // fill in with all features that start within this interval, fully
490 int index2 = getClosestFeature(orderedFeatureStarts, end);
491 while (++index <= index2)
493 result.add(orderedFeatureStarts[index]);
500 * Quick check when we only have one feature.
507 private void justCheckOne(SequenceFeature sf, long start, long end,
508 List<SequenceFeature> result)
510 if (sf.begin <= end && sf.end >= start)
518 * Run through the sorted sequence array once, building the containedBy linked
519 * list references. Does a check first to make sure there is actually
520 * something out there that is overlapping. A null for sf.containedBy means
521 * there are no overlaps for this feature.
525 private void linkFeatures(SequenceFeature[] features)
527 int n = features.length;
533 features[0].index1 = 1;
536 int maxEnd = features[0].end;
537 for (int i = 1; i < n;)
539 SequenceFeature sf = features[i];
540 if (sf.begin <= maxEnd)
542 sf.containedBy = getContainedBy(features[i - 1], sf);