2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel.features;
23 import jalview.datamodel.SequenceFeature;
25 import java.util.ArrayList;
26 import java.util.List;
28 import intervalstore.nonc.IntervalStore;
31 * An adaption of FeatureStore that is efficient and lightweight, accelerating
32 * processing speed in JavaScript.
35 * @author Bob Hanson 2019.08.03
38 public class FeatureStoreJS extends FeatureStore
40 boolean contactsTainted = true;
42 private IntervalStore<SequenceFeature> featureStore;
46 // * internal reference to features as an ArrayList
48 // private ArrayList<SequenceFeature> featureList;
51 // * contact features ordered by first contact position
53 // private SequenceFeature[] orderedFeatureStarts;
56 // * indicates that we need to rebuild orderedFeatureStarts and reset index
59 // private boolean isTainted = true;
64 public FeatureStoreJS()
66 // the only reference to features field in this class -- for the superclass
68 features = featureStore = new IntervalStore<>(); // featureList = new
73 * Add a contact feature to the lists that hold them ordered by start (first
74 * contact) and by end (second contact) position, ensuring the lists remain
75 * ordered, and returns true. This method allows duplicate features to be
76 * added, so test before calling to avoid this.
82 protected synchronized boolean addContactFeature(SequenceFeature feature)
84 if (contactFeatureStarts == null)
86 contactFeatureStarts = new ArrayList<>();
87 contactFeatureEnds = new ArrayList<>();
89 contactFeatureStarts.add(
90 findFirstBegin(contactFeatureStarts, feature.begin), feature);
91 contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end),
96 // The following methods use a linked list of containment in SequenceFeature
97 // rather than IntervalStore.
99 // There are two parts --- initialization, and overlap searching.
101 // Initialization involves two steps:
103 // (1) sorting of features by start position using a standard Array.sort with
105 // (2) linking of features, effectively nesting them.
107 // Searching involves three steps:
109 // (1) binary search for a starting point within the sorted features array.
110 // (2) traverse the linked lists with an end check to read out the
111 // overlapped features at this position.
112 // (3) For an interval, find the last feature that starts in this interval,
113 // and add all features up through that feature.
115 // All of this is done with very simple standard methods.
120 * Adds one feature to the IntervalStore that can manage nested features
121 * (creating the IntervalStore if necessary)
123 * @return false if could not add it (late check for contained
126 protected synchronized boolean addPositionalFeature(
127 SequenceFeature feature)
130 return featureStore.add(feature, false);
131 // features.add(feature);
132 // featureList.add(findFirstBegin(featureList, feature.begin), feature);
137 protected boolean checkContainsPositionalFeatureForAdd(
138 SequenceFeature feature)
140 // the non-NCList IntervalStore does this as part of the add for greater
141 // efficiency (one binary search)
146 protected boolean containsFeature(SequenceFeature feature)
148 return featureStore.contains(feature);
149 // return getEquivalentFeatureIndex(featureList, feature) >= 0;
153 protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
155 return featureStore.remove(sf);
156 // int pos = getEquivalentFeatureIndex(featureList, sf);
161 // featureList.remove(pos);
162 // return (isTainted = true);
166 * Adds contact features to the result list where either the second or the
167 * first contact position lies within the target range
174 protected void findContactFeatures(long from, long to,
175 List<SequenceFeature> result)
177 getContactStartOverlaps(from, to, result);
178 getContactEndOverlaps(from, to, result);
182 * Locate the first feature start that is at or after this position.
186 static int ntotal = 0, ncaught = 0;
189 protected int findFirstBegin(List<SequenceFeature> list, long pos)
191 int matched = list.size();
192 int end = matched - 1;
193 int start = (end < 0 || list.get(end).begin < pos ? matched : 0);
198 // else if (start > end)
204 // System.out.println(
205 // "FSJS " + ncaught + "/" + ntotal + " " + pos + "/"
206 // + (end < 0 ? -1 : list.get(end).begin) + " "
207 // + start + "/" + matched);
211 int mid = (start + end) / 2;
212 if (list.get(mid).begin >= pos)
226 * Locate the feature start that is after or at this position.
231 protected int findFirstEnd(List<SequenceFeature> list, long pos)
234 int end = list.size() - 1;
235 int matched = list.size();
239 int mid = (start + end) / 2;
240 if (list.get(mid).end >= pos)
254 * Returns a (possibly empty) list of features whose extent overlaps the given
255 * range. The returned list is not ordered. Contact features are included if
256 * either of the contact points lies within the range.
259 * start position of overlap range (inclusive)
261 * end position of overlap range (inclusive)
266 public List<SequenceFeature> findOverlappingFeatures(long start, long end,
267 List<SequenceFeature> result)
271 result = new ArrayList<>();
273 if (contactFeatureStarts != null)
277 getContactPoints(contactFeatureStarts, start, result, true);
278 getContactPoints(contactFeatureEnds, start, result, false);
282 findContactFeatures(start, end, result);
285 if (featureStore.size() > 0)
287 featureStore.findOverlaps(start, end, result);
288 // getOverlaps(start, end, result);
294 // * A binary search identical to the one used for contact start/end, but here
295 // * we return the feature itself. Unlike Collection.BinarySearch, all we have
296 // * to be is close, not exact, and we make sure if there is a string of
297 // * identical starts, then we slide to the end so that we can check all of
304 // private int getClosestFeature(SequenceFeature[] l, long pos)
307 // int high = l.length - 1;
308 // while (low <= high)
310 // int mid = (low + high) >>> 1;
311 // SequenceFeature f = l[mid];
312 // switch (Long.signum(f.begin - pos))
322 // while (++mid <= high && l[mid].begin == pos)
329 // return (high < 0 ? -1 : high);
333 * Adds to the result list any contact features whose end (second contact
334 * point), but not start (first contact point), lies in the query from-to
342 private void getContactEndOverlaps(long from, long to,
343 List<SequenceFeature> result)
345 // find the first contact feature (if any)
346 // with end point not before the target range
348 for (int i = findFirstEnd(contactFeatureEnds,
349 from), n = contactFeatureEnds.size(); i < n; i++)
351 SequenceFeature sf = contactFeatureEnds.get(i);
352 if (sf.begin >= from && sf.begin <= to)
354 // this feature's first contact position lies in the search range
355 // so we don't include it in results a second time
361 // this feature (and all following) has end point after the target range
365 // feature has end >= from and end <= to
366 // i.e. contact end point lies within overlap search range
372 * Binary search for contact start or end at a given (Overview) position.
379 * @author Bob Hanson 2019.07.30
381 private void getContactPoints(List<SequenceFeature> l, long pos,
382 List<SequenceFeature> result, boolean isStart)
385 int high = l.size() - 1;
388 int mid = (low + high) >>> 1;
389 SequenceFeature f = l.get(mid);
390 switch (Long.signum((isStart ? f.begin : f.end) - pos))
401 // could be "5" in 12345556788 ?
402 while (++mid <= high && (f = l.get(mid)) != null
403 && (isStart ? f.begin : f.end) == pos)
407 while (--m >= low && (f = l.get(m)) != null
408 && (isStart ? f.begin : f.end) == pos)
418 * Adds contact features whose start position lies in the from-to range to the
426 private void getContactStartOverlaps(long from, long to,
427 List<SequenceFeature> result)
429 for (int i = findFirstBegin(contactFeatureStarts,
430 from), n = contactFeatureStarts.size(); i < n; i++)
432 SequenceFeature sf = contactFeatureStarts.get(i);
442 // * Since we are traversing the sorted feature array in a forward direction,
443 // * all elements prior to the one we are working on have been fully linked.
445 // * we are doing is following those links until we find the first array
447 // * with a containedBy element that has an end >= our begin point. It is
448 // * generally a very short list -- maybe one or two depths. But it might be
455 // private SequenceFeature getContainedBy(SequenceFeature sf,
456 // SequenceFeature sf0)
458 // int begin = sf0.begin;
459 // while (sf != null)
461 // if (begin <= sf.end)
463 // // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf);
466 // sf = sf.containedBy;
472 // * Fast find of known features already on the list; slower removal of
473 // * equivalent feature, not necessarily identical.
476 // * @return 0-based index for this feature in featureList
479 // protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
480 // SequenceFeature feature)
482 // int pos = feature.index - 1;
483 // if (!isTainted && pos >= 0)
487 // return super.getEquivalentFeatureIndex(list, feature);
491 // * Find all overlaps; special case when there is only one feature. The
492 // * required array of start-sorted SequenceFeature is created lazily.
498 // private void getOverlaps(long start, long end,
499 // List<SequenceFeature> result)
501 // int n = featureList.size();
507 // justCheckOne(featureList.get(0), start, end, result);
512 // orderedFeatureStarts = featureList
513 // .toArray(new SequenceFeature[featureList.size()]);
514 // linkFeatures(orderedFeatureStarts);
515 // isTainted = false;
520 // // (1) Find the closest feature to this position.
522 // int index = getClosestFeature(orderedFeatureStarts, start);
523 // SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
525 // // (2) Traverse the containedBy field, checking for overlap.
527 // while (sf != null)
529 // if (sf.end >= start)
533 // sf = sf.containedBy;
536 // // (3) For an interval, find the last feature that starts in this interval,
537 // // and add all features up through that feature.
541 // // fill in with all features that start within this interval, fully
543 // int index2 = getClosestFeature(orderedFeatureStarts, end);
544 // while (++index <= index2)
546 // result.add(orderedFeatureStarts[index]);
553 // * Quick check when we only have one feature.
560 // private void justCheckOne(SequenceFeature sf, long start, long end,
561 // List<SequenceFeature> result)
563 // if (sf.begin <= end && sf.end >= start)
571 // * Run through the sorted sequence array once, building the containedBy
573 // * list references. Does a check first to make sure there is actually
574 // * something out there that is overlapping. A null for sf.containedBy means
575 // * there are no overlaps for this feature.
579 // private void linkFeatures(SequenceFeature[] features)
581 // int n = features.length;
587 // features[0].index = 1;
590 // int maxEnd = features[0].end;
591 // for (int i = 1; i < n;)
593 // SequenceFeature sf = features[i];
594 // if (sf.begin <= maxEnd)
596 // sf.containedBy = getContainedBy(features[i - 1], sf);
598 // if (sf.end > maxEnd)