/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.List; /** * An adaption of FeatureStore that is efficient and lightweight, accelerating * processing speed in JavaScript. * * @author gmcarstairs * @author Bob Hanson 2019.08.03 * */ public class FeatureStoreJS extends FeatureStore { boolean contactsTainted = true; /** * internal reference to features as an ArrayList */ private ArrayList featureList; /** * contact features ordered by first contact position */ private SequenceFeature[] orderedFeatureStarts; /** * indicates that we need to rebuild orderedFeatureStarts and reset index * fields */ private boolean isTainted = true; /** * Constructor */ public FeatureStoreJS() { features = featureList = new ArrayList<>(); } /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain * ordered, and returns true. This method allows duplicate features to be * added, so test before calling to avoid this. * * @param feature * @return */ @Override protected synchronized boolean addContactFeature(SequenceFeature feature) { if (contactFeatureStarts == null) { contactFeatureStarts = new ArrayList<>(); contactFeatureEnds = new ArrayList<>(); } contactFeatureStarts.add( findFirstBegin(contactFeatureStarts, feature.begin), feature); contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end), feature); return true; } // The following methods use a linked list of containment in SequenceFeature // rather than IntervalStore. // // There are two parts --- initialization, and overlap searching. // // Initialization involves two steps: // // (1) sorting of features by start position using a standard Array.sort with // Comparator. // (2) linking of features, effectively nesting them. // // Searching involves three steps: // // (1) binary search for a starting point within the sorted features array. // (2) traverse the linked lists with an end check to read out the // overlapped features at this position. // (3) For an interval, find the last feature that starts in this interval, // and add all features up through that feature. // // All of this is done with very simple standard methods. // Initialization /** * Adds one feature to the IntervalStore that can manage nested features * (creating the IntervalStore if necessary) */ @Override protected synchronized void addPositionalFeature(SequenceFeature feature) { featureList.add(findFirstBegin(featureList, feature.begin), feature); isTainted = true; } @Override protected boolean containsFeature(SequenceFeature feature) { return getEquivalentFeatureIndex(featureList, feature) >= 0; } @Override protected boolean findAndRemoveNonContactFeature(SequenceFeature sf) { int pos = getEquivalentFeatureIndex(featureList, sf); if (pos < 0) { return false; } featureList.remove(pos); return (isTainted = true); } /** * Adds contact features to the result list where either the second or the * first contact position lies within the target range * * @param from * @param to * @param result */ @Override protected void findContactFeatures(long from, long to, List result) { getContactStartOverlaps(from, to, result); getContactEndOverlaps(from, to, result); } /** * Locate the first feature start that is at or after this position. * */ static int ntotal = 0, ncaught = 0; @Override protected int findFirstBegin(List list, long pos) { int matched = list.size(); int end = matched - 1; int start = (end < 0 || list.get(end).begin < pos ? matched : 0); // if (end < 0) // { // // } // else if (start > end) // { // ncaught++; // } // ntotal++; // // System.out.println( // "FSJS " + ncaught + "/" + ntotal + " " + pos + "/" // + (end < 0 ? -1 : list.get(end).begin) + " " // + start + "/" + matched); while (start <= end) { int mid = (start + end) / 2; if (list.get(mid).begin >= pos) { matched = mid; end = mid - 1; } else { start = mid + 1; } } return matched; } /** * Locate the feature start that is after or at this position. * */ @Override protected int findFirstEnd(List list, long pos) { int start = 0; int end = list.size() - 1; int matched = list.size(); while (start <= end) { int mid = (start + end) / 2; if (list.get(mid).end >= pos) { matched = mid; end = mid - 1; } else { start = mid + 1; } } return matched; } /** * Returns a (possibly empty) list of features whose extent overlaps the given * range. The returned list is not ordered. Contact features are included if * either of the contact points lies within the range. * * @param start * start position of overlap range (inclusive) * @param end * end position of overlap range (inclusive) * @return */ @Override public List findOverlappingFeatures(long start, long end, List result) { if (result == null) { result = new ArrayList<>(); } if (contactFeatureStarts != null) { if (start == end) { getContactPoints(contactFeatureStarts, start, result, true); getContactPoints(contactFeatureEnds, start, result, false); } else { findContactFeatures(start, end, result); } } if (featureList.size() > 0) { getOverlaps(start, end, result); } return result; } /** * A binary search identical to the one used for contact start/end, but here * we return the feature itself. Unlike Collection.BinarySearch, all we have * to be is close, not exact, and we make sure if there is a string of * identical starts, then we slide to the end so that we can check all of * them. * * @param l * @param pos * @return */ private int getClosestFeature(SequenceFeature[] l, long pos) { int low = 0; int high = l.length - 1; while (low <= high) { int mid = (low + high) >>> 1; SequenceFeature f = l[mid]; switch (Long.signum(f.begin - pos)) { case -1: low = mid + 1; continue; case 1: high = mid - 1; continue; case 0: while (++mid <= high && l[mid].begin == pos) { ; } return --mid; } } return (high < 0 ? -1 : high); } /** * Adds to the result list any contact features whose end (second contact * point), but not start (first contact point), lies in the query from-to * range * * @param from * @param to * @param result */ private void getContactEndOverlaps(long from, long to, List result) { // find the first contact feature (if any) // with end point not before the target range for (int i = findFirstEnd(contactFeatureEnds, from), n = contactFeatureEnds.size(); i < n; i++) { SequenceFeature sf = contactFeatureEnds.get(i); if (sf.begin >= from && sf.begin <= to) { // this feature's first contact position lies in the search range // so we don't include it in results a second time continue; } if (sf.end > to) { // this feature (and all following) has end point after the target range break; } // feature has end >= from and end <= to // i.e. contact end point lies within overlap search range result.add(sf); } } /** * Binary search for contact start or end at a given (Overview) position. * * @param l * @param pos * @param result * @param isStart * * @author Bob Hanson 2019.07.30 */ private void getContactPoints(List l, long pos, List result, boolean isStart) { int low = 0; int high = l.size() - 1; while (low <= high) { int mid = (low + high) >>> 1; SequenceFeature f = l.get(mid); switch (Long.signum((isStart ? f.begin : f.end) - pos)) { case -1: low = mid + 1; continue; case 1: high = mid - 1; continue; case 0: int m = mid; result.add(f); // could be "5" in 12345556788 ? while (++mid <= high && (f = l.get(mid)) != null && (isStart ? f.begin : f.end) == pos) { result.add(f); } while (--m >= low && (f = l.get(m)) != null && (isStart ? f.begin : f.end) == pos) { result.add(f); } return; } } } /** * Adds contact features whose start position lies in the from-to range to the * result list * * @param from * @param to * @param result */ private void getContactStartOverlaps(long from, long to, List result) { for (int i = findFirstBegin(contactFeatureStarts, from), n = contactFeatureStarts.size(); i < n; i++) { SequenceFeature sf = contactFeatureStarts.get(i); if (sf.begin > to) { break; } result.add(sf); } } /** * Since we are traversing the sorted feature array in a forward direction, * all elements prior to the one we are working on have been fully linked. All * we are doing is following those links until we find the first array feature * with a containedBy element that has an end >= our begin point. It is * generally a very short list -- maybe one or two depths. But it might be * more than that. * * @param sf * @param sf0 * @return */ private SequenceFeature getContainedBy(SequenceFeature sf, SequenceFeature sf0) { int begin = sf0.begin; while (sf != null) { if (begin <= sf.end) { // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf); return sf; } sf = sf.containedBy; } return null; } /** * Fast find of known features already on the list; slower removal of * equivalent feature, not necessarily identical. * * @param feature * @return 0-based index for this feature in featureList */ @Override protected int getEquivalentFeatureIndex(List list, SequenceFeature feature) { int pos = feature.index1 - 1; if (!isTainted && pos >= 0) { return pos; } return super.getEquivalentFeatureIndex(list, feature); } /** * Find all overlaps; special case when there is only one feature. The * required array of start-sorted SequenceFeature is created lazily. * * @param start * @param end * @param result */ private void getOverlaps(long start, long end, List result) { int n = featureList.size(); switch (n) { case 0: return; case 1: justCheckOne(featureList.get(0), start, end, result); return; default: if (isTainted) { orderedFeatureStarts = featureList .toArray(new SequenceFeature[featureList.size()]); linkFeatures(orderedFeatureStarts); isTainted = false; } break; } // (1) Find the closest feature to this position. int index = getClosestFeature(orderedFeatureStarts, start); SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]); // (2) Traverse the containedBy field, checking for overlap. while (sf != null) { if (sf.end >= start) { result.add(sf); } sf = sf.containedBy; } // (3) For an interval, find the last feature that starts in this interval, // and add all features up through that feature. if (end > start) { // fill in with all features that start within this interval, fully // inclusive int index2 = getClosestFeature(orderedFeatureStarts, end); while (++index <= index2) { result.add(orderedFeatureStarts[index]); } } } /** * Quick check when we only have one feature. * * @param sf * @param start * @param end * @param result */ private void justCheckOne(SequenceFeature sf, long start, long end, List result) { if (sf.begin <= end && sf.end >= start) { result.add(sf); } return; } /** * Run through the sorted sequence array once, building the containedBy linked * list references. Does a check first to make sure there is actually * something out there that is overlapping. A null for sf.containedBy means * there are no overlaps for this feature. * * @param features */ private void linkFeatures(SequenceFeature[] features) { int n = features.length; switch (n) { case 0: return; case 1: features[0].index1 = 1; return; } int maxEnd = features[0].end; for (int i = 1; i < n;) { SequenceFeature sf = features[i]; if (sf.begin <= maxEnd) { sf.containedBy = getContainedBy(features[i - 1], sf); } if (sf.end > maxEnd) { maxEnd = sf.end; } sf.index1 = ++i; } } }