From: hansonr Date: Fri, 16 Aug 2019 12:43:39 +0000 (-0500) Subject: JAL-3253-applet JAL-3397 JAL-3383 fast IntervalStore for JavaScript X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=6671b10825bea29ed703911822c18e25ce44f776;p=jalview.git JAL-3253-applet JAL-3397 JAL-3383 fast IntervalStore for JavaScript fully commented and cleaned code --- diff --git a/src/jalview/datamodel/features/FeatureStoreJS.java b/src/jalview/datamodel/features/FeatureStoreJS.java index c15b99e..dbf1413 100644 --- a/src/jalview/datamodel/features/FeatureStoreJS.java +++ b/src/jalview/datamodel/features/FeatureStoreJS.java @@ -31,52 +31,43 @@ import intervalstore.nonc.IntervalStore; * An adaption of FeatureStore that is efficient and lightweight, accelerating * processing speed in JavaScript. * + * It could be used in Java as well, with significant acceleration, but all this + * is so fast anyway that it probably will not be noticed in Java to speed it up + * by a factor of two or three. So for now, at least, this implementation is + * just in JavaScript. The flag for this is in SequenceFeatures. + * + * This implementation uses the IntervalStore developed by Bob Hanson, found at + * https://github.com/BobHanson/IntervalStoreJ, forked from the one developed by + * Mungo Carstairs at https://github.com/bartongroup/IntervalStoreJ. + * + * See the discussion folder at https://github.com/BobHanson/IntervalStoreJ for + * details. + * * @author gmcarstairs - * @author Bob Hanson 2019.08.03 + * @author Bob Hanson 2019.08.03-2019.08.16 * */ public class FeatureStoreJS extends FeatureStore { - boolean contactsTainted = true; - private IntervalStore featureStore; - // - // /** - // * internal reference to features as an ArrayList - // */ - // private ArrayList featureList; - // - // /** - // * contact features ordered by first contact position - // */ - // private SequenceFeature[] orderedFeatureStarts; - - // /** - // * indicates that we need to rebuild orderedFeatureStarts and reset index - // * fields - // */ - // private boolean isTainted = true; - - /** - * Constructor - */ public FeatureStoreJS() { // the only reference to features field in this class -- for the superclass - features = featureStore = new IntervalStore<>(); // featureList = new - // ArrayList<>(); + // linked-list no-NCList IntervalStore with presort + + features = featureStore = new IntervalStore<>(true); } /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain - * ordered, and returns true. This method allows duplicate features to be - * added, so test before calling to avoid this. + * ordered. This method allows duplicate features to be added, so test before + * calling to avoid this. * * @param feature - * @return + * @return true */ @Override protected synchronized boolean addContactFeature(SequenceFeature feature) @@ -93,78 +84,60 @@ public class FeatureStoreJS extends FeatureStore return true; } - // The following methods use a linked list of containment in SequenceFeature - // rather than IntervalStore. - // - // There are two parts --- initialization, and overlap searching. - // - // Initialization involves two steps: - // - // (1) sorting of features by start position using a standard Array.sort with - // Comparator. - // (2) linking of features, effectively nesting them. - // - // Searching involves three steps: - // - // (1) binary search for a starting point within the sorted features array. - // (2) traverse the linked lists with an end check to read out the - // overlapped features at this position. - // (3) For an interval, find the last feature that starts in this interval, - // and add all features up through that feature. - // - // All of this is done with very simple standard methods. - - // Initialization /** - * Adds one feature to the IntervalStore that can manage nested features - * (creating the IntervalStore if necessary) + * Add a feature to the IntervalStore, not allowing for duplicates. + * * - * @return false if could not add it (late check for contained + * @return false if could not add it (late check for duplicate) */ @Override protected synchronized boolean addPositionalFeature( SequenceFeature feature) { - return featureStore.add(feature, false); - // features.add(feature); - // featureList.add(findFirstBegin(featureList, feature.begin), feature); - // isTainted = true; } + /** + * Initial check in FeatureStore.add(feature) that in other implementations + * does a containment check, but in this implementation just returns false to + * indicate that we should continue. This implementation will do this check as + * part of the add() method for greater efficiency (one binary search instead + * of two). + * + * @return false -- meaning "maybe not contained; continue adding" + */ @Override protected boolean checkContainsPositionalFeatureForAdd( SequenceFeature feature) { - // the non-NCList IntervalStore does this as part of the add for greater - // efficiency (one binary search) return false; } + /** + * Check to see if a feature (or its equivalent based on + * IntervalI.equalsInterval) is already in this store. This method should be + * avoided except when necessary, as it involves a binary search with identity + * check. + * + * @return true if this feature or its equivalent (based on equalsInterval) is + * present already in the collection. + */ @Override protected boolean containsFeature(SequenceFeature feature) { return featureStore.contains(feature); - // return getEquivalentFeatureIndex(featureList, feature) >= 0; } @Override protected boolean findAndRemoveNonContactFeature(SequenceFeature sf) { return featureStore.remove(sf); - // int pos = getEquivalentFeatureIndex(featureList, sf); - // if (pos < 0) - // { - // return false; - // } - // featureList.remove(pos); - // return (isTainted = true); } /** - * Adds contact features to the result list where either the second or the - * first contact position lies within the target range + * Add contact features to the result list where either the second or the + * first contact position lies within the target range, inclusively. * * @param from * @param to @@ -179,33 +152,17 @@ public class FeatureStoreJS extends FeatureStore } /** - * Locate the first feature start that is at or after this position. + * Locate the first feature start in a standard ArrayList that is at or after + * this position. * */ - static int ntotal = 0, ncaught = 0; - @Override protected int findFirstBegin(List list, long pos) { int matched = list.size(); int end = matched - 1; int start = (end < 0 || list.get(end).begin < pos ? matched : 0); - // if (end < 0) - // { - // - // } - // else if (start > end) - // { - // ncaught++; - // } - // ntotal++; - // - // System.out.println( - // "FSJS " + ncaught + "/" + ntotal + " " + pos + "/" - // + (end < 0 ? -1 : list.get(end).begin) + " " - // + start + "/" + matched); - while (start <= end) { int mid = (start + end) / 2; @@ -223,17 +180,17 @@ public class FeatureStoreJS extends FeatureStore } /** - * Locate the feature start that is after or at this position. + * Locate the feature end in a standard ArrayList that is after or at this + * position. * */ @Override protected int findFirstEnd(List list, long pos) { - int start = 0; - int end = list.size() - 1; int matched = list.size(); - + int end = matched - 1; + int start = 0; while (start <= end) { int mid = (start + end) / 2; @@ -252,14 +209,27 @@ public class FeatureStoreJS extends FeatureStore /** * Returns a (possibly empty) list of features whose extent overlaps the given - * range. The returned list is not ordered. Contact features are included if - * either of the contact points lies within the range. + * range. The returned list is ordered as follows: + * + * (1) ContactFeature starts + * + * (2) ContactFeature ends (that are not also starts) + * + * (3) noncontact SequenceFeatures, in reverse start order + * + * (This last reverse order is for efficiency in processing only.) + * + * * * @param start * start position of overlap range (inclusive) * @param end * end position of overlap range (inclusive) - * @return + * + * @param result + * optional result list; for highest efficiency, provide this + * parameter + * @return result same as result parameter, or a new ArrayList if that is null */ @Override @@ -285,54 +255,13 @@ public class FeatureStoreJS extends FeatureStore if (featureStore.size() > 0) { featureStore.findOverlaps(start, end, result); - // getOverlaps(start, end, result); } return result; } - // /** - // * A binary search identical to the one used for contact start/end, but here - // * we return the feature itself. Unlike Collection.BinarySearch, all we have - // * to be is close, not exact, and we make sure if there is a string of - // * identical starts, then we slide to the end so that we can check all of - // * them. - // * - // * @param l - // * @param pos - // * @return - // */ - // private int getClosestFeature(SequenceFeature[] l, long pos) - // { - // int low = 0; - // int high = l.length - 1; - // while (low <= high) - // { - // int mid = (low + high) >>> 1; - // SequenceFeature f = l[mid]; - // switch (Long.signum(f.begin - pos)) - // { - // case -1: - // low = mid + 1; - // continue; - // case 1: - // high = mid - 1; - // continue; - // case 0: - // - // while (++mid <= high && l[mid].begin == pos) - // { - // ; - // } - // return --mid; - // } - // } - // return (high < 0 ? -1 : high); - // } - /** - * Adds to the result list any contact features whose end (second contact - * point), but not start (first contact point), lies in the query from-to - * range + * Adds to the result list any contact features having end (second contact + * point), but not start (first contact point), in the query from-to range * * @param from * @param to @@ -369,7 +298,9 @@ public class FeatureStoreJS extends FeatureStore } /** - * Binary search for contact start or end at a given (Overview) position. + * Binary search for contact start or end matching a specific position. This + * efficient search was designed specifically for rapid return for the + * OverviewPanel. It's implementation sped processing of that panel by 2300%. * * @param l * @param pos @@ -437,169 +368,4 @@ public class FeatureStoreJS extends FeatureStore result.add(sf); } } - - // /** - // * Since we are traversing the sorted feature array in a forward direction, - // * all elements prior to the one we are working on have been fully linked. - // All - // * we are doing is following those links until we find the first array - // feature - // * with a containedBy element that has an end >= our begin point. It is - // * generally a very short list -- maybe one or two depths. But it might be - // * more than that. - // * - // * @param sf - // * @param sf0 - // * @return - // */ - // private SequenceFeature getContainedBy(SequenceFeature sf, - // SequenceFeature sf0) - // { - // int begin = sf0.begin; - // while (sf != null) - // { - // if (begin <= sf.end) - // { - // // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf); - // return sf; - // } - // sf = sf.containedBy; - // } - // return null; - // } - - // /** - // * Fast find of known features already on the list; slower removal of - // * equivalent feature, not necessarily identical. - // * - // * @param feature - // * @return 0-based index for this feature in featureList - // */ - // @Override - // protected int getEquivalentFeatureIndex(List list, - // SequenceFeature feature) - // { - // int pos = feature.index - 1; - // if (!isTainted && pos >= 0) - // { - // return pos; - // } - // return super.getEquivalentFeatureIndex(list, feature); - // } - // - // /** - // * Find all overlaps; special case when there is only one feature. The - // * required array of start-sorted SequenceFeature is created lazily. - // * - // * @param start - // * @param end - // * @param result - // */ - // private void getOverlaps(long start, long end, - // List result) - // { - // int n = featureList.size(); - // switch (n) - // { - // case 0: - // return; - // case 1: - // justCheckOne(featureList.get(0), start, end, result); - // return; - // default: - // if (isTainted) - // { - // orderedFeatureStarts = featureList - // .toArray(new SequenceFeature[featureList.size()]); - // linkFeatures(orderedFeatureStarts); - // isTainted = false; - // } - // break; - // } - // - // // (1) Find the closest feature to this position. - // - // int index = getClosestFeature(orderedFeatureStarts, start); - // SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]); - // - // // (2) Traverse the containedBy field, checking for overlap. - // - // while (sf != null) - // { - // if (sf.end >= start) - // { - // result.add(sf); - // } - // sf = sf.containedBy; - // } - // - // // (3) For an interval, find the last feature that starts in this interval, - // // and add all features up through that feature. - // - // if (end > start) - // { - // // fill in with all features that start within this interval, fully - // // inclusive - // int index2 = getClosestFeature(orderedFeatureStarts, end); - // while (++index <= index2) - // { - // result.add(orderedFeatureStarts[index]); - // } - // - // } - // } - // - // /** - // * Quick check when we only have one feature. - // * - // * @param sf - // * @param start - // * @param end - // * @param result - // */ - // private void justCheckOne(SequenceFeature sf, long start, long end, - // List result) - // { - // if (sf.begin <= end && sf.end >= start) - // { - // result.add(sf); - // } - // return; - // } - // - // /** - // * Run through the sorted sequence array once, building the containedBy - // linked - // * list references. Does a check first to make sure there is actually - // * something out there that is overlapping. A null for sf.containedBy means - // * there are no overlaps for this feature. - // * - // * @param features - // */ - // private void linkFeatures(SequenceFeature[] features) - // { - // int n = features.length; - // switch (n) - // { - // case 0: - // return; - // case 1: - // features[0].index = 1; - // return; - // } - // int maxEnd = features[0].end; - // for (int i = 1; i < n;) - // { - // SequenceFeature sf = features[i]; - // if (sf.begin <= maxEnd) - // { - // sf.containedBy = getContainedBy(features[i - 1], sf); - // } - // if (sf.end > maxEnd) - // { - // maxEnd = sf.end; - // } - // sf.index = ++i; - // } - // } } diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java index be9c4f2..cb2b8cc 100644 --- a/src/jalview/datamodel/features/SequenceFeatures.java +++ b/src/jalview/datamodel/features/SequenceFeatures.java @@ -59,18 +59,21 @@ public class SequenceFeatures implements SequenceFeaturesI private final static int INTERVAL_STORE_NCLIST = 0; /** - * linked-list deferred-sort IntervalStore + * linked-list deferred-sort IntervalStore - experimental only; unused */ - private final static int INTERVAL_STORE_NOCLKIST = 1; + private final static int INTERVAL_STORE_LINKED_LIST_NO_PRESORT = 1; /** - * no-IntervalStore option for JavaScript + * linked-list IntervalStore option for JavaScript */ private final static int INTERVAL_STORE_LINKED_LIST = -1; + /** + * mode for Java or JavaScript; can be set differently for testing, but + * default is LINKED_LIST for JalviewJS and NCLIST for Java + */ private final int INTERVAL_STORE_MODE = ( - // can be set differently for testing, but default is - // LINKED_LIST for JalviewJS and NCLIST for Java + // true || // Platform.isJS() ? // INTERVAL_STORE_LINKED_LIST // : INTERVAL_STORE_NCLIST// @@ -132,7 +135,7 @@ public class SequenceFeatures implements SequenceFeaturesI default: case INTERVAL_STORE_NCLIST: return new FeatureStoreImpl(true); - case INTERVAL_STORE_NOCLKIST: + case INTERVAL_STORE_LINKED_LIST_NO_PRESORT: return new FeatureStoreImpl(false); case INTERVAL_STORE_LINKED_LIST: return new FeatureStoreJS();