* An adaption of FeatureStore that is efficient and lightweight, accelerating
* processing speed in JavaScript.
*
+ * It could be used in Java as well, with significant acceleration, but all this
+ * is so fast anyway that it probably will not be noticed in Java to speed it up
+ * by a factor of two or three. So for now, at least, this implementation is
+ * just in JavaScript. The flag for this is in SequenceFeatures.
+ *
+ * This implementation uses the IntervalStore developed by Bob Hanson, found at
+ * https://github.com/BobHanson/IntervalStoreJ, forked from the one developed by
+ * Mungo Carstairs at https://github.com/bartongroup/IntervalStoreJ.
+ *
+ * See the discussion folder at https://github.com/BobHanson/IntervalStoreJ for
+ * details.
+ *
* @author gmcarstairs
- * @author Bob Hanson 2019.08.03
+ * @author Bob Hanson 2019.08.03-2019.08.16
*
*/
public class FeatureStoreJS extends FeatureStore
{
- boolean contactsTainted = true;
-
private IntervalStore<SequenceFeature> featureStore;
- //
- // /**
- // * internal reference to features as an ArrayList
- // */
- // private ArrayList<SequenceFeature> featureList;
- //
- // /**
- // * contact features ordered by first contact position
- // */
- // private SequenceFeature[] orderedFeatureStarts;
-
- // /**
- // * indicates that we need to rebuild orderedFeatureStarts and reset index
- // * fields
- // */
- // private boolean isTainted = true;
-
- /**
- * Constructor
- */
public FeatureStoreJS()
{
// the only reference to features field in this class -- for the superclass
- features = featureStore = new IntervalStore<>(); // featureList = new
- // ArrayList<>();
+ // linked-list no-NCList IntervalStore with presort
+
+ features = featureStore = new IntervalStore<>(true);
}
/**
* Add a contact feature to the lists that hold them ordered by start (first
* contact) and by end (second contact) position, ensuring the lists remain
- * ordered, and returns true. This method allows duplicate features to be
- * added, so test before calling to avoid this.
+ * ordered. This method allows duplicate features to be added, so test before
+ * calling to avoid this.
*
* @param feature
- * @return
+ * @return true
*/
@Override
protected synchronized boolean addContactFeature(SequenceFeature feature)
return true;
}
- // The following methods use a linked list of containment in SequenceFeature
- // rather than IntervalStore.
- //
- // There are two parts --- initialization, and overlap searching.
- //
- // Initialization involves two steps:
- //
- // (1) sorting of features by start position using a standard Array.sort with
- // Comparator.
- // (2) linking of features, effectively nesting them.
- //
- // Searching involves three steps:
- //
- // (1) binary search for a starting point within the sorted features array.
- // (2) traverse the linked lists with an end check to read out the
- // overlapped features at this position.
- // (3) For an interval, find the last feature that starts in this interval,
- // and add all features up through that feature.
- //
- // All of this is done with very simple standard methods.
-
- // Initialization
/**
- * Adds one feature to the IntervalStore that can manage nested features
- * (creating the IntervalStore if necessary)
+ * Add a feature to the IntervalStore, not allowing for duplicates.
+ *
*
- * @return false if could not add it (late check for contained
+ * @return false if could not add it (late check for duplicate)
*/
@Override
protected synchronized boolean addPositionalFeature(
SequenceFeature feature)
{
-
return featureStore.add(feature, false);
- // features.add(feature);
- // featureList.add(findFirstBegin(featureList, feature.begin), feature);
- // isTainted = true;
}
+ /**
+ * Initial check in FeatureStore.add(feature) that in other implementations
+ * does a containment check, but in this implementation just returns false to
+ * indicate that we should continue. This implementation will do this check as
+ * part of the add() method for greater efficiency (one binary search instead
+ * of two).
+ *
+ * @return false -- meaning "maybe not contained; continue adding"
+ */
@Override
protected boolean checkContainsPositionalFeatureForAdd(
SequenceFeature feature)
{
- // the non-NCList IntervalStore does this as part of the add for greater
- // efficiency (one binary search)
return false;
}
+ /**
+ * Check to see if a feature (or its equivalent based on
+ * IntervalI.equalsInterval) is already in this store. This method should be
+ * avoided except when necessary, as it involves a binary search with identity
+ * check.
+ *
+ * @return true if this feature or its equivalent (based on equalsInterval) is
+ * present already in the collection.
+ */
@Override
protected boolean containsFeature(SequenceFeature feature)
{
return featureStore.contains(feature);
- // return getEquivalentFeatureIndex(featureList, feature) >= 0;
}
@Override
protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
{
return featureStore.remove(sf);
- // int pos = getEquivalentFeatureIndex(featureList, sf);
- // if (pos < 0)
- // {
- // return false;
- // }
- // featureList.remove(pos);
- // return (isTainted = true);
}
/**
- * Adds contact features to the result list where either the second or the
- * first contact position lies within the target range
+ * Add contact features to the result list where either the second or the
+ * first contact position lies within the target range, inclusively.
*
* @param from
* @param to
}
/**
- * Locate the first feature start that is at or after this position.
+ * Locate the first feature start in a standard ArrayList that is at or after
+ * this position.
*
*/
- static int ntotal = 0, ncaught = 0;
-
@Override
protected int findFirstBegin(List<SequenceFeature> list, long pos)
{
int matched = list.size();
int end = matched - 1;
int start = (end < 0 || list.get(end).begin < pos ? matched : 0);
- // if (end < 0)
- // {
- //
- // }
- // else if (start > end)
- // {
- // ncaught++;
- // }
- // ntotal++;
- //
- // System.out.println(
- // "FSJS " + ncaught + "/" + ntotal + " " + pos + "/"
- // + (end < 0 ? -1 : list.get(end).begin) + " "
- // + start + "/" + matched);
-
while (start <= end)
{
int mid = (start + end) / 2;
}
/**
- * Locate the feature start that is after or at this position.
+ * Locate the feature end in a standard ArrayList that is after or at this
+ * position.
*
*/
@Override
protected int findFirstEnd(List<SequenceFeature> list, long pos)
{
- int start = 0;
- int end = list.size() - 1;
int matched = list.size();
-
+ int end = matched - 1;
+ int start = 0;
while (start <= end)
{
int mid = (start + end) / 2;
/**
* Returns a (possibly empty) list of features whose extent overlaps the given
- * range. The returned list is not ordered. Contact features are included if
- * either of the contact points lies within the range.
+ * range. The returned list is ordered as follows:
+ *
+ * (1) ContactFeature starts
+ *
+ * (2) ContactFeature ends (that are not also starts)
+ *
+ * (3) noncontact SequenceFeatures, in reverse start order
+ *
+ * (This last reverse order is for efficiency in processing only.)
+ *
+ *
*
* @param start
* start position of overlap range (inclusive)
* @param end
* end position of overlap range (inclusive)
- * @return
+ *
+ * @param result
+ * optional result list; for highest efficiency, provide this
+ * parameter
+ * @return result same as result parameter, or a new ArrayList if that is null
*/
@Override
if (featureStore.size() > 0)
{
featureStore.findOverlaps(start, end, result);
- // getOverlaps(start, end, result);
}
return result;
}
- // /**
- // * A binary search identical to the one used for contact start/end, but here
- // * we return the feature itself. Unlike Collection.BinarySearch, all we have
- // * to be is close, not exact, and we make sure if there is a string of
- // * identical starts, then we slide to the end so that we can check all of
- // * them.
- // *
- // * @param l
- // * @param pos
- // * @return
- // */
- // private int getClosestFeature(SequenceFeature[] l, long pos)
- // {
- // int low = 0;
- // int high = l.length - 1;
- // while (low <= high)
- // {
- // int mid = (low + high) >>> 1;
- // SequenceFeature f = l[mid];
- // switch (Long.signum(f.begin - pos))
- // {
- // case -1:
- // low = mid + 1;
- // continue;
- // case 1:
- // high = mid - 1;
- // continue;
- // case 0:
- //
- // while (++mid <= high && l[mid].begin == pos)
- // {
- // ;
- // }
- // return --mid;
- // }
- // }
- // return (high < 0 ? -1 : high);
- // }
-
/**
- * Adds to the result list any contact features whose end (second contact
- * point), but not start (first contact point), lies in the query from-to
- * range
+ * Adds to the result list any contact features having end (second contact
+ * point), but not start (first contact point), in the query from-to range
*
* @param from
* @param to
}
/**
- * Binary search for contact start or end at a given (Overview) position.
+ * Binary search for contact start or end matching a specific position. This
+ * efficient search was designed specifically for rapid return for the
+ * OverviewPanel. It's implementation sped processing of that panel by 2300%.
*
* @param l
* @param pos
result.add(sf);
}
}
-
- // /**
- // * Since we are traversing the sorted feature array in a forward direction,
- // * all elements prior to the one we are working on have been fully linked.
- // All
- // * we are doing is following those links until we find the first array
- // feature
- // * with a containedBy element that has an end >= our begin point. It is
- // * generally a very short list -- maybe one or two depths. But it might be
- // * more than that.
- // *
- // * @param sf
- // * @param sf0
- // * @return
- // */
- // private SequenceFeature getContainedBy(SequenceFeature sf,
- // SequenceFeature sf0)
- // {
- // int begin = sf0.begin;
- // while (sf != null)
- // {
- // if (begin <= sf.end)
- // {
- // // System.out.println("\nFSJS found " + sf0 + "\nFS in " + sf);
- // return sf;
- // }
- // sf = sf.containedBy;
- // }
- // return null;
- // }
-
- // /**
- // * Fast find of known features already on the list; slower removal of
- // * equivalent feature, not necessarily identical.
- // *
- // * @param feature
- // * @return 0-based index for this feature in featureList
- // */
- // @Override
- // protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
- // SequenceFeature feature)
- // {
- // int pos = feature.index - 1;
- // if (!isTainted && pos >= 0)
- // {
- // return pos;
- // }
- // return super.getEquivalentFeatureIndex(list, feature);
- // }
- //
- // /**
- // * Find all overlaps; special case when there is only one feature. The
- // * required array of start-sorted SequenceFeature is created lazily.
- // *
- // * @param start
- // * @param end
- // * @param result
- // */
- // private void getOverlaps(long start, long end,
- // List<SequenceFeature> result)
- // {
- // int n = featureList.size();
- // switch (n)
- // {
- // case 0:
- // return;
- // case 1:
- // justCheckOne(featureList.get(0), start, end, result);
- // return;
- // default:
- // if (isTainted)
- // {
- // orderedFeatureStarts = featureList
- // .toArray(new SequenceFeature[featureList.size()]);
- // linkFeatures(orderedFeatureStarts);
- // isTainted = false;
- // }
- // break;
- // }
- //
- // // (1) Find the closest feature to this position.
- //
- // int index = getClosestFeature(orderedFeatureStarts, start);
- // SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
- //
- // // (2) Traverse the containedBy field, checking for overlap.
- //
- // while (sf != null)
- // {
- // if (sf.end >= start)
- // {
- // result.add(sf);
- // }
- // sf = sf.containedBy;
- // }
- //
- // // (3) For an interval, find the last feature that starts in this interval,
- // // and add all features up through that feature.
- //
- // if (end > start)
- // {
- // // fill in with all features that start within this interval, fully
- // // inclusive
- // int index2 = getClosestFeature(orderedFeatureStarts, end);
- // while (++index <= index2)
- // {
- // result.add(orderedFeatureStarts[index]);
- // }
- //
- // }
- // }
- //
- // /**
- // * Quick check when we only have one feature.
- // *
- // * @param sf
- // * @param start
- // * @param end
- // * @param result
- // */
- // private void justCheckOne(SequenceFeature sf, long start, long end,
- // List<SequenceFeature> result)
- // {
- // if (sf.begin <= end && sf.end >= start)
- // {
- // result.add(sf);
- // }
- // return;
- // }
- //
- // /**
- // * Run through the sorted sequence array once, building the containedBy
- // linked
- // * list references. Does a check first to make sure there is actually
- // * something out there that is overlapping. A null for sf.containedBy means
- // * there are no overlaps for this feature.
- // *
- // * @param features
- // */
- // private void linkFeatures(SequenceFeature[] features)
- // {
- // int n = features.length;
- // switch (n)
- // {
- // case 0:
- // return;
- // case 1:
- // features[0].index = 1;
- // return;
- // }
- // int maxEnd = features[0].end;
- // for (int i = 1; i < n;)
- // {
- // SequenceFeature sf = features[i];
- // if (sf.begin <= maxEnd)
- // {
- // sf.containedBy = getContainedBy(features[i - 1], sf);
- // }
- // if (sf.end > maxEnd)
- // {
- // maxEnd = sf.end;
- // }
- // sf.index = ++i;
- // }
- // }
}