private ArrayList<SequenceFeature> featureList;
/**
+ * array copy of featureList, which is kept ordered by feature begin position
+ */
+ private SequenceFeature[] orderedFeatureStarts;
+
+ /**
+ * indicates that we need to rebuild orderedFeatureStarts and reset index
+ * fields
+ */
+ private boolean isTainted = true;
+
+ /**
* Constructor
*/
public FeatureStoreJS()
return true;
}
- /**
- * Adds one feature to the IntervalStore that can manage nested features
- * (creating the IntervalStore if necessary)
- */
- @Override
- protected synchronized void addNestedFeature(SequenceFeature feature)
- {
- featureList.add(findFirstBegin(featureList, feature.begin), feature);
- }
-
- /**
- * Returns a (possibly empty) list of features whose extent overlaps the given
- * range. The returned list is not ordered. Contact features are included if
- * either of the contact points lies within the range.
- *
- * @param start
- * start position of overlap range (inclusive)
- * @param end
- * end position of overlap range (inclusive)
- * @return
- */
-
- @Override
- public List<SequenceFeature> findOverlappingFeatures(long start, long end,
- List<SequenceFeature> result)
- {
- if (result == null)
- {
- result = new ArrayList<>();
- }
- if (contactFeatureStarts != null)
- {
- if (start == end)
- {
- findContactPoints(contactFeatureStarts, start, result, true);
- findContactPoints(contactFeatureEnds, start, result, false);
- }
- else
- {
- findContactFeatures(start, end, result);
- }
- }
- if (featureList.size() > 0)
- {
- findOverlaps(start, end, result);
- }
- return result;
- }
-
// The following methods use a linked list of containment in SequenceFeature
// rather than IntervalStore.
//
// Initialization
- /*
- * contact features ordered by first contact position
+ /**
+ * Inserts one feature into featureList at the index returned by
+ * findFirstBegin for its begin position, and marks the derived array as stale
*/
- private SequenceFeature[] orderedFeatureStarts;
-
- private void rebuildArrays(int n)
+ @Override
+ protected synchronized void addPositionalFeature(SequenceFeature feature)
{
- // Arrays.sort(orderedFeatureStarts, startComp);
- orderedFeatureStarts = featureList
- .toArray(new SequenceFeature[featureList.size()]);
- linkFeatures(orderedFeatureStarts);
+ featureList.add(findFirstBegin(featureList, feature.begin), feature);
+ isTainted = true;
}
- // /**
- // * just a standard Comparator
- // */
- // private static StartComparator startComp;
- //
- // class StartComparator implements Comparator<SequenceFeature>
- // {
- //
- // @Override
- // public int compare(SequenceFeature o1, SequenceFeature o2)
- // {
- // int p1 = o1.begin;
- // int p2 = o2.begin;
- // return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0);
- // }
- //
- // }
-
- /**
- * Run through the sorted sequence array once, building the containedBy linked
- * list references. Does a check first to make sure there is actually
- * something out there that is overlapping. A null for sf.containedBy means
- * there are no overlaps for this feature.
- *
- * @param intervals
- */
- private void linkFeatures(SequenceFeature[] intervals)
+ @Override
+ protected boolean containsFeature(SequenceFeature feature)
{
- if (intervals.length < 2)
- {
- return;
- }
- int maxEnd = intervals[0].end;
- for (int i = 1, n = intervals.length; i < n; i++)
- {
- SequenceFeature sf = intervals[i];
- if (sf.begin <= maxEnd)
- {
- sf.containedBy = getContainedBy(intervals[i - 1], sf);
- }
- if (sf.end > maxEnd)
- {
- maxEnd = sf.end;
- }
- }
+
+ return getEquivalentFeatureIndex(featureList, feature) >= 0;
}
- /**
- * Since we are traversing the sorted feature array in a forward direction,
- * all elements prior to the one we are working on have been fully linked. All
- * we are doing is following those links until we find the first array feature
- * with a containedBy element that has an end >= our begin point. It is
- * generally a very short list -- maybe one or two depths. But it might be
- * more than that.
- *
- * @param sf
- * @param sf0
- * @return
- */
- private SequenceFeature getContainedBy(SequenceFeature sf,
- SequenceFeature sf0)
+ @Override
+ protected boolean findAndRemoveNonContactFeature(SequenceFeature sf)
{
- int begin = sf0.begin;
- while (sf != null)
+ int pos = getEquivalentFeatureIndex(featureList, sf);
+ if (pos < 0)
{
- if (begin <= sf.end)
- {
- // System.out.println("\nFS found " + sf0.index + ":" + sf0
- // + "\nFS in " + sf.index + ":" + sf);
- return sf;
- }
- sf = sf.containedBy;
+ return false;
}
- return null;
+ featureList.remove(pos);
+ return (isTainted = true);
}
- // search-stage methods
-
/**
- * Binary search for contact start or end at a given (Overview) position.
+ * Adds contact features to the result list where either the second or the
+ * first contact position lies within the target range
*
- * @param l
- * @param pos
+ * @param from
+ * @param to
* @param result
- * @param isStart
- *
- * @author Bob Hanson 2019.07.30
*/
- private static void findContactPoints(List<SequenceFeature> l, long pos,
- List<SequenceFeature> result, boolean isStart)
+ @Override
+ protected void findContactFeatures(long from, long to,
+ List<SequenceFeature> result)
{
- int low = 0;
- int high = l.size() - 1;
- while (low <= high)
- {
- int mid = (low + high) >>> 1;
- SequenceFeature f = l.get(mid);
- switch (Long.signum((isStart ? f.begin : f.end) - pos))
- {
- case -1:
- low = mid + 1;
- continue;
- case 1:
- high = mid - 1;
- continue;
- case 0:
- int m = mid;
- result.add(f);
- // could be "5" in 12345556788 ?
- while (++mid <= high && (f = l.get(mid)) != null
- && (isStart ? f.begin : f.end) == pos)
- {
- result.add(f);
- }
- while (--m >= low && (f = l.get(m)) != null
- && (isStart ? f.begin : f.end) == pos)
- {
- result.add(f);
- }
- return;
- }
- }
+ getContactStartOverlaps(from, to, result);
+ getContactEndOverlaps(from, to, result);
}
- /**
- * Find all overlaps; special case when there is only one feature. The
- * required array of start-sorted SequenceFeature is created lazily.
- *
- * @param start
- * @param end
- * @param result
- */
- private void findOverlaps(long start, long end,
- List<SequenceFeature> result)
+ @Override
+ protected int findFirstBegin(List<SequenceFeature> list, long pos)
{
- int n = featureList.size();
- switch (n)
+ int start = 0;
+ int end = list.size() - 1;
+ int matched = list.size();
+
+ while (start <= end)
{
- case 0:
- return;
- case 1:
- checkOne(featureList.get(0), start, end,
- result);
- return;
- default:
- if (orderedFeatureStarts == null)
+ int mid = (start + end) / 2;
+ if (list.get(mid).begin >= pos)
{
- rebuildArrays(n);
+ matched = mid;
+ end = mid - 1;
}
- break;
- }
-
- // (1) Find the closest feature to this position.
-
- int index = findClosestFeature(orderedFeatureStarts, start);
- SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
-
- // (2) Traverse the containedBy field, checking for overlap.
-
- while (sf != null)
- {
- if (sf.end >= start)
+ else
{
- result.add(sf);
+ start = mid + 1;
}
- sf = sf.containedBy;
}
+ return matched;
+ }
- // (3) For an interval, find the last feature that starts in this interval,
- // and add all features up through that feature.
+ @Override
+ protected int findFirstEnd(List<SequenceFeature> list, long pos)
+ {
+ int start = 0;
+ int end = list.size() - 1;
+ int matched = list.size();
- if (end > start)
+ while (start <= end)
{
- // fill in with all features that start within this interval, fully
- // inclusive
- int index2 = findClosestFeature(orderedFeatureStarts, end);
- while (++index <= index2)
+ int mid = (start + end) / 2;
+ if (list.get(mid).end >= pos)
{
- result.add(orderedFeatureStarts[index]);
+ matched = mid;
+ end = mid - 1;
+ }
+ else
+ {
+ start = mid + 1;
}
-
}
+ return matched;
}
/**
- * Quick check when we only have one feature.
+ * Returns a (possibly empty) list of features whose extent overlaps the given
+ * range. The returned list is not ordered. Contact features are included if
+ * either of the contact points lies within the range.
*
- * @param sf
* @param start
+ * start position of overlap range (inclusive)
* @param end
- * @param result
+ * end position of overlap range (inclusive)
+ * @return
*/
- private void checkOne(SequenceFeature sf, long start, long end,
+
+ @Override
+ public List<SequenceFeature> findOverlappingFeatures(long start, long end,
List<SequenceFeature> result)
{
- if (sf.begin <= end && sf.end >= start)
+ if (result == null)
{
- result.add(sf);
+ result = new ArrayList<>();
}
- return;
- }
-
- @Override
- protected boolean containsFeature(SequenceFeature feature)
- {
-
- int pos = findFirstBegin(featureList,
- feature.begin);
- int len = featureList.size();
- while (pos < len)
+ if (contactFeatureStarts != null)
{
- SequenceFeature sf = featureList.get(pos);
- if (sf.begin > feature.begin)
+ if (start == end)
{
- return false; // no match found
+ getContactPoints(contactFeatureStarts, start, result, true);
+ getContactPoints(contactFeatureEnds, start, result, false);
}
- if (sf.equals(feature))
+ else
{
- return true;
+ findContactFeatures(start, end, result);
}
- pos++;
}
- return false;
-
+ if (featureList.size() > 0)
+ {
+ getOverlaps(start, end, result);
+ }
+ return result;
}
/**
* @param pos
* @return
*/
- private int findClosestFeature(SequenceFeature[] l, long pos)
+ private int getClosestFeature(SequenceFeature[] l, long pos)
{
int low = 0;
int high = l.length - 1;
}
/**
- * Adds contact features to the result list where either the second or the
- * first contact position lies within the target range
- *
- * @param from
- * @param to
- * @param result
- */
- @Override
- protected void findContactFeatures(long from, long to,
- List<SequenceFeature> result)
- {
- findContactStartOverlaps(from, to, result);
- findContactEndOverlaps(from, to, result);
- }
-
- /**
* Adds to the result list any contact features whose end (second contact
* point), but not start (first contact point), lies in the query from-to
* range
* @param result
*/
- private void findContactEndOverlaps(long from, long to,
+ private void getContactEndOverlaps(long from, long to,
List<SequenceFeature> result)
{
// find the first contact feature (if any)
}
/**
+ * Binary search for contact start or end at a given (Overview) position.
+ *
+ * @param l
+ * @param pos
+ * @param result
+ * @param isStart
+ *
+ * @author Bob Hanson 2019.07.30
+ */
+  private void getContactPoints(List<SequenceFeature> l, long pos,
+          List<SequenceFeature> result, boolean isStart)
+  {
+    int lo = 0;
+    int hi = l.size() - 1;
+    while (lo <= hi)
+    {
+      int probe = (lo + hi) >>> 1;
+      SequenceFeature hit = l.get(probe);
+      int order = Long.signum((isStart ? hit.begin : hit.end) - pos);
+      if (order < 0)
+      {
+        // probe lies before the target position: search the upper half
+        lo = probe + 1;
+      }
+      else if (order > 0)
+      {
+        // probe lies after the target position: search the lower half
+        hi = probe - 1;
+      }
+      else
+      {
+        // exact hit; equal keys may sit on either side of the probe, so
+        // collect the hit, then its neighbours above, then those below
+        result.add(hit);
+        for (int up = probe + 1; up <= hi; up++)
+        {
+          SequenceFeature next = l.get(up);
+          if (next == null || (isStart ? next.begin : next.end) != pos)
+          {
+            break;
+          }
+          result.add(next);
+        }
+        for (int down = probe - 1; down >= lo; down--)
+        {
+          SequenceFeature prev = l.get(down);
+          if (prev == null || (isStart ? prev.begin : prev.end) != pos)
+          {
+            break;
+          }
+          result.add(prev);
+        }
+        return;
+      }
+    }
+  }
+
+ /**
* Adds contact features whose start position lies in the from-to range to the
* result list
*
* @param result
*/
- private void findContactStartOverlaps(long from, long to,
+ private void getContactStartOverlaps(long from, long to,
List<SequenceFeature> result)
{
for (int i = findFirstBegin(contactFeatureStarts,
}
}
+  /**
+   * Follows the containedBy chain that starts at sf and returns the first
+   * feature on it whose end is at or beyond the begin of sf0, or null if the
+   * chain runs out first.
+   * <p>
+   * Since the sorted feature array is traversed in a forward direction, all
+   * elements prior to the one being linked have already been fully linked, so
+   * only the existing links need to be followed. The chain is generally very
+   * short -- maybe one or two deep -- but it might be longer than that.
+   *
+   * @param sf
+   *          the feature at which the walk starts
+   * @param sf0
+   *          the feature for which a containing feature is sought
+   * @return the first feature on the chain with end >= sf0.begin, or null if
+   *         there is none
+   */
+  private SequenceFeature getContainedBy(SequenceFeature sf,
+          SequenceFeature sf0)
+  {
+    int begin = sf0.begin;
+    while (sf != null)
+    {
+      if (begin <= sf.end)
+      {
+        return sf;
+      }
+      sf = sf.containedBy;
+    }
+    return null;
+  }
+
+  /**
+   * Returns the position in the given list of a feature equivalent to the one
+   * supplied. The feature's index1 value (1-based, assigned by linkFeatures) is
+   * used as a shortcut, but only when the index data is not tainted and the
+   * list entry at that position is this very feature object; in every other
+   * case the lookup is delegated to the superclass.
+   *
+   * @param list
+   *          the list to look in
+   * @param feature
+   *          the feature to locate
+   * @return the verified 0-based position taken from index1, otherwise the
+   *         result of the superclass lookup
+   */
   @Override
-  protected int findFirstBegin(List<SequenceFeature> list, long pos)
+  protected int getEquivalentFeatureIndex(List<SequenceFeature> list,
+          SequenceFeature feature)
   {
-    int start = 0;
-    int end = list.size() - 1;
-    int matched = list.size();
+    int pos = feature.index1 - 1;
+    // index1 may be out of date, or may have been assigned while the feature
+    // was linked into a different list, so confirm it before trusting it
+    if (!isTainted && pos >= 0 && pos < list.size()
+            && list.get(pos) == feature)
+    {
+      return pos;
+    }
+    return super.getEquivalentFeatureIndex(list, feature);
+  }
- while (start <= end)
+ /**
+ * Find all overlaps; special case when there is only one feature. The
+ * required array of start-sorted SequenceFeature is created lazily.
+ *
+ * @param start
+ * @param end
+ * @param result
+ */
+ private void getOverlaps(long start, long end,
+ List<SequenceFeature> result)
+ {
+ int n = featureList.size();
+ switch (n)
{
- int mid = (start + end) / 2;
- if (list.get(mid).begin >= pos)
+ case 0:
+ return;
+ case 1:
+ justCheckOne(featureList.get(0), start, end, result);
+ return;
+ default:
+ if (isTainted)
{
- matched = mid;
- end = mid - 1;
+ orderedFeatureStarts = featureList
+ .toArray(new SequenceFeature[featureList.size()]);
+ linkFeatures(orderedFeatureStarts);
+ isTainted = false;
}
- else
+ break;
+ }
+
+ // (1) Find the closest feature to this position.
+
+ int index = getClosestFeature(orderedFeatureStarts, start);
+ SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
+
+ // (2) Traverse the containedBy field, checking for overlap.
+
+ while (sf != null)
+ {
+ if (sf.end >= start)
{
- start = mid + 1;
+ result.add(sf);
}
+ sf = sf.containedBy;
+ }
+
+ // (3) For an interval, find the last feature that starts in this interval,
+ // and add all features up through that feature.
+
+ if (end > start)
+ {
+ // fill in with all features that start within this interval, fully
+ // inclusive
+ int index2 = getClosestFeature(orderedFeatureStarts, end);
+ while (++index <= index2)
+ {
+ result.add(orderedFeatureStarts[index]);
+ }
+
}
- return matched;
}
- @Override
- protected int findFirstEnd(List<SequenceFeature> list, long pos)
+  /**
+   * Overlap test for a single feature: the feature is appended to the result
+   * list when its begin-end extent intersects the start-end range (both ends
+   * inclusive), and is otherwise ignored.
+   *
+   * @param sf
+   *          the feature to test
+   * @param start
+   *          first position of the query range
+   * @param end
+   *          last position of the query range
+   * @param result
+   *          list that receives the feature if it overlaps
+   */
+  private void justCheckOne(SequenceFeature sf, long start, long end,
+          List<SequenceFeature> result)
+  {
+    boolean outsideRange = sf.begin > end || sf.end < start;
+    if (outsideRange)
+    {
+      return;
+    }
+    result.add(sf);
+  }
- while (start <= end)
+  /**
+   * Runs through the start-sorted feature array once, rebuilding the
+   * containedBy linked-list references and assigning each feature its 1-based
+   * array position in index1. A null for sf.containedBy means there are no
+   * overlaps for this feature.
+   * <p>
+   * Links are cleared before being recomputed, because this method is run
+   * again each time the array is rebuilt and a feature may carry a link from
+   * an earlier run.
+   *
+   * @param features
+   *          the features to link, ordered by begin position
+   */
+  private void linkFeatures(SequenceFeature[] features)
+  {
+    int n = features.length;
+    if (n == 0)
     {
-      int mid = (start + end) / 2;
-      if (list.get(mid).end >= pos)
+      return;
+    }
+    // the loop below starts at position 1, so the first feature has to be
+    // indexed and unlinked here whatever the array length
+    features[0].index1 = 1;
+    features[0].containedBy = null;
+    int maxEnd = features[0].end;
+    for (int i = 1; i < n;)
+    {
+      SequenceFeature sf = features[i];
+      // drop any link left over from a previous run
+      sf.containedBy = null;
+      if (sf.begin <= maxEnd)
       {
-        matched = mid;
-        end = mid - 1;
+        sf.containedBy = getContainedBy(features[i - 1], sf);
       }
-      else
+      if (sf.end > maxEnd)
       {
-        start = mid + 1;
+        maxEnd = sf.end;
       }
+      // index1 is 1-based: array position i is stored as i + 1
+      sf.index1 = ++i;
     }
-    return matched;
   }
-
-
}