/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel.features;
import jalview.datamodel.SequenceFeature;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
/**
* An adaption of FeatureStore that is efficient and lightweight, accelerating
* processing speed in JavaScript.
*
* @author gmcarstairs
* @author Bob Hanson 2019.08.03
*
*/
public class FeatureStoreJS extends FeatureStore
{
boolean contactsTainted = true;
/**
* Constructor
*/
public FeatureStoreJS()
{
features = new ArrayList<>();
}
/**
* Returns a (possibly empty) list of features whose extent overlaps the given
* range. The returned list is not ordered. Contact features are included if
* either of the contact points lies within the range.
*
* @param start
* start position of overlap range (inclusive)
* @param end
* end position of overlap range (inclusive)
* @return
*/
@Override
public List findOverlappingFeatures(long start, long end,
List result)
{
if (result == null)
{
result = new ArrayList<>();
}
if (contactFeatureStarts != null)
{
if (start == end)
{
findContactPoints(contactFeatureStarts, start, result, true);
findContactPoints(contactFeatureEnds, start, result, false);
}
else
{
findContactFeatures(start, end, result);
}
}
if (features.size() > 0)
{
findOverlaps(start, end, result);
}
return result;
}
// The following methods use a linked list of containment in SequenceFeature
// rather than IntervalStore.
//
// There are two parts --- initialization, and overlap searching.
//
// Initialization involves two steps:
//
// (1) sorting of features by start position using a standard Array.sort with
// Comparator.
// (2) linking of features, effectively nesting them.
//
// Searching involves three steps:
//
// (1) binary search for a starting point within the sorted features array.
// (2) traverse the linked lists with an end check to read out the
// overlapped features at this position.
// (3) For an interval, find the last feature that starts in this interval,
// and add all features up through that feature.
//
// All of this is done with very simple standard methods.
// Initialization
/*
* contact features ordered by first contact position
*/
private SequenceFeature[] orderedFeatureStarts;
private void rebuildArrays(int n)
{
if (startComp == null)
{
startComp = new StartComparator();
}
orderedFeatureStarts = new SequenceFeature[n];
for (int i = n; --i >= 0;)
{
SequenceFeature sf = ((ArrayList) features).get(i);
sf.index = i; // for debugging only
orderedFeatureStarts[i] = sf;
}
Arrays.sort(orderedFeatureStarts, startComp);
linkFeatures(orderedFeatureStarts);
}
/**
* just a standard Comparator
*/
private static StartComparator startComp;
class StartComparator implements Comparator
{
@Override
public int compare(SequenceFeature o1, SequenceFeature o2)
{
int p1 = o1.begin;
int p2 = o2.begin;
return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0);
}
}
/**
* Run through the sorted sequence array once, building the containedBy linked
* list references. Does a check first to make sure there is actually
* something out there that is overlapping. A null for sf.containedBy means
* there are no overlaps for this feature.
*
* @param intervals
*/
private void linkFeatures(SequenceFeature[] intervals)
{
if (intervals.length < 2)
{
return;
}
int maxEnd = intervals[0].end;
for (int i = 1, n = intervals.length; i < n; i++)
{
SequenceFeature sf = intervals[i];
if (sf.begin <= maxEnd)
{
sf.containedBy = getContainedBy(intervals[i - 1], sf);
}
if (sf.end > maxEnd)
{
maxEnd = sf.end;
}
}
}
/**
* Since we are traversing the sorted feature array in a forward direction,
* all elements prior to the one we are working on have been fully linked. All
* we are doing is following those links until we find the first array feature
* with a containedBy element that has an end >= our begin point. It is
* generally a very short list -- maybe one or two depths. But it might be
* more than that.
*
* @param sf
* @param sf0
* @return
*/
private SequenceFeature getContainedBy(SequenceFeature sf,
SequenceFeature sf0)
{
int begin = sf0.begin;
while (sf != null)
{
if (begin <= sf.end)
{
// System.out.println("\nFS found " + sf0.index + ":" + sf0
// + "\nFS in " + sf.index + ":" + sf);
return sf;
}
sf = sf.containedBy;
}
return null;
}
// search-stage methods
/**
* Binary search for contact start or end at a given (Overview) position.
*
* @param l
* @param pos
* @param result
* @param isStart
*
* @author Bob Hanson 2019.07.30
*/
private static void findContactPoints(List l, long pos,
List result, boolean isStart)
{
int low = 0;
int high = l.size() - 1;
while (low <= high)
{
int mid = (low + high) >>> 1;
SequenceFeature f = l.get(mid);
switch (Long.signum((isStart ? f.begin : f.end) - pos))
{
case -1:
low = mid + 1;
continue;
case 1:
high = mid - 1;
continue;
case 0:
int m = mid;
result.add(f);
// could be "5" in 12345556788 ?
while (++mid <= high && (f = l.get(mid)) != null
&& (isStart ? f.begin : f.end) == pos)
{
result.add(f);
}
while (--m >= low && (f = l.get(m)) != null
&& (isStart ? f.begin : f.end) == pos)
{
result.add(f);
}
return;
}
}
}
/**
* Find all overlaps; special case when there is only one feature. The
* required array of start-sorted SequenceFeature is created lazily.
*
* @param features
* @param pos
* @param result
*/
private void findOverlaps(long start, long end,
List result)
{
int n = features.size();
switch (n)
{
case 0:
return;
case 1:
checkOne(((ArrayList) features).get(0), start, end,
result);
return;
default:
if (orderedFeatureStarts == null)
{
rebuildArrays(n);
}
break;
}
// (1) Find the closest feature to this position.
int index = findClosestFeature(orderedFeatureStarts, start);
SequenceFeature sf = (index < 0 ? null : orderedFeatureStarts[index]);
// (2) Traverse the containedBy field, checking for overlap.
while (sf != null)
{
if (sf.end >= start)
{
result.add(sf);
}
sf = sf.containedBy;
}
// (3) For an interval, find the last feature that starts in this interval,
// and add all features up through that feature.
if (end > start)
{
// fill in with all features that start within this interval, fully
// inclusive
int index2 = findClosestFeature(orderedFeatureStarts, end);
while (++index <= index2)
{
result.add(orderedFeatureStarts[index]);
}
}
}
/**
* Quick check when we only have one feature.
*
* @param sf
* @param start
* @param end
* @param result
*/
private void checkOne(SequenceFeature sf, long start, long end,
List result)
{
if (sf.begin <= end && sf.end >= start)
{
result.add(sf);
}
return;
}
/**
* A binary search identical to the one used for contact start/end, but here
* we return the feature itself. Unlike Collection.BinarySearch, all we have
* to be is close, not exact, and we make sure if there is a string of
* identical starts, then we slide to the end so that we can check all of
* them.
*
* @param l
* @param pos
* @return
*/
private int findClosestFeature(SequenceFeature[] l, long pos)
{
int low = 0;
int high = l.length - 1;
while (low <= high)
{
int mid = (low + high) >>> 1;
SequenceFeature f = l[mid];
switch (Long.signum(f.begin - pos))
{
case -1:
low = mid + 1;
continue;
case 1:
high = mid - 1;
continue;
case 0:
while (++mid <= high && l[mid].begin == pos)
{
;
}
return --mid;
}
}
return (high < 0 ? -1 : high);
}
/**
* Adds contact features to the result list where either the second or the
* first contact position lies within the target range
*
* @param from
* @param to
* @param result
*/
@Override
protected void findContactFeatures(long from, long to,
List result)
{
findContactStartOverlaps(from, to, result);
findContactEndOverlaps(from, to, result);
}
/**
* Adds to the result list any contact features whose end (second contact
* point), but not start (first contact point), lies in the query from-to
* range
*
* @param from
* @param to
* @param result
*/
private void findContactEndOverlaps(long from, long to,
List result)
{
// find the first contact feature (if any)
// with end point not before the target range
for (int i = findFirstEnd(contactFeatureEnds,
from), n = contactFeatureEnds.size(); i < n; i++)
{
SequenceFeature sf = contactFeatureEnds.get(i);
if (sf.begin >= from && sf.begin <= to)
{
// this feature's first contact position lies in the search range
// so we don't include it in results a second time
continue;
}
if (sf.end > to)
{
// this feature (and all following) has end point after the target range
break;
}
// feature has end >= from and end <= to
// i.e. contact end point lies within overlap search range
result.add(sf);
}
}
/**
* Adds contact features whose start position lies in the from-to range to the
* result list
*
* @param from
* @param to
* @param result
*/
private void findContactStartOverlaps(long from, long to,
List result)
{
for (int i = findFirstBegin(contactFeatureStarts,
from), n = contactFeatureStarts.size(); i < n; i++)
{
SequenceFeature sf = contactFeatureStarts.get(i);
if (sf.begin > to)
{
break;
}
result.add(sf);
}
}
public static int findFirstBegin(List list, long pos)
{
int start = 0;
int end = list.size() - 1;
int matched = list.size();
while (start <= end)
{
int mid = (start + end) / 2;
if (list.get(mid).begin >= pos)
{
matched = mid;
end = mid - 1;
}
else
{
start = mid + 1;
}
}
return matched;
}
public static int findFirstEnd(List list, long pos)
{
int start = 0;
int end = list.size() - 1;
int matched = list.size();
while (start <= end)
{
int mid = (start + end) / 2;
if (list.get(mid).end >= pos)
{
matched = mid;
end = mid - 1;
}
else
{
start = mid + 1;
}
}
return matched;
}
}