--- /dev/null
+package jalview.datamodel.features;
+
+import jalview.datamodel.SequenceFeature;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * A data store for a set of sequence features that supports efficient lookup of
+ * features overlapping a given range.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class FeatureStore
+{
+ Comparator<ContiguousI> startOrdering = new RangeComparator(true);
+
+ Comparator<ContiguousI> endOrdering = new RangeComparator(false);
+
+ /*
+ * An ordered list of features, with the promise that no feature in the list
+ * properly contains any other. This constraint allows bounded linear search
+ * of the list for features overlapping a region.
+ * Contact features are not included in this list.
+ */
+ List<SequenceFeature> nonNestedFeatures;
+
+ /*
+ * contact features ordered by first contact position
+ */
+ List<SequenceFeature> contactFeatureStarts;
+
+ /*
+ * contact features ordered by second contact position
+ */
+ List<SequenceFeature> contactFeatureEnds;
+
+ /*
+ * Nested Containment List is used to hold any features that are nested
+ * within (properly contained by) any other feature. This is a recursive tree
+ * which supports depth-first scan for features overlapping a range.
+ * It is used here as a 'catch-all' fallback for features that cannot be put
+ * into a simple ordered list without invalidating the search methods.
+ */
+ NCList<SequenceFeature> nestedFeatures;
+
+ /**
+ * Constructor
+ */
+ public FeatureStore()
+ {
+ nonNestedFeatures = new ArrayList<SequenceFeature>();
+ // we only construct contactFeatures and the NCList if we need to
+ }
+
+ /**
+ * Add one entry to the data store
+ *
+ * @param feature
+ */
+ public void addFeature(SequenceFeature feature)
+ {
+ if (feature.isContactFeature())
+ {
+ addContactFeature(feature);
+ }
+ else
+ {
+ boolean added = addNonNestedFeature(feature);
+ if (!added)
+ {
+ /*
+ * detected a nested feature - put it in the NCList structure
+ */
+ addNestedFeature(feature);
+ }
+ }
+ }
+
+ /**
+ * Adds one feature to the NCList that can manage nested features (creating
+ * the NCList if necessary)
+ */
+ protected synchronized void addNestedFeature(SequenceFeature feature)
+ {
+ if (nestedFeatures == null)
+ {
+ nestedFeatures = new NCList<SequenceFeature>(feature);
+ }
+ else
+ {
+ nestedFeatures.add(feature);
+ }
+ }
+
+ /**
+ * Add a feature to the list of non-nested features, maintaining the ordering
+ * of the list. A check is made for whether the feature is nested in (properly
+ * contained by) an existing feature. If there is no nesting, the feature is
+ * added to the list and the method returns true. If nesting is found, the
+ * feature is not added and the method returns false.
+ * <p>
+ * Contact features are added at the position of their first contact point
+ *
+ * @param feature
+ * @return
+ */
+ protected boolean addNonNestedFeature(SequenceFeature feature)
+ {
+ synchronized (nonNestedFeatures)
+ {
+ int insertPosition = binarySearchForAdd(nonNestedFeatures, feature);
+
+ /*
+ * fail if we detect feature enclosure - of the new feature by
+ * the one preceding it, or of the next feature by the new one
+ */
+ if (insertPosition > 0)
+ {
+ if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
+ {
+ return false;
+ }
+ }
+ if (insertPosition < nonNestedFeatures.size())
+ {
+ if (encloses(feature, nonNestedFeatures.get(insertPosition)))
+ {
+ return false;
+ }
+ }
+
+ /*
+ * checks passed - add or append the feature
+ */
+ if (insertPosition == nonNestedFeatures.size())
+ {
+ nonNestedFeatures.add(feature);
+ }
+ else
+ {
+ nonNestedFeatures.add(insertPosition, feature);
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Answers true if range1 properly encloses range2, else false
+ *
+ * @param range1
+ * @param range2
+ * @return
+ */
+ protected boolean encloses(ContiguousI range1, ContiguousI range2)
+ {
+ int begin1 = range1.getBegin();
+ int begin2 = range2.getBegin();
+ int end1 = range1.getEnd();
+ int end2 = range2.getEnd();
+ if (begin1 == begin2 && end1 > end2)
+ {
+ return true;
+ }
+ if (begin1 < begin2 && end1 >= end2)
+ {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Answers the index of the first element in the given list which follows or
+ * matches the given feature in the sort order. If no such element, answers
+ * the length of the list.
+ *
+ * @param list
+ * @param feature
+ *
+ * @return
+ */
+ protected int binarySearchForAdd(List<SequenceFeature> list, SequenceFeature feature)
+ {
+ // TODO binary search!
+ int i = 0;
+ while (i < list.size())
+ {
+ if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0)
+ {
+ break;
+ }
+ i++;
+ }
+ return i;
+ }
+
+ /**
+ * Add a contact feature to the lists that hold them ordered by start (first
+ * contact) and by end (second contact) position, ensuring the lists remain
+ * ordered
+ *
+ * @param feature
+ */
+ protected synchronized void addContactFeature(SequenceFeature feature)
+ {
+ // TODO binary search for insertion points!
+ if (contactFeatureStarts == null)
+ {
+ contactFeatureStarts = new ArrayList<SequenceFeature>();
+ }
+ if (contactFeatureEnds == null)
+ {
+ contactFeatureEnds = new ArrayList<SequenceFeature>();
+ }
+ contactFeatureStarts.add(feature);
+ Collections.sort(contactFeatureStarts, startOrdering);
+ contactFeatureEnds.add(feature);
+ Collections.sort(contactFeatureEnds, endOrdering);
+ }
+
+ /**
+ * Returns a (possibly empty) list of entries whose range overlaps the given
+ * range. The returned list is not ordered. Contact features are included if
+ * either of the contact points lies within the range.
+ *
+ * @param start
+ * start position of overlap range (inclusive)
+ * @param end
+ * end position of overlap range (inclusive)
+ * @return
+ */
+ public List<SequenceFeature> findOverlappingFeatures(long start, long end)
+ {
+ List<SequenceFeature> result = new ArrayList<SequenceFeature>();
+
+ findNonNestedFeatures(start, end, result);
+
+ findContactFeatures(start, end, result);
+
+ if (nestedFeatures != null)
+ {
+ result.addAll(nestedFeatures.findOverlaps(start, end));
+ }
+
+ return result;
+ }
+
+ /**
+ * Adds contact features to the result list where either the second or the
+ * first contact position lies within the target range.
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+ protected void findContactFeatures(long from, long to,
+ List<SequenceFeature> result)
+ {
+ if (contactFeatureStarts != null)
+ {
+ findContactStartFeatures(from, to, result);
+ }
+ if (contactFeatureEnds != null)
+ {
+ findContactEndFeatures(from, to, result);
+ }
+ }
+
+ /**
+ * @param from
+ * @param to
+ * @param result
+ */
+ protected void findContactEndFeatures(long from, long to,
+ List<SequenceFeature> result)
+ {
+ // TODO binary search for startPosition
+ for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++)
+ {
+ SequenceFeature sf = contactFeatureEnds.get(startPosition);
+ if (!sf.isContactFeature())
+ {
+ System.err.println("Error! non-contact feature type "
+ + sf.getType() + " in contact features list");
+ continue;
+ }
+ int begin = sf.getBegin();
+ if (begin >= from && begin <= to)
+ {
+ /*
+ * this feature's first contact position lies in the search range
+ * so we don't include it in results a second time
+ */
+ continue;
+ }
+ int end = sf.getEnd();
+ if (end >= from && end <= to)
+ {
+ result.add(sf);
+ }
+ }
+ }
+
+ /**
+ * Returns the index of the first contact feature found whose end (second
+ * contact position) is not before the given start position. If no such
+ * feature is found, returns the length of the contact features list.
+ *
+ * @param start
+ * @return
+ */
+ protected int contactsBinarySearch(long start)
+ {
+ // TODO binary search!!
+ int i = 0;
+ while (i < contactFeatureEnds.size())
+ {
+ if (contactFeatureEnds.get(i).getEnd() >= start)
+ {
+ break;
+ }
+ i++;
+ }
+
+ return i;
+ }
+
+ /**
+ * Adds features to the result list that are at a single position which lies
+ * within the target range. Non-positional features (start=end=0) and contact
+ * features are excluded.
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+ protected void findNonNestedFeatures(long from, long to,
+ List<SequenceFeature> result)
+ {
+ int startIndex = binarySearch(nonNestedFeatures, from);
+ findNonNestedFeatures(startIndex, from, to, result);
+ }
+
+ /**
+ * Scans the list of non-nested features, starting from startIndex, to find
+ * those that overlap the from-to range, and adds them to the result list.
+ * Returns the index of the first feature whose start position is after the
+ * target range (or the length of the whole list if none such feature exists).
+ *
+ * @param startIndex
+ * @param from
+ * @param to
+ * @param result
+ * @return
+ */
+ protected int findNonNestedFeatures(final int startIndex, long from,
+ long to,
+ List<SequenceFeature> result)
+ {
+ int i = startIndex;
+ while (i < nonNestedFeatures.size())
+ {
+ SequenceFeature sf = nonNestedFeatures.get(i);
+ if (sf.getBegin() > to)
+ {
+ break;
+ }
+ int start = sf.getBegin();
+ int end = sf.getEnd();
+ if (sf.isContactFeature())
+ {
+ end = start;
+ }
+ if (start <= to && end >= from)
+ {
+ result.add(sf);
+ }
+ i++;
+ }
+ return i;
+ }
+
+ /**
+ * Performs a binary search of the (sorted) list to find the index of the
+ * first entry whose end position is not less than the target position (i.e.
+ * skip all features that properly precede the given position)
+ *
+ * @param features
+ * @param target
+ * @return
+ */
+ protected int binarySearch(List<SequenceFeature> features, long target)
+ {
+ int width = features.size() / 2;
+ int lastpos = width;
+ while (width > 0)
+ {
+ int end = features.get(lastpos).getEnd();
+ width = width / 2;
+ if (end > target)
+ {
+ lastpos -= width;
+ }
+ else
+ {
+ lastpos += width;
+ }
+ }
+ // todo correct binary search
+ return lastpos > 1 ? lastpos - 2 : 0;
+ // return lastpos;
+ }
+
+ /**
+ * Adds contact features whose start position lies in the from-to range to the
+ * result list
+ *
+ * @param from
+ * @param to
+ * @param result
+ */
+ protected void findContactStartFeatures(long from, long to,
+ List<SequenceFeature> result)
+ {
+ // TODO binary search for startPosition
+ for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++)
+ {
+ SequenceFeature sf = contactFeatureStarts.get(startPosition);
+ if (!sf.isContactFeature())
+ {
+ System.err.println("Error! non-contact feature type "
+ + sf.getType() + " in contact features list");
+ continue;
+ }
+ int begin = sf.getBegin();
+ if (begin >= from && begin <= to)
+ {
+ result.add(sf);
+ }
+ }
+ }
+}