1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
11 * A data store for a set of sequence features that supports efficient lookup of
12 * features overlapping a given range.
17 public class FeatureStore
19 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
21 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
24 * An ordered list of features, with the promise that no feature in the list
25 * properly contains any other. This constraint allows bounded linear search
26 * of the list for features overlapping a region.
27 * Contact features are not included in this list.
29 List<SequenceFeature> nonNestedFeatures;
32 * contact features ordered by first contact position
34 List<SequenceFeature> contactFeatureStarts;
37 * contact features ordered by second contact position
39 List<SequenceFeature> contactFeatureEnds;
42 * Nested Containment List is used to hold any features that are nested
43 * within (properly contained by) any other feature. This is a recursive tree
44 * which supports depth-first scan for features overlapping a range.
45 * It is used here as a 'catch-all' fallback for features that cannot be put
46 * into a simple ordered list without invalidating the search methods.
48 NCList<SequenceFeature> nestedFeatures;
55 nonNestedFeatures = new ArrayList<SequenceFeature>();
56 // we only construct contactFeatures and the NCList if we need to
60 * Add one entry to the data store
64 public void addFeature(SequenceFeature feature)
66 if (feature.isContactFeature())
68 addContactFeature(feature);
72 boolean added = addNonNestedFeature(feature);
76 * detected a nested feature - put it in the NCList structure
78 addNestedFeature(feature);
84 * Adds one feature to the NCList that can manage nested features (creating
85 * the NCList if necessary)
87 protected synchronized void addNestedFeature(SequenceFeature feature)
89 if (nestedFeatures == null)
91 nestedFeatures = new NCList<SequenceFeature>(feature);
95 nestedFeatures.add(feature);
100 * Add a feature to the list of non-nested features, maintaining the ordering
101 * of the list. A check is made for whether the feature is nested in (properly
102 * contained by) an existing feature. If there is no nesting, the feature is
103 * added to the list and the method returns true. If nesting is found, the
104 * feature is not added and the method returns false.
106 * Contact features are added at the position of their first contact point
111 protected boolean addNonNestedFeature(SequenceFeature feature)
113 synchronized (nonNestedFeatures)
115 int insertPosition = binarySearchForAdd(nonNestedFeatures, feature);
118 * fail if we detect feature enclosure - of the new feature by
119 * the one preceding it, or of the next feature by the new one
121 if (insertPosition > 0)
123 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
128 if (insertPosition < nonNestedFeatures.size())
130 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
137 * checks passed - add or append the feature
139 if (insertPosition == nonNestedFeatures.size())
141 nonNestedFeatures.add(feature);
145 nonNestedFeatures.add(insertPosition, feature);
152 * Answers true if range1 properly encloses range2, else false
158 protected boolean encloses(ContiguousI range1, ContiguousI range2)
160 int begin1 = range1.getBegin();
161 int begin2 = range2.getBegin();
162 int end1 = range1.getEnd();
163 int end2 = range2.getEnd();
164 if (begin1 == begin2 && end1 > end2)
168 if (begin1 < begin2 && end1 >= end2)
176 * Answers the index of the first element in the given list which follows or
177 * matches the given feature in the sort order. If no such element, answers
178 * the length of the list.
185 protected int binarySearchForAdd(List<SequenceFeature> list, SequenceFeature feature)
187 // TODO binary search!
189 while (i < list.size())
191 if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0)
201 * Add a contact feature to the lists that hold them ordered by start (first
202 * contact) and by end (second contact) position, ensuring the lists remain
207 protected synchronized void addContactFeature(SequenceFeature feature)
209 // TODO binary search for insertion points!
210 if (contactFeatureStarts == null)
212 contactFeatureStarts = new ArrayList<SequenceFeature>();
214 if (contactFeatureEnds == null)
216 contactFeatureEnds = new ArrayList<SequenceFeature>();
218 contactFeatureStarts.add(feature);
219 Collections.sort(contactFeatureStarts, startOrdering);
220 contactFeatureEnds.add(feature);
221 Collections.sort(contactFeatureEnds, endOrdering);
225 * Returns a (possibly empty) list of entries whose range overlaps the given
226 * range. The returned list is not ordered. Contact features are included if
227 * either of the contact points lies within the range.
230 * start position of overlap range (inclusive)
232 * end position of overlap range (inclusive)
235 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
237 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
239 findNonNestedFeatures(start, end, result);
241 findContactFeatures(start, end, result);
243 if (nestedFeatures != null)
245 result.addAll(nestedFeatures.findOverlaps(start, end));
252 * Adds contact features to the result list where either the second or the
253 * first contact position lies within the target range.
259 protected void findContactFeatures(long from, long to,
260 List<SequenceFeature> result)
262 if (contactFeatureStarts != null)
264 findContactStartFeatures(from, to, result);
266 if (contactFeatureEnds != null)
268 findContactEndFeatures(from, to, result);
277 protected void findContactEndFeatures(long from, long to,
278 List<SequenceFeature> result)
280 // TODO binary search for startPosition
281 for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++)
283 SequenceFeature sf = contactFeatureEnds.get(startPosition);
284 if (!sf.isContactFeature())
286 System.err.println("Error! non-contact feature type "
287 + sf.getType() + " in contact features list");
290 int begin = sf.getBegin();
291 if (begin >= from && begin <= to)
294 * this feature's first contact position lies in the search range
295 * so we don't include it in results a second time
299 int end = sf.getEnd();
300 if (end >= from && end <= to)
308 * Returns the index of the first contact feature found whose end (second
309 * contact position) is not before the given start position. If no such
310 * feature is found, returns the length of the contact features list.
315 protected int contactsBinarySearch(long start)
317 // TODO binary search!!
319 while (i < contactFeatureEnds.size())
321 if (contactFeatureEnds.get(i).getEnd() >= start)
332 * Adds features to the result list that are at a single position which lies
333 * within the target range. Non-positional features (start=end=0) and contact
334 * features are excluded.
340 protected void findNonNestedFeatures(long from, long to,
341 List<SequenceFeature> result)
343 int startIndex = binarySearch(nonNestedFeatures, from);
344 findNonNestedFeatures(startIndex, from, to, result);
348 * Scans the list of non-nested features, starting from startIndex, to find
349 * those that overlap the from-to range, and adds them to the result list.
350 * Returns the index of the first feature whose start position is after the
351 * target range (or the length of the whole list if none such feature exists).
359 protected int findNonNestedFeatures(final int startIndex, long from,
361 List<SequenceFeature> result)
364 while (i < nonNestedFeatures.size())
366 SequenceFeature sf = nonNestedFeatures.get(i);
367 if (sf.getBegin() > to)
371 int start = sf.getBegin();
372 int end = sf.getEnd();
373 if (sf.isContactFeature())
377 if (start <= to && end >= from)
387 * Performs a binary search of the (sorted) list to find the index of the
388 * first entry whose end position is not less than the target position (i.e.
389 * skip all features that properly precede the given position)
395 protected int binarySearch(List<SequenceFeature> features, long target)
397 int width = features.size() / 2;
401 int end = features.get(lastpos).getEnd();
412 // todo correct binary search
413 return lastpos > 1 ? lastpos - 2 : 0;
418 * Adds contact features whose start position lies in the from-to range to the
425 protected void findContactStartFeatures(long from, long to,
426 List<SequenceFeature> result)
428 // TODO binary search for startPosition
429 for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++)
431 SequenceFeature sf = contactFeatureStarts.get(startPosition);
432 if (!sf.isContactFeature())
434 System.err.println("Error! non-contact feature type "
435 + sf.getType() + " in contact features list");
438 int begin = sf.getBegin();
439 if (begin >= from && begin <= to)