1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
11 * A data store for a set of sequence features that supports efficient lookup of
12 * features overlapping a given range. Intended for (but not limited to) storage
13 * of features for one sequence and feature type.
18 public class FeatureStore
20 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
22 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
25 * Non-positional features have no (zero) start/end position.
27 List<SequenceFeature> nonPositionalFeatures;
30 * An ordered list of features, with the promise that no feature in the list
31 * properly contains any other. This constraint allows bounded linear search
32 * of the list for features overlapping a region.
33 * Contact features are not included in this list.
35 List<SequenceFeature> nonNestedFeatures;
38 * contact features ordered by first contact position
40 List<SequenceFeature> contactFeatureStarts;
43 * contact features ordered by second contact position
45 List<SequenceFeature> contactFeatureEnds;
48 * Nested Containment List is used to hold any features that are nested
49 * within (properly contained by) any other feature. This is a recursive tree
50 * which supports depth-first scan for features overlapping a range.
51 * It is used here as a 'catch-all' fallback for features that cannot be put
52 * into a simple ordered list without invalidating the search methods.
54 NCList<SequenceFeature> nestedFeatures;
61 nonNestedFeatures = new ArrayList<SequenceFeature>();
62 // we only construct nonPositionalFeatures, contactFeatures
63 // or the NCList if we need to
67 * Adds one sequence feature to the store, and returns true, unless the
68 * feature is already contained in the store, in which case this method
69 * returns false. Containment is determined by SequenceFeature.equals()
74 public boolean addFeature(SequenceFeature feature)
76 boolean added = false;
78 if (feature.isContactFeature())
80 added = addContactFeature(feature);
82 else if (feature.isNonPositional())
84 added = addNonPositionalFeature(feature);
88 if (!nonNestedFeatures.contains(feature))
90 added = addNonNestedFeature(feature);
94 * detected a nested feature - put it in the NCList structure
96 added = addNestedFeature(feature);
105 * Adds the feature to the list of non-positional features (with lazy
106 * instantiation of the list if it is null), and returns true. If the
107 * non-positional features already include the new feature (by equality test),
108 * then it is not added, and this method returns false.
112 protected boolean addNonPositionalFeature(SequenceFeature feature)
114 if (nonPositionalFeatures == null)
116 nonPositionalFeatures = new ArrayList<SequenceFeature>();
118 if (nonPositionalFeatures.contains(feature))
122 nonPositionalFeatures.add(feature);
127 * Adds one feature to the NCList that can manage nested features (creating
128 * the NCList if necessary), and returns true. If the feature is already
129 * stored in the NCList (by equality test), then it is not added, and this
130 * method returns false.
132 protected synchronized boolean addNestedFeature(SequenceFeature feature)
134 if (nestedFeatures == null)
136 nestedFeatures = new NCList<SequenceFeature>(feature);
139 return nestedFeatures.add(feature, false);
143 * Add a feature to the list of non-nested features, maintaining the ordering
144 * of the list. A check is made for whether the feature is nested in (properly
145 * contained by) an existing feature. If there is no nesting, the feature is
146 * added to the list and the method returns true. If nesting is found, the
147 * feature is not added and the method returns false.
149 * Contact features are added at the position of their first contact point
154 protected boolean addNonNestedFeature(SequenceFeature feature)
156 synchronized (nonNestedFeatures)
158 int insertPosition = binarySearchForAdd(nonNestedFeatures, feature);
161 * fail if we detect feature enclosure - of the new feature by
162 * the one preceding it, or of the next feature by the new one
164 if (insertPosition > 0)
166 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
171 if (insertPosition < nonNestedFeatures.size())
173 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
180 * checks passed - add or append the feature
182 if (insertPosition == nonNestedFeatures.size())
184 nonNestedFeatures.add(feature);
188 nonNestedFeatures.add(insertPosition, feature);
195 * Answers true if range1 properly encloses range2, else false
201 protected boolean encloses(ContiguousI range1, ContiguousI range2)
203 int begin1 = range1.getBegin();
204 int begin2 = range2.getBegin();
205 int end1 = range1.getEnd();
206 int end2 = range2.getEnd();
207 if (begin1 == begin2 && end1 > end2)
211 if (begin1 < begin2 && end1 >= end2)
219 * Answers the index of the first element in the given list which follows or
220 * matches the given feature in the sort order. If no such element, answers
221 * the length of the list.
228 protected int binarySearchForAdd(List<SequenceFeature> list, SequenceFeature feature)
230 // TODO binary search!
232 while (i < list.size())
234 if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0)
244 * Add a contact feature to the lists that hold them ordered by start (first
245 * contact) and by end (second contact) position, ensuring the lists remain
246 * ordered, and returns true. If the contact feature lists already contain the
247 * given feature (by test for equality), does not add it and returns false.
252 protected synchronized boolean addContactFeature(SequenceFeature feature)
254 if (contactFeatureStarts == null)
256 contactFeatureStarts = new ArrayList<SequenceFeature>();
258 if (contactFeatureEnds == null)
260 contactFeatureEnds = new ArrayList<SequenceFeature>();
263 // TODO binary search for insertion points!
264 if (contactFeatureStarts.contains(feature))
269 contactFeatureStarts.add(feature);
270 Collections.sort(contactFeatureStarts, startOrdering);
271 contactFeatureEnds.add(feature);
272 Collections.sort(contactFeatureEnds, endOrdering);
278 * Returns a (possibly empty) list of entries whose range overlaps the given
279 * range. The returned list is not ordered. Contact features are included if
280 * either of the contact points lies within the range.
283 * start position of overlap range (inclusive)
285 * end position of overlap range (inclusive)
288 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
290 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
292 findNonNestedFeatures(start, end, result);
294 findContactFeatures(start, end, result);
296 if (nestedFeatures != null)
298 result.addAll(nestedFeatures.findOverlaps(start, end));
305 * Adds contact features to the result list where either the second or the
306 * first contact position lies within the target range.
312 protected void findContactFeatures(long from, long to,
313 List<SequenceFeature> result)
315 if (contactFeatureStarts != null)
317 findContactStartFeatures(from, to, result);
319 if (contactFeatureEnds != null)
321 findContactEndFeatures(from, to, result);
330 protected void findContactEndFeatures(long from, long to,
331 List<SequenceFeature> result)
333 // TODO binary search for startPosition
334 for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++)
336 SequenceFeature sf = contactFeatureEnds.get(startPosition);
337 if (!sf.isContactFeature())
339 System.err.println("Error! non-contact feature type "
340 + sf.getType() + " in contact features list");
343 int begin = sf.getBegin();
344 if (begin >= from && begin <= to)
347 * this feature's first contact position lies in the search range
348 * so we don't include it in results a second time
352 int end = sf.getEnd();
353 if (end >= from && end <= to)
361 * Returns the index of the first contact feature found whose end (second
362 * contact position) is not before the given start position. If no such
363 * feature is found, returns the length of the contact features list.
368 protected int contactsBinarySearch(long start)
370 // TODO binary search!!
372 while (i < contactFeatureEnds.size())
374 if (contactFeatureEnds.get(i).getEnd() >= start)
385 * Adds features to the result list that are at a single position which lies
386 * within the target range. Non-positional features (start=end=0) and contact
387 * features are excluded.
393 protected void findNonNestedFeatures(long from, long to,
394 List<SequenceFeature> result)
396 int startIndex = binarySearch(nonNestedFeatures, from);
397 findNonNestedFeatures(startIndex, from, to, result);
401 * Scans the list of non-nested features, starting from startIndex, to find
402 * those that overlap the from-to range, and adds them to the result list.
403 * Returns the index of the first feature whose start position is after the
404 * target range (or the length of the whole list if none such feature exists).
412 protected int findNonNestedFeatures(final int startIndex, long from,
414 List<SequenceFeature> result)
417 while (i < nonNestedFeatures.size())
419 SequenceFeature sf = nonNestedFeatures.get(i);
420 if (sf.getBegin() > to)
424 int start = sf.getBegin();
425 int end = sf.getEnd();
426 if (start <= to && end >= from)
436 * Performs a binary search of the (sorted) list to find the index of the
437 * first entry whose end position is not less than the target position (i.e.
438 * skip all features that properly precede the given position)
444 protected int binarySearch(List<SequenceFeature> features, long target)
446 int width = features.size() / 2;
450 int end = features.get(lastpos).getEnd();
461 // todo correct binary search
462 return lastpos > 1 ? lastpos - 2 : 0;
467 * Adds contact features whose start position lies in the from-to range to the
474 protected void findContactStartFeatures(long from, long to,
475 List<SequenceFeature> result)
477 // TODO binary search for startPosition
478 for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++)
480 SequenceFeature sf = contactFeatureStarts.get(startPosition);
481 if (!sf.isContactFeature())
483 System.err.println("Error! non-contact feature type "
484 + sf.getType() + " in contact features list");
487 int begin = sf.getBegin();
488 if (begin >= from && begin <= to)
496 * Answers a list of all features stored (including any non-positional
497 * features), in no guaranteed order
501 public List<SequenceFeature> getFeatures()
504 * add non-nested features (may be all features for many cases)
506 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
507 result.addAll(nonNestedFeatures);
510 * add any contact features - from the list by start position
512 if (contactFeatureStarts != null)
514 result.addAll(contactFeatureStarts);
518 * add any non-positional features
520 if (nonPositionalFeatures != null)
522 result.addAll(nonPositionalFeatures);
526 * add any nested features
528 if (nestedFeatures != null)
530 result.addAll(nestedFeatures.getEntries());
537 * Answers a list of all contact features. If there are none, returns an
538 * immutable empty list.
542 public List<SequenceFeature> getContactFeatures()
544 if (contactFeatureStarts == null)
546 return Collections.emptyList();
548 return new ArrayList<SequenceFeature>(contactFeatureStarts);
552 * Answers a list of all non-positional features. If there are none, returns
553 * an immutable empty list.
557 public List<SequenceFeature> getNonPositionalFeatures()
559 if (nonPositionalFeatures == null)
561 return Collections.emptyList();
563 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
567 * Deletes the given feature from the store, returning true if it was found
568 * (and deleted), else false. This method makes no assumption that the feature
569 * is in the 'expected' place in the store, in case it has been modified since
574 public boolean delete(SequenceFeature sf)
577 * try the non-nested positional features first
579 boolean removed = nonNestedFeatures.remove(sf);
582 * if not found, try contact positions (and if found, delete
583 * from both lists of contact positions)
585 if (!removed && contactFeatureStarts != null)
587 removed = contactFeatureStarts.remove(sf);
590 contactFeatureEnds.remove(sf);
595 * if not found, try non-positional features
597 if (!removed && nonPositionalFeatures != null)
599 removed = nonPositionalFeatures.remove(sf);
603 * if not found, try nested features
605 if (!removed && nestedFeatures != null)
607 removed = nestedFeatures.delete(sf);