1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
11 * A data store for a set of sequence features that supports efficient lookup of
12 * features overlapping a given range. Intended for (but not limited to) storage
13 * of features for one sequence and feature type.
18 public class FeatureStore
20 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
22 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
25 * Non-positional features have no (zero) start/end position.
26 * Kept as a separate list in case this criterion changes in future.
28 List<SequenceFeature> nonPositionalFeatures;
31 * An ordered list of features, with the promise that no feature in the list
32 * properly contains any other. This constraint allows bounded linear search
33 * of the list for features overlapping a region.
34 * Contact features are not included in this list.
36 List<SequenceFeature> nonNestedFeatures;
39 * contact features ordered by first contact position
41 List<SequenceFeature> contactFeatureStarts;
44 * contact features ordered by second contact position
46 List<SequenceFeature> contactFeatureEnds;
49 * Nested Containment List is used to hold any features that are nested
50 * within (properly contained by) any other feature. This is a recursive tree
51 * which supports depth-first scan for features overlapping a range.
52 * It is used here as a 'catch-all' fallback for features that cannot be put
53 * into a simple ordered list without invalidating the search methods.
55 NCList<SequenceFeature> nestedFeatures;
62 nonNestedFeatures = new ArrayList<SequenceFeature>();
63 // we only construct nonPositionalFeatures, contactFeatures
64 // or the NCList if we need to
68 * Adds one sequence feature to the store, and returns true, unless the
69 * feature is already contained in the store, in which case this method
70 * returns false. Containment is determined by SequenceFeature.equals()
75 public boolean addFeature(SequenceFeature feature)
77 boolean added = false;
79 if (feature.isContactFeature())
81 added = addContactFeature(feature);
83 else if (feature.isNonPositional())
85 added = addNonPositionalFeature(feature);
89 if (!nonNestedFeatures.contains(feature))
91 added = addNonNestedFeature(feature);
95 * detected a nested feature - put it in the NCList structure
97 added = addNestedFeature(feature);
106 * Adds the feature to the list of non-positional features (with lazy
107 * instantiation of the list if it is null), and returns true. If the
108 * non-positional features already include the new feature (by equality test),
109 * then it is not added, and this method returns false.
113 protected boolean addNonPositionalFeature(SequenceFeature feature)
115 if (nonPositionalFeatures == null)
117 nonPositionalFeatures = new ArrayList<SequenceFeature>();
119 if (nonPositionalFeatures.contains(feature))
123 nonPositionalFeatures.add(feature);
128 * Adds one feature to the NCList that can manage nested features (creating
129 * the NCList if necessary), and returns true. If the feature is already
130 * stored in the NCList (by equality test), then it is not added, and this
131 * method returns false.
133 protected synchronized boolean addNestedFeature(SequenceFeature feature)
135 if (nestedFeatures == null)
137 nestedFeatures = new NCList<SequenceFeature>(feature);
140 return nestedFeatures.add(feature, false);
144 * Add a feature to the list of non-nested features, maintaining the ordering
145 * of the list. A check is made for whether the feature is nested in (properly
146 * contained by) an existing feature. If there is no nesting, the feature is
147 * added to the list and the method returns true. If nesting is found, the
148 * feature is not added and the method returns false.
150 * Contact features are added at the position of their first contact point
155 protected boolean addNonNestedFeature(SequenceFeature feature)
157 synchronized (nonNestedFeatures)
159 int insertPosition = binarySearchForAdd(nonNestedFeatures, feature);
162 * fail if we detect feature enclosure - of the new feature by
163 * the one preceding it, or of the next feature by the new one
165 if (insertPosition > 0)
167 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
172 if (insertPosition < nonNestedFeatures.size())
174 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
181 * checks passed - add or append the feature
183 if (insertPosition == nonNestedFeatures.size())
185 nonNestedFeatures.add(feature);
189 nonNestedFeatures.add(insertPosition, feature);
196 * Answers true if range1 properly encloses range2, else false
202 protected boolean encloses(ContiguousI range1, ContiguousI range2)
204 int begin1 = range1.getBegin();
205 int begin2 = range2.getBegin();
206 int end1 = range1.getEnd();
207 int end2 = range2.getEnd();
208 if (begin1 == begin2 && end1 > end2)
212 if (begin1 < begin2 && end1 >= end2)
220 * Answers the index of the first element in the given list which follows or
221 * matches the given feature in the sort order. If no such element, answers
222 * the length of the list.
229 protected int binarySearchForAdd(List<SequenceFeature> list, SequenceFeature feature)
231 // TODO binary search!
233 while (i < list.size())
235 if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0)
245 * Add a contact feature to the lists that hold them ordered by start (first
246 * contact) and by end (second contact) position, ensuring the lists remain
247 * ordered, and returns true. If the contact feature lists already contain the
248 * given feature (by test for equality), does not add it and returns false.
253 protected synchronized boolean addContactFeature(SequenceFeature feature)
255 if (contactFeatureStarts == null)
257 contactFeatureStarts = new ArrayList<SequenceFeature>();
259 if (contactFeatureEnds == null)
261 contactFeatureEnds = new ArrayList<SequenceFeature>();
264 // TODO binary search for insertion points!
265 if (contactFeatureStarts.contains(feature))
270 contactFeatureStarts.add(feature);
271 Collections.sort(contactFeatureStarts, startOrdering);
272 contactFeatureEnds.add(feature);
273 Collections.sort(contactFeatureEnds, endOrdering);
279 * Returns a (possibly empty) list of entries whose range overlaps the given
280 * range. The returned list is not ordered. Contact features are included if
281 * either of the contact points lies within the range.
284 * start position of overlap range (inclusive)
286 * end position of overlap range (inclusive)
289 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
291 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
293 findNonNestedFeatures(start, end, result);
295 findContactFeatures(start, end, result);
297 if (nestedFeatures != null)
299 result.addAll(nestedFeatures.findOverlaps(start, end));
306 * Adds contact features to the result list where either the second or the
307 * first contact position lies within the target range.
313 protected void findContactFeatures(long from, long to,
314 List<SequenceFeature> result)
316 if (contactFeatureStarts != null)
318 findContactStartFeatures(from, to, result);
320 if (contactFeatureEnds != null)
322 findContactEndFeatures(from, to, result);
331 protected void findContactEndFeatures(long from, long to,
332 List<SequenceFeature> result)
334 // TODO binary search for startPosition
335 for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++)
337 SequenceFeature sf = contactFeatureEnds.get(startPosition);
338 if (!sf.isContactFeature())
340 System.err.println("Error! non-contact feature type "
341 + sf.getType() + " in contact features list");
344 int begin = sf.getBegin();
345 if (begin >= from && begin <= to)
348 * this feature's first contact position lies in the search range
349 * so we don't include it in results a second time
353 int end = sf.getEnd();
354 if (end >= from && end <= to)
362 * Returns the index of the first contact feature found whose end (second
363 * contact position) is not before the given start position. If no such
364 * feature is found, returns the length of the contact features list.
369 protected int contactsBinarySearch(long start)
371 // TODO binary search!!
373 while (i < contactFeatureEnds.size())
375 if (contactFeatureEnds.get(i).getEnd() >= start)
386 * Adds features to the result list that are at a single position which lies
387 * within the target range. Non-positional features (start=end=0) and contact
388 * features are excluded.
394 protected void findNonNestedFeatures(long from, long to,
395 List<SequenceFeature> result)
397 int startIndex = binarySearch(nonNestedFeatures, from);
398 findNonNestedFeatures(startIndex, from, to, result);
402 * Scans the list of non-nested features, starting from startIndex, to find
403 * those that overlap the from-to range, and adds them to the result list.
404 * Returns the index of the first feature whose start position is after the
405 * target range (or the length of the whole list if none such feature exists).
413 protected int findNonNestedFeatures(final int startIndex, long from,
415 List<SequenceFeature> result)
418 while (i < nonNestedFeatures.size())
420 SequenceFeature sf = nonNestedFeatures.get(i);
421 if (sf.getBegin() > to)
425 int start = sf.getBegin();
426 int end = sf.getEnd();
427 if (start <= to && end >= from)
437 * Performs a binary search of the (sorted) list to find the index of the
438 * first entry whose end position is not less than the target position (i.e.
439 * skip all features that properly precede the given position)
445 protected int binarySearch(List<SequenceFeature> features, long target)
447 int width = features.size() / 2;
451 int end = features.get(lastpos).getEnd();
462 // todo correct binary search
463 return lastpos > 1 ? lastpos - 2 : 0;
468 * Adds contact features whose start position lies in the from-to range to the
475 protected void findContactStartFeatures(long from, long to,
476 List<SequenceFeature> result)
478 // TODO binary search for startPosition
479 for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++)
481 SequenceFeature sf = contactFeatureStarts.get(startPosition);
482 if (!sf.isContactFeature())
484 System.err.println("Error! non-contact feature type "
485 + sf.getType() + " in contact features list");
488 int begin = sf.getBegin();
489 if (begin >= from && begin <= to)
497 * Answers a list of all features stored (including any non-positional
498 * features), in no guaranteed order
502 public List<SequenceFeature> getFeatures()
505 * add non-nested features (may be all features for many cases)
507 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
508 result.addAll(nonNestedFeatures);
511 * add any contact features - from the list by start position
513 if (contactFeatureStarts != null)
515 result.addAll(contactFeatureStarts);
519 * add any non-positional features
521 if (nonPositionalFeatures != null)
523 result.addAll(nonPositionalFeatures);
527 * add any nested features
529 if (nestedFeatures != null)
531 result.addAll(nestedFeatures.getEntries());
538 * Answers a list of all contact features. If there are none, returns an
539 * immutable empty list.
543 public List<SequenceFeature> getContactFeatures()
545 if (contactFeatureStarts == null)
547 return Collections.emptyList();
549 return new ArrayList<SequenceFeature>(contactFeatureStarts);
553 * Answers a list of all non-positional features. If there are none, returns
554 * an immutable empty list.
558 public List<SequenceFeature> getNonPositionalFeatures()
560 if (nonPositionalFeatures == null)
562 return Collections.emptyList();
564 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
568 * Deletes the given feature from the store, returning true if it was found
569 * (and deleted), else false. This method makes no assumption that the feature
570 * is in the 'expected' place in the store, in case it has been modified since
575 public boolean delete(SequenceFeature sf)
578 * try the non-nested positional features first
580 boolean removed = nonNestedFeatures.remove(sf);
583 * if not found, try contact positions (and if found, delete
584 * from both lists of contact positions)
586 if (!removed && contactFeatureStarts != null)
588 removed = contactFeatureStarts.remove(sf);
591 contactFeatureEnds.remove(sf);
596 * if not found, try non-positional features
598 if (!removed && nonPositionalFeatures != null)
600 removed = nonPositionalFeatures.remove(sf);
604 * if not found, try nested features
606 if (!removed && nestedFeatures != null)
608 removed = nestedFeatures.delete(sf);
615 * Answers true if this store has no features, else false
619 public boolean isEmpty()
621 boolean hasFeatures = !nonNestedFeatures.isEmpty()
622 || (contactFeatureStarts != null && !contactFeatureStarts
624 || (nonPositionalFeatures != null && !nonPositionalFeatures
626 || (nestedFeatures != null && nestedFeatures.size() > 0);