1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashSet;
13 * A data store for a set of sequence features that supports efficient lookup of
14 * features overlapping a given range. Intended for (but not limited to) storage
15 * of features for one sequence and feature type.
20 public class FeatureStore
22 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
24 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
27 * Non-positional features have no (zero) start/end position.
28 * Kept as a separate list in case this criterion changes in future.
30 List<SequenceFeature> nonPositionalFeatures;
33 * An ordered list of features, with the promise that no feature in the list
34 * properly contains any other. This constraint allows bounded linear search
35 * of the list for features overlapping a region.
36 * Contact features are not included in this list.
38 List<SequenceFeature> nonNestedFeatures;
41 * contact features ordered by first contact position
43 List<SequenceFeature> contactFeatureStarts;
46 * contact features ordered by second contact position
48 List<SequenceFeature> contactFeatureEnds;
51 * Nested Containment List is used to hold any features that are nested
52 * within (properly contained by) any other feature. This is a recursive tree
53 * which supports depth-first scan for features overlapping a range.
54 * It is used here as a 'catch-all' fallback for features that cannot be put
55 * into a simple ordered list without invalidating the search methods.
57 NCList<SequenceFeature> nestedFeatures;
60 * Feature groups represented in stored positional features
61 * (possibly including null)
63 Set<String> positionalFeatureGroups;
66 * Feature groups represented in stored non-positional features
67 * (possibly including null)
69 Set<String> nonPositionalFeatureGroups;
76 nonNestedFeatures = new ArrayList<SequenceFeature>();
77 positionalFeatureGroups = new HashSet<String>();
79 // we only construct nonPositionalFeatures, contactFeatures
80 // or the NCList if we need to
84 * Adds one sequence feature to the store, and returns true, unless the
85 * feature is already contained in the store, in which case this method
86 * returns false. Containment is determined by SequenceFeature.equals()
91 public boolean addFeature(SequenceFeature feature)
93 if (!feature.isNonPositional())
95 positionalFeatureGroups.add(feature.getFeatureGroup());
98 boolean added = false;
100 if (feature.isContactFeature())
102 added = addContactFeature(feature);
104 else if (feature.isNonPositional())
106 added = addNonPositionalFeature(feature);
110 if (!nonNestedFeatures.contains(feature))
112 added = addNonNestedFeature(feature);
116 * detected a nested feature - put it in the NCList structure
118 added = addNestedFeature(feature);
127 * Adds the feature to the list of non-positional features (with lazy
128 * instantiation of the list if it is null), and returns true. If the
129 * non-positional features already include the new feature (by equality test),
130 * then it is not added, and this method returns false. The feature group is
131 * added to the set of distinct feature groups for non-positional features.
135 protected boolean addNonPositionalFeature(SequenceFeature feature)
137 if (nonPositionalFeatures == null)
139 nonPositionalFeatures = new ArrayList<SequenceFeature>();
140 nonPositionalFeatureGroups = new HashSet<String>();
142 if (nonPositionalFeatures.contains(feature))
147 nonPositionalFeatures.add(feature);
149 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
155 * Adds one feature to the NCList that can manage nested features (creating
156 * the NCList if necessary), and returns true. If the feature is already
157 * stored in the NCList (by equality test), then it is not added, and this
158 * method returns false.
160 protected synchronized boolean addNestedFeature(SequenceFeature feature)
162 if (nestedFeatures == null)
164 nestedFeatures = new NCList<SequenceFeature>(feature);
167 return nestedFeatures.add(feature, false);
171 * Add a feature to the list of non-nested features, maintaining the ordering
172 * of the list. A check is made for whether the feature is nested in (properly
173 * contained by) an existing feature. If there is no nesting, the feature is
174 * added to the list and the method returns true. If nesting is found, the
175 * feature is not added and the method returns false.
177 * Contact features are added at the position of their first contact point
182 protected boolean addNonNestedFeature(SequenceFeature feature)
184 synchronized (nonNestedFeatures)
186 int insertPosition = binarySearchForAdd(nonNestedFeatures, feature);
189 * fail if we detect feature enclosure - of the new feature by
190 * the one preceding it, or of the next feature by the new one
192 if (insertPosition > 0)
194 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
199 if (insertPosition < nonNestedFeatures.size())
201 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
208 * checks passed - add or append the feature
210 if (insertPosition == nonNestedFeatures.size())
212 nonNestedFeatures.add(feature);
216 nonNestedFeatures.add(insertPosition, feature);
223 * Answers true if range1 properly encloses range2, else false
229 protected boolean encloses(ContiguousI range1, ContiguousI range2)
231 int begin1 = range1.getBegin();
232 int begin2 = range2.getBegin();
233 int end1 = range1.getEnd();
234 int end2 = range2.getEnd();
235 if (begin1 == begin2 && end1 > end2)
239 if (begin1 < begin2 && end1 >= end2)
247 * Answers the index of the first element in the given list which follows or
248 * matches the given feature in the sort order. If no such element, answers
249 * the length of the list.
256 protected int binarySearchForAdd(List<SequenceFeature> list, SequenceFeature feature)
258 // TODO binary search!
260 while (i < list.size())
262 if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0)
272 * Add a contact feature to the lists that hold them ordered by start (first
273 * contact) and by end (second contact) position, ensuring the lists remain
274 * ordered, and returns true. If the contact feature lists already contain the
275 * given feature (by test for equality), does not add it and returns false.
280 protected synchronized boolean addContactFeature(SequenceFeature feature)
282 if (contactFeatureStarts == null)
284 contactFeatureStarts = new ArrayList<SequenceFeature>();
286 if (contactFeatureEnds == null)
288 contactFeatureEnds = new ArrayList<SequenceFeature>();
291 // TODO binary search for insertion points!
292 if (contactFeatureStarts.contains(feature))
297 contactFeatureStarts.add(feature);
298 Collections.sort(contactFeatureStarts, startOrdering);
299 contactFeatureEnds.add(feature);
300 Collections.sort(contactFeatureEnds, endOrdering);
306 * Returns a (possibly empty) list of entries whose range overlaps the given
307 * range. The returned list is not ordered. Contact features are included if
308 * either of the contact points lies within the range.
311 * start position of overlap range (inclusive)
313 * end position of overlap range (inclusive)
316 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
318 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
320 findNonNestedFeatures(start, end, result);
322 findContactFeatures(start, end, result);
324 if (nestedFeatures != null)
326 result.addAll(nestedFeatures.findOverlaps(start, end));
333 * Adds contact features to the result list where either the second or the
334 * first contact position lies within the target range.
340 protected void findContactFeatures(long from, long to,
341 List<SequenceFeature> result)
343 if (contactFeatureStarts != null)
345 findContactStartFeatures(from, to, result);
347 if (contactFeatureEnds != null)
349 findContactEndFeatures(from, to, result);
358 protected void findContactEndFeatures(long from, long to,
359 List<SequenceFeature> result)
361 // TODO binary search for startPosition
362 for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++)
364 SequenceFeature sf = contactFeatureEnds.get(startPosition);
365 if (!sf.isContactFeature())
367 System.err.println("Error! non-contact feature type "
368 + sf.getType() + " in contact features list");
371 int begin = sf.getBegin();
372 if (begin >= from && begin <= to)
375 * this feature's first contact position lies in the search range
376 * so we don't include it in results a second time
380 int end = sf.getEnd();
381 if (end >= from && end <= to)
389 * Returns the index of the first contact feature found whose end (second
390 * contact position) is not before the given start position. If no such
391 * feature is found, returns the length of the contact features list.
396 protected int contactsBinarySearch(long start)
398 // TODO binary search!!
400 while (i < contactFeatureEnds.size())
402 if (contactFeatureEnds.get(i).getEnd() >= start)
413 * Adds features to the result list that are at a single position which lies
414 * within the target range. Non-positional features (start=end=0) and contact
415 * features are excluded.
421 protected void findNonNestedFeatures(long from, long to,
422 List<SequenceFeature> result)
424 int startIndex = binarySearch(nonNestedFeatures, from);
425 findNonNestedFeatures(startIndex, from, to, result);
429 * Scans the list of non-nested features, starting from startIndex, to find
430 * those that overlap the from-to range, and adds them to the result list.
431 * Returns the index of the first feature whose start position is after the
432 * target range (or the length of the whole list if none such feature exists).
440 protected int findNonNestedFeatures(final int startIndex, long from,
442 List<SequenceFeature> result)
445 while (i < nonNestedFeatures.size())
447 SequenceFeature sf = nonNestedFeatures.get(i);
448 if (sf.getBegin() > to)
452 int start = sf.getBegin();
453 int end = sf.getEnd();
454 if (start <= to && end >= from)
464 * Performs a binary search of the (sorted) list to find the index of the
465 * first entry whose end position is not less than the target position (i.e.
466 * skip all features that properly precede the given position)
472 protected int binarySearch(List<SequenceFeature> features, long target)
474 int width = features.size() / 2;
478 int end = features.get(lastpos).getEnd();
489 // todo correct binary search
490 return lastpos > 1 ? lastpos - 2 : 0;
495 * Adds contact features whose start position lies in the from-to range to the
502 protected void findContactStartFeatures(long from, long to,
503 List<SequenceFeature> result)
505 // TODO binary search for startPosition
506 for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++)
508 SequenceFeature sf = contactFeatureStarts.get(startPosition);
509 if (!sf.isContactFeature())
511 System.err.println("Error! non-contact feature type "
512 + sf.getType() + " in contact features list");
515 int begin = sf.getBegin();
516 if (begin >= from && begin <= to)
524 * Answers a list of all positional features stored, in no guaranteed order
528 public List<SequenceFeature> getPositionalFeatures()
531 * add non-nested features (may be all features for many cases)
533 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
534 result.addAll(nonNestedFeatures);
537 * add any contact features - from the list by start position
539 if (contactFeatureStarts != null)
541 result.addAll(contactFeatureStarts);
545 * add any nested features
547 if (nestedFeatures != null)
549 result.addAll(nestedFeatures.getEntries());
556 * Answers a list of all contact features. If there are none, returns an
557 * immutable empty list.
561 public List<SequenceFeature> getContactFeatures()
563 if (contactFeatureStarts == null)
565 return Collections.emptyList();
567 return new ArrayList<SequenceFeature>(contactFeatureStarts);
571 * Answers a list of all non-positional features. If there are none, returns
572 * an immutable empty list.
576 public List<SequenceFeature> getNonPositionalFeatures()
578 if (nonPositionalFeatures == null)
580 return Collections.emptyList();
582 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
586 * Deletes the given feature from the store, returning true if it was found
587 * (and deleted), else false. This method makes no assumption that the feature
588 * is in the 'expected' place in the store, in case it has been modified since
593 public synchronized boolean delete(SequenceFeature sf)
596 * try the non-nested positional features first
598 boolean removed = nonNestedFeatures.remove(sf);
601 * if not found, try contact positions (and if found, delete
602 * from both lists of contact positions)
604 if (!removed && contactFeatureStarts != null)
606 removed = contactFeatureStarts.remove(sf);
609 contactFeatureEnds.remove(sf);
613 boolean removedNonPositional = false;
616 * if not found, try non-positional features
618 if (!removed && nonPositionalFeatures != null)
620 removedNonPositional = nonPositionalFeatures.remove(sf);
621 removed = removedNonPositional;
625 * if not found, try nested features
627 if (!removed && nestedFeatures != null)
629 removed = nestedFeatures.delete(sf);
634 rebuildFeatureGroups(sf.getFeatureGroup(), removedNonPositional);
641 * Check whether the given feature group is still represented, in either
642 * positional or non-positional features, and if not, remove it from the set
645 * @param featureGroup
646 * @param nonPositional
648 protected void rebuildFeatureGroups(String featureGroup,
649 boolean nonPositional)
651 if (nonPositional && nonPositionalFeatures != null)
653 boolean found = false;
654 for (SequenceFeature sf : nonPositionalFeatures)
656 String group = sf.getFeatureGroup();
657 if (featureGroup == group
658 || (featureGroup != null && featureGroup.equals(group)))
666 nonPositionalFeatureGroups.remove(featureGroup);
669 else if (!findFeatureGroup(featureGroup))
671 positionalFeatureGroups.remove(featureGroup);
676 * Scans all positional features to check whether the given feature group is
677 * found, and returns true if found, else false
679 * @param featureGroup
682 protected boolean findFeatureGroup(String featureGroup)
684 for (SequenceFeature sf : getPositionalFeatures())
686 String group = sf.getFeatureGroup();
687 if (group == featureGroup
688 || (group != null && group.equals(featureGroup)))
697 * Answers true if this store has no features, else false
701 public boolean isEmpty()
703 boolean hasFeatures = !nonNestedFeatures.isEmpty()
704 || (contactFeatureStarts != null && !contactFeatureStarts
706 || (nonPositionalFeatures != null && !nonPositionalFeatures
708 || (nestedFeatures != null && nestedFeatures.size() > 0);
714 * Answers the set of distinct feature groups stored, possibly including null,
715 * as an unmodifiable view of the set. The parameter determines whether the
716 * groups for positional or for non-positional features are returned.
718 * @param positionalFeatures
721 public Set<String> getFeatureGroups(boolean positionalFeatures)
723 if (positionalFeatures)
725 return Collections.unmodifiableSet(positionalFeatureGroups);
729 return nonPositionalFeatureGroups == null ? Collections
730 .<String> emptySet() : Collections
731 .unmodifiableSet(nonPositionalFeatureGroups);