1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashSet;
13 * A data store for a set of sequence features that supports efficient lookup of
14 * features overlapping a given range. Intended for (but not limited to) storage
15 * of features for one sequence and feature type.
20 public class FeatureStore
23 * a class providing criteria for performing a binary search of a list
25 abstract static class SearchCriterion
28 * Answers true if the entry passes the search criterion test
33 abstract boolean compare(SequenceFeature entry);
35 static SearchCriterion byStart(final long target)
37 return new SearchCriterion() {
40 boolean compare(SequenceFeature entry)
42 return entry.getBegin() >= target;
47 static SearchCriterion byEnd(final long target)
49 return new SearchCriterion()
53 boolean compare(SequenceFeature entry)
55 return entry.getEnd() >= target;
60 static SearchCriterion byFeature(final ContiguousI to,
61 final Comparator<ContiguousI> rc)
63 return new SearchCriterion()
67 boolean compare(SequenceFeature entry)
69 return rc.compare(entry, to) >= 0;
75 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
77 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
80 * Non-positional features have no (zero) start/end position.
81 * Kept as a separate list in case this criterion changes in future.
83 List<SequenceFeature> nonPositionalFeatures;
86 * An ordered list of features, with the promise that no feature in the list
87 * properly contains any other. This constraint allows bounded linear search
88 * of the list for features overlapping a region.
89 * Contact features are not included in this list.
91 List<SequenceFeature> nonNestedFeatures;
94 * contact features ordered by first contact position
96 List<SequenceFeature> contactFeatureStarts;
99 * contact features ordered by second contact position
101 List<SequenceFeature> contactFeatureEnds;
104 * Nested Containment List is used to hold any features that are nested
105 * within (properly contained by) any other feature. This is a recursive tree
106 * which supports depth-first scan for features overlapping a range.
107 * It is used here as a 'catch-all' fallback for features that cannot be put
108 * into a simple ordered list without invalidating the search methods.
110 NCList<SequenceFeature> nestedFeatures;
113 * Feature groups represented in stored positional features
114 * (possibly including null)
116 Set<String> positionalFeatureGroups;
119 * Feature groups represented in stored non-positional features
120 * (possibly including null)
122 Set<String> nonPositionalFeatureGroups;
127 public FeatureStore()
129 nonNestedFeatures = new ArrayList<SequenceFeature>();
130 positionalFeatureGroups = new HashSet<String>();
132 // we only construct nonPositionalFeatures, contactFeatures
133 // or the NCList if we need to
137 * Adds one sequence feature to the store, and returns true, unless the
138 * feature is already contained in the store, in which case this method
139 * returns false. Containment is determined by SequenceFeature.equals()
144 public boolean addFeature(SequenceFeature feature)
147 * keep a record of feature groups
149 if (!feature.isNonPositional())
151 positionalFeatureGroups.add(feature.getFeatureGroup());
154 boolean added = false;
156 if (feature.isContactFeature())
158 added = addContactFeature(feature);
160 else if (feature.isNonPositional())
162 added = addNonPositionalFeature(feature);
166 if (!nonNestedFeatures.contains(feature))
168 added = addNonNestedFeature(feature);
172 * detected a nested feature - put it in the NCList structure
174 added = addNestedFeature(feature);
183 * Adds the feature to the list of non-positional features (with lazy
184 * instantiation of the list if it is null), and returns true. If the
185 * non-positional features already include the new feature (by equality test),
186 * then it is not added, and this method returns false. The feature group is
187 * added to the set of distinct feature groups for non-positional features.
191 protected boolean addNonPositionalFeature(SequenceFeature feature)
193 if (nonPositionalFeatures == null)
195 nonPositionalFeatures = new ArrayList<SequenceFeature>();
196 nonPositionalFeatureGroups = new HashSet<String>();
198 if (nonPositionalFeatures.contains(feature))
203 nonPositionalFeatures.add(feature);
205 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
211 * Adds one feature to the NCList that can manage nested features (creating
212 * the NCList if necessary), and returns true. If the feature is already
213 * stored in the NCList (by equality test), then it is not added, and this
214 * method returns false.
216 protected synchronized boolean addNestedFeature(SequenceFeature feature)
218 if (nestedFeatures == null)
220 nestedFeatures = new NCList<SequenceFeature>(feature);
223 return nestedFeatures.add(feature, false);
227 * Add a feature to the list of non-nested features, maintaining the ordering
228 * of the list. A check is made for whether the feature is nested in (properly
229 * contained by) an existing feature. If there is no nesting, the feature is
230 * added to the list and the method returns true. If nesting is found, the
231 * feature is not added and the method returns false.
236 protected boolean addNonNestedFeature(SequenceFeature feature)
238 synchronized (nonNestedFeatures)
241 * find the first stored feature which doesn't precede the new one
243 int insertPosition = binarySearch(nonNestedFeatures,
244 SearchCriterion.byFeature(feature, startOrdering));
247 * fail if we detect feature enclosure - of the new feature by
248 * the one preceding it, or of the next feature by the new one
250 if (insertPosition > 0)
252 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
257 if (insertPosition < nonNestedFeatures.size())
259 // FIXME may have to test more than one feature here
260 // e.g. add [40-60] to [20-42, 30-50, 45-55]
261 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
268 * checks passed - add the feature
270 nonNestedFeatures.add(insertPosition, feature);
277 * Answers true if range1 properly encloses range2, else false
283 protected boolean encloses(ContiguousI range1, ContiguousI range2)
285 int begin1 = range1.getBegin();
286 int begin2 = range2.getBegin();
287 int end1 = range1.getEnd();
288 int end2 = range2.getEnd();
289 if (begin1 == begin2 && end1 > end2)
293 if (begin1 < begin2 && end1 >= end2)
301 * Add a contact feature to the lists that hold them ordered by start (first
302 * contact) and by end (second contact) position, ensuring the lists remain
303 * ordered, and returns true. If the contact feature lists already contain the
304 * given feature (by test for equality), does not add it and returns false.
309 protected synchronized boolean addContactFeature(SequenceFeature feature)
311 if (contactFeatureStarts == null)
313 contactFeatureStarts = new ArrayList<SequenceFeature>();
315 if (contactFeatureEnds == null)
317 contactFeatureEnds = new ArrayList<SequenceFeature>();
320 // TODO binary search for insertion points!
321 if (contactFeatureStarts.contains(feature))
326 contactFeatureStarts.add(feature);
327 Collections.sort(contactFeatureStarts, startOrdering);
328 contactFeatureEnds.add(feature);
329 Collections.sort(contactFeatureEnds, endOrdering);
335 * Returns a (possibly empty) list of features whose extent overlaps the given
336 * range. The returned list is not ordered. Contact features are included if
337 * either of the contact points lies within the range.
340 * start position of overlap range (inclusive)
342 * end position of overlap range (inclusive)
345 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
347 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
349 findNonNestedFeatures(start, end, result);
351 findContactFeatures(start, end, result);
353 if (nestedFeatures != null)
355 result.addAll(nestedFeatures.findOverlaps(start, end));
362 * Adds contact features to the result list where either the second or the
363 * first contact position lies within the target range
369 protected void findContactFeatures(long from, long to,
370 List<SequenceFeature> result)
372 if (contactFeatureStarts != null)
374 findContactStartFeatures(from, to, result);
376 if (contactFeatureEnds != null)
378 findContactEndFeatures(from, to, result);
383 * Adds to the result list any contact features whose end (second contact
384 * point), but not start (first contact point), lies in the query from-to
391 protected void findContactEndFeatures(long from, long to,
392 List<SequenceFeature> result)
395 * find the first contact feature (if any) that does not lie
396 * entirely before the target range
398 int startPosition = binarySearch(contactFeatureEnds,
399 SearchCriterion.byEnd(from));
400 for (; startPosition < contactFeatureEnds.size(); startPosition++)
402 SequenceFeature sf = contactFeatureEnds.get(startPosition);
403 if (!sf.isContactFeature())
405 System.err.println("Error! non-contact feature type "
406 + sf.getType() + " in contact features list");
410 int begin = sf.getBegin();
411 if (begin >= from && begin <= to)
414 * this feature's first contact position lies in the search range
415 * so we don't include it in results a second time
420 int end = sf.getEnd();
421 if (end >= from && end <= to)
433 * Adds non-nested features to the result list that lie within the target
434 * range. Non-positional features (start=end=0), contact features and nested
435 * features are excluded.
441 protected void findNonNestedFeatures(long from, long to,
442 List<SequenceFeature> result)
444 int startIndex = binarySearch(nonNestedFeatures,
445 SearchCriterion.byEnd(from));
447 findNonNestedFeatures(startIndex, from, to, result);
451 * Scans the list of non-nested features, starting from startIndex, to find
452 * those that overlap the from-to range, and adds them to the result list.
453 * Returns the index of the first feature whose start position is after the
454 * target range (or the length of the whole list if none such feature exists).
462 protected int findNonNestedFeatures(final int startIndex, long from,
463 long to, List<SequenceFeature> result)
466 while (i < nonNestedFeatures.size())
468 SequenceFeature sf = nonNestedFeatures.get(i);
469 if (sf.getBegin() > to)
473 int start = sf.getBegin();
474 int end = sf.getEnd();
475 if (start <= to && end >= from)
485 * Adds contact features whose start position lies in the from-to range to the
492 protected void findContactStartFeatures(long from, long to,
493 List<SequenceFeature> result)
495 int startPosition = binarySearch(contactFeatureStarts,
496 SearchCriterion.byStart(from));
498 for (; startPosition < contactFeatureStarts.size(); startPosition++)
500 SequenceFeature sf = contactFeatureStarts.get(startPosition);
501 if (!sf.isContactFeature())
503 System.err.println("Error! non-contact feature type "
504 + sf.getType() + " in contact features list");
507 int begin = sf.getBegin();
508 if (begin >= from && begin <= to)
516 * Answers a list of all positional features stored, in no guaranteed order
520 public List<SequenceFeature> getPositionalFeatures()
523 * add non-nested features (may be all features for many cases)
525 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
526 result.addAll(nonNestedFeatures);
529 * add any contact features - from the list by start position
531 if (contactFeatureStarts != null)
533 result.addAll(contactFeatureStarts);
537 * add any nested features
539 if (nestedFeatures != null)
541 result.addAll(nestedFeatures.getEntries());
548 * Answers a list of all contact features. If there are none, returns an
549 * immutable empty list.
553 public List<SequenceFeature> getContactFeatures()
555 if (contactFeatureStarts == null)
557 return Collections.emptyList();
559 return new ArrayList<SequenceFeature>(contactFeatureStarts);
563 * Answers a list of all non-positional features. If there are none, returns
564 * an immutable empty list.
568 public List<SequenceFeature> getNonPositionalFeatures()
570 if (nonPositionalFeatures == null)
572 return Collections.emptyList();
574 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
578 * Deletes the given feature from the store, returning true if it was found
579 * (and deleted), else false. This method makes no assumption that the feature
580 * is in the 'expected' place in the store, in case it has been modified since
585 public synchronized boolean delete(SequenceFeature sf)
588 * try the non-nested positional features first
590 boolean removed = nonNestedFeatures.remove(sf);
593 * if not found, try contact positions (and if found, delete
594 * from both lists of contact positions)
596 if (!removed && contactFeatureStarts != null)
598 removed = contactFeatureStarts.remove(sf);
601 contactFeatureEnds.remove(sf);
605 boolean removedNonPositional = false;
608 * if not found, try non-positional features
610 if (!removed && nonPositionalFeatures != null)
612 removedNonPositional = nonPositionalFeatures.remove(sf);
613 removed = removedNonPositional;
617 * if not found, try nested features
619 if (!removed && nestedFeatures != null)
621 removed = nestedFeatures.delete(sf);
626 rebuildFeatureGroups(sf.getFeatureGroup(), removedNonPositional);
633 * Check whether the given feature group is still represented, in either
634 * positional or non-positional features, and if not, remove it from the set
637 * @param featureGroup
638 * @param nonPositional
640 protected void rebuildFeatureGroups(String featureGroup,
641 boolean nonPositional)
643 if (nonPositional && nonPositionalFeatures != null)
645 boolean found = false;
646 for (SequenceFeature sf : nonPositionalFeatures)
648 String group = sf.getFeatureGroup();
649 if (featureGroup == group
650 || (featureGroup != null && featureGroup.equals(group)))
658 nonPositionalFeatureGroups.remove(featureGroup);
661 else if (!findFeatureGroup(featureGroup))
663 positionalFeatureGroups.remove(featureGroup);
668 * Scans all positional features to check whether the given feature group is
669 * found, and returns true if found, else false
671 * @param featureGroup
674 protected boolean findFeatureGroup(String featureGroup)
676 for (SequenceFeature sf : getPositionalFeatures())
678 String group = sf.getFeatureGroup();
679 if (group == featureGroup
680 || (group != null && group.equals(featureGroup)))
689 * Answers true if this store has no features, else false
693 public boolean isEmpty()
695 boolean hasFeatures = !nonNestedFeatures.isEmpty()
696 || (contactFeatureStarts != null && !contactFeatureStarts
698 || (nonPositionalFeatures != null && !nonPositionalFeatures
700 || (nestedFeatures != null && nestedFeatures.size() > 0);
706 * Answers the set of distinct feature groups stored, possibly including null,
707 * as an unmodifiable view of the set. The parameter determines whether the
708 * groups for positional or for non-positional features are returned.
710 * @param positionalFeatures
713 public Set<String> getFeatureGroups(boolean positionalFeatures)
715 if (positionalFeatures)
717 return Collections.unmodifiableSet(positionalFeatureGroups);
721 return nonPositionalFeatureGroups == null ? Collections
722 .<String> emptySet() : Collections
723 .unmodifiableSet(nonPositionalFeatureGroups);
728 * Performs a binary search of the (sorted) list to find the index of the
729 * first entry which returns true for the given comparator function. Returns
730 * the length of the list if there is no such entry.
736 protected int binarySearch(List<SequenceFeature> features,
740 int end = features.size() - 1;
741 int matched = features.size();
745 int mid = (start + end) / 2;
746 SequenceFeature entry = features.get(mid);
747 boolean compare = sc.compare(entry);