1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashSet;
13 * A data store for a set of sequence features that supports efficient lookup of
14 * features overlapping a given range. Intended for (but not limited to) storage
15 * of features for one sequence and feature type.
20 public class FeatureStore
23 * a class providing criteria for performing a binary search of a list
25 abstract static class SearchCriterion
28 * Answers true if the entry passes the search criterion test
33 abstract boolean compare(SequenceFeature entry);
35 static SearchCriterion byStart(final long target)
37 return new SearchCriterion() {
40 boolean compare(SequenceFeature entry)
42 return entry.getBegin() >= target;
47 static SearchCriterion byEnd(final long target)
49 return new SearchCriterion()
53 boolean compare(SequenceFeature entry)
55 return entry.getEnd() >= target;
60 static SearchCriterion byFeature(final ContiguousI to,
61 final Comparator<ContiguousI> rc)
63 return new SearchCriterion()
67 boolean compare(SequenceFeature entry)
69 return rc.compare(entry, to) >= 0;
76 * Non-positional features have no (zero) start/end position.
77 * Kept as a separate list in case this criterion changes in future.
79 List<SequenceFeature> nonPositionalFeatures;
82 * An ordered list of features, with the promise that no feature in the list
83 * properly contains any other. This constraint allows bounded linear search
84 * of the list for features overlapping a region.
85 * Contact features are not included in this list.
87 List<SequenceFeature> nonNestedFeatures;
90 * contact features ordered by first contact position
92 List<SequenceFeature> contactFeatureStarts;
95 * contact features ordered by second contact position
97 List<SequenceFeature> contactFeatureEnds;
100 * Nested Containment List is used to hold any features that are nested
101 * within (properly contained by) any other feature. This is a recursive tree
102 * which supports depth-first scan for features overlapping a range.
103 * It is used here as a 'catch-all' fallback for features that cannot be put
104 * into a simple ordered list without invalidating the search methods.
106 NCList<SequenceFeature> nestedFeatures;
109 * Feature groups represented in stored positional features
110 * (possibly including null)
112 Set<String> positionalFeatureGroups;
115 * Feature groups represented in stored non-positional features
116 * (possibly including null)
118 Set<String> nonPositionalFeatureGroups;
121 * the total length of all positional features; contact features count 1 to
122 * the total and 1 to size(), consistent with an average 'feature length' of 1
126 float positionalMinScore;
128 float positionalMaxScore;
130 float nonPositionalMinScore;
132 float nonPositionalMaxScore;
137 public FeatureStore()
139 nonNestedFeatures = new ArrayList<SequenceFeature>();
140 positionalFeatureGroups = new HashSet<String>();
141 nonPositionalFeatureGroups = new HashSet<String>();
142 positionalMinScore = Float.NaN;
143 positionalMaxScore = Float.NaN;
144 nonPositionalMinScore = Float.NaN;
145 nonPositionalMaxScore = Float.NaN;
147 // we only construct nonPositionalFeatures, contactFeatures
148 // or the NCList if we need to
152 * Adds one sequence feature to the store, and returns true, unless the
153 * feature is already contained in the store, in which case this method
154 * returns false. Containment is determined by SequenceFeature.equals()
159 public boolean addFeature(SequenceFeature feature)
162 * keep a record of feature groups
164 if (!feature.isNonPositional())
166 positionalFeatureGroups.add(feature.getFeatureGroup());
169 boolean added = false;
171 if (feature.isContactFeature())
173 added = addContactFeature(feature);
175 else if (feature.isNonPositional())
177 added = addNonPositionalFeature(feature);
181 if (!contains(nonNestedFeatures, feature))
183 added = addNonNestedFeature(feature);
187 * detected a nested feature - put it in the NCList structure
189 added = addNestedFeature(feature);
197 * record the total extent of positional features, to make
198 * getTotalFeatureLength possible; we count the length of a
199 * contact feature as 1
201 totalExtent += getFeatureLength(feature);
204 * record the minimum and maximum score for positional
205 * and non-positional features
207 float score = feature.getScore();
208 if (!Float.isNaN(score))
210 if (feature.isNonPositional())
212 nonPositionalMinScore = min(nonPositionalMinScore, score);
213 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
217 positionalMinScore = min(positionalMinScore, score);
218 positionalMaxScore = max(positionalMaxScore, score);
227 * Answers the 'length' of the feature, counting 0 for non-positional features
228 * and 1 for contact features
233 protected static int getFeatureLength(SequenceFeature feature)
235 if (feature.isNonPositional())
239 if (feature.isContactFeature())
243 return 1 + feature.getEnd() - feature.getBegin();
247 * Adds the feature to the list of non-positional features (with lazy
248 * instantiation of the list if it is null), and returns true. If the
249 * non-positional features already include the new feature (by equality test),
250 * then it is not added, and this method returns false. The feature group is
251 * added to the set of distinct feature groups for non-positional features.
255 protected boolean addNonPositionalFeature(SequenceFeature feature)
257 if (nonPositionalFeatures == null)
259 nonPositionalFeatures = new ArrayList<SequenceFeature>();
261 if (nonPositionalFeatures.contains(feature))
266 nonPositionalFeatures.add(feature);
268 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
274 * Adds one feature to the NCList that can manage nested features (creating
275 * the NCList if necessary), and returns true. If the feature is already
276 * stored in the NCList (by equality test), then it is not added, and this
277 * method returns false.
279 protected synchronized boolean addNestedFeature(SequenceFeature feature)
281 if (nestedFeatures == null)
283 nestedFeatures = new NCList<SequenceFeature>(feature);
286 return nestedFeatures.add(feature, false);
290 * Add a feature to the list of non-nested features, maintaining the ordering
291 * of the list. A check is made for whether the feature is nested in (properly
292 * contained by) an existing feature. If there is no nesting, the feature is
293 * added to the list and the method returns true. If nesting is found, the
294 * feature is not added and the method returns false.
299 protected boolean addNonNestedFeature(SequenceFeature feature)
301 synchronized (nonNestedFeatures)
304 * find the first stored feature which doesn't precede the new one
306 int insertPosition = binarySearch(nonNestedFeatures,
307 SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION));
310 * fail if we detect feature enclosure - of the new feature by
311 * the one preceding it, or of the next feature by the new one
313 if (insertPosition > 0)
315 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
320 if (insertPosition < nonNestedFeatures.size())
322 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
329 * checks passed - add the feature
331 nonNestedFeatures.add(insertPosition, feature);
338 * Answers true if range1 properly encloses range2, else false
344 protected boolean encloses(ContiguousI range1, ContiguousI range2)
346 int begin1 = range1.getBegin();
347 int begin2 = range2.getBegin();
348 int end1 = range1.getEnd();
349 int end2 = range2.getEnd();
350 if (begin1 == begin2 && end1 > end2)
354 if (begin1 < begin2 && end1 >= end2)
362 * Add a contact feature to the lists that hold them ordered by start (first
363 * contact) and by end (second contact) position, ensuring the lists remain
364 * ordered, and returns true. If the contact feature lists already contain the
365 * given feature (by test for equality), does not add it and returns false.
370 protected synchronized boolean addContactFeature(SequenceFeature feature)
372 if (contactFeatureStarts == null)
374 contactFeatureStarts = new ArrayList<SequenceFeature>();
376 if (contactFeatureEnds == null)
378 contactFeatureEnds = new ArrayList<SequenceFeature>();
381 if (contains(contactFeatureStarts, feature))
387 * binary search the sorted list to find the insertion point
389 int insertPosition = binarySearch(contactFeatureStarts,
390 SearchCriterion.byFeature(feature,
391 RangeComparator.BY_START_POSITION));
392 contactFeatureStarts.add(insertPosition, feature);
393 // and resort to mak siccar...just in case insertion point not quite right
394 Collections.sort(contactFeatureStarts, RangeComparator.BY_START_POSITION);
396 insertPosition = binarySearch(contactFeatureStarts,
397 SearchCriterion.byFeature(feature,
398 RangeComparator.BY_END_POSITION));
399 contactFeatureEnds.add(feature);
400 Collections.sort(contactFeatureEnds, RangeComparator.BY_END_POSITION);
406 * Answers true if the list contains the feature, else false. This method is
407 * optimised for the condition that the list is sorted on feature start
408 * position ascending, and will give unreliable results if this does not hold.
414 protected static boolean contains(List<SequenceFeature> features,
415 SequenceFeature feature)
417 if (features == null || feature == null)
423 * locate the first entry in the list which does not precede the feature
425 int pos = binarySearch(features,
426 SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION));
427 int len = features.size();
430 SequenceFeature sf = features.get(pos);
431 if (sf.getBegin() > feature.getBegin())
433 return false; // no match found
435 if (sf.equals(feature))
445 * Returns a (possibly empty) list of features whose extent overlaps the given
446 * range. The returned list is not ordered. Contact features are included if
447 * either of the contact points lies within the range.
450 * start position of overlap range (inclusive)
452 * end position of overlap range (inclusive)
455 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
457 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
459 findNonNestedFeatures(start, end, result);
461 findContactFeatures(start, end, result);
463 if (nestedFeatures != null)
465 result.addAll(nestedFeatures.findOverlaps(start, end));
472 * Adds contact features to the result list where either the second or the
473 * first contact position lies within the target range
479 protected void findContactFeatures(long from, long to,
480 List<SequenceFeature> result)
482 if (contactFeatureStarts != null)
484 findContactStartFeatures(from, to, result);
486 if (contactFeatureEnds != null)
488 findContactEndFeatures(from, to, result);
493 * Adds to the result list any contact features whose end (second contact
494 * point), but not start (first contact point), lies in the query from-to
501 protected void findContactEndFeatures(long from, long to,
502 List<SequenceFeature> result)
505 * find the first contact feature (if any) that does not lie
506 * entirely before the target range
508 int startPosition = binarySearch(contactFeatureEnds,
509 SearchCriterion.byEnd(from));
510 for (; startPosition < contactFeatureEnds.size(); startPosition++)
512 SequenceFeature sf = contactFeatureEnds.get(startPosition);
513 if (!sf.isContactFeature())
515 System.err.println("Error! non-contact feature type "
516 + sf.getType() + " in contact features list");
520 int begin = sf.getBegin();
521 if (begin >= from && begin <= to)
524 * this feature's first contact position lies in the search range
525 * so we don't include it in results a second time
530 int end = sf.getEnd();
531 if (end >= from && end <= to)
543 * Adds non-nested features to the result list that lie within the target
544 * range. Non-positional features (start=end=0), contact features and nested
545 * features are excluded.
551 protected void findNonNestedFeatures(long from, long to,
552 List<SequenceFeature> result)
554 int startIndex = binarySearch(nonNestedFeatures,
555 SearchCriterion.byEnd(from));
557 findNonNestedFeatures(startIndex, from, to, result);
561 * Scans the list of non-nested features, starting from startIndex, to find
562 * those that overlap the from-to range, and adds them to the result list.
563 * Returns the index of the first feature whose start position is after the
564 * target range (or the length of the whole list if none such feature exists).
572 protected int findNonNestedFeatures(final int startIndex, long from,
573 long to, List<SequenceFeature> result)
576 while (i < nonNestedFeatures.size())
578 SequenceFeature sf = nonNestedFeatures.get(i);
579 if (sf.getBegin() > to)
583 int start = sf.getBegin();
584 int end = sf.getEnd();
585 if (start <= to && end >= from)
595 * Adds contact features whose start position lies in the from-to range to the
602 protected void findContactStartFeatures(long from, long to,
603 List<SequenceFeature> result)
605 int startPosition = binarySearch(contactFeatureStarts,
606 SearchCriterion.byStart(from));
608 for (; startPosition < contactFeatureStarts.size(); startPosition++)
610 SequenceFeature sf = contactFeatureStarts.get(startPosition);
611 if (!sf.isContactFeature())
613 System.err.println("Error! non-contact feature type "
614 + sf.getType() + " in contact features list");
617 int begin = sf.getBegin();
618 if (begin >= from && begin <= to)
626 * Answers a list of all positional features stored, in no guaranteed order
630 public List<SequenceFeature> getPositionalFeatures()
633 * add non-nested features (may be all features for many cases)
635 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
636 result.addAll(nonNestedFeatures);
639 * add any contact features - from the list by start position
641 if (contactFeatureStarts != null)
643 result.addAll(contactFeatureStarts);
647 * add any nested features
649 if (nestedFeatures != null)
651 result.addAll(nestedFeatures.getEntries());
658 * Answers a list of all contact features. If there are none, returns an
659 * immutable empty list.
663 public List<SequenceFeature> getContactFeatures()
665 if (contactFeatureStarts == null)
667 return Collections.emptyList();
669 return new ArrayList<SequenceFeature>(contactFeatureStarts);
673 * Answers a list of all non-positional features. If there are none, returns
674 * an immutable empty list.
678 public List<SequenceFeature> getNonPositionalFeatures()
680 if (nonPositionalFeatures == null)
682 return Collections.emptyList();
684 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
688 * Deletes the given feature from the store, returning true if it was found
689 * (and deleted), else false. This method makes no assumption that the feature
690 * is in the 'expected' place in the store, in case it has been modified since
695 public synchronized boolean delete(SequenceFeature sf)
698 * try the non-nested positional features first
700 boolean removed = nonNestedFeatures.remove(sf);
703 * if not found, try contact positions (and if found, delete
704 * from both lists of contact positions)
706 if (!removed && contactFeatureStarts != null)
708 removed = contactFeatureStarts.remove(sf);
711 contactFeatureEnds.remove(sf);
715 boolean removedNonPositional = false;
718 * if not found, try non-positional features
720 if (!removed && nonPositionalFeatures != null)
722 removedNonPositional = nonPositionalFeatures.remove(sf);
723 removed = removedNonPositional;
727 * if not found, try nested features
729 if (!removed && nestedFeatures != null)
731 removed = nestedFeatures.delete(sf);
743 * Rescan all features to recompute any cached values after an entry has been
744 * deleted. This is expected to be an infrequent event, so performance here is
747 protected synchronized void rescanAfterDelete()
749 positionalFeatureGroups.clear();
750 nonPositionalFeatureGroups.clear();
752 positionalMinScore = Float.NaN;
753 positionalMaxScore = Float.NaN;
754 nonPositionalMinScore = Float.NaN;
755 nonPositionalMaxScore = Float.NaN;
758 * scan non-positional features for groups and scores
760 for (SequenceFeature sf : getNonPositionalFeatures())
762 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
763 float score = sf.getScore();
764 nonPositionalMinScore = min(nonPositionalMinScore, score);
765 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
769 * scan positional features for groups, scores and extents
771 for (SequenceFeature sf : getPositionalFeatures())
773 positionalFeatureGroups.add(sf.getFeatureGroup());
774 float score = sf.getScore();
775 positionalMinScore = min(positionalMinScore, score);
776 positionalMaxScore = max(positionalMaxScore, score);
777 totalExtent += getFeatureLength(sf);
782 * A helper method to return the minimum of two floats, where a non-NaN value
783 * is treated as 'less than' a NaN value (unlike Math.min which does the
789 protected static float min(float f1, float f2)
793 return Float.isNaN(f2) ? f1 : f2;
797 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
802 * A helper method to return the maximum of two floats, where a non-NaN value
803 * is treated as 'greater than' a NaN value (unlike Math.max which does the
809 protected static float max(float f1, float f2)
813 return Float.isNaN(f2) ? f1 : f2;
817 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
822 * Scans all positional features to check whether the given feature group is
823 * found, and returns true if found, else false
825 * @param featureGroup
828 protected boolean findFeatureGroup(String featureGroup)
830 for (SequenceFeature sf : getPositionalFeatures())
832 String group = sf.getFeatureGroup();
833 if (group == featureGroup
834 || (group != null && group.equals(featureGroup)))
843 * Answers true if this store has no features, else false
847 public boolean isEmpty()
849 boolean hasFeatures = !nonNestedFeatures.isEmpty()
850 || (contactFeatureStarts != null && !contactFeatureStarts
852 || (nonPositionalFeatures != null && !nonPositionalFeatures
854 || (nestedFeatures != null && nestedFeatures.size() > 0);
860 * Answers the set of distinct feature groups stored, possibly including null,
861 * as an unmodifiable view of the set. The parameter determines whether the
862 * groups for positional or for non-positional features are returned.
864 * @param positionalFeatures
867 public Set<String> getFeatureGroups(boolean positionalFeatures)
869 if (positionalFeatures)
871 return Collections.unmodifiableSet(positionalFeatureGroups);
875 return nonPositionalFeatureGroups == null ? Collections
876 .<String> emptySet() : Collections
877 .unmodifiableSet(nonPositionalFeatureGroups);
882 * Performs a binary search of the (sorted) list to find the index of the
883 * first entry which returns true for the given comparator function. Returns
884 * the length of the list if there is no such entry.
890 protected static int binarySearch(List<SequenceFeature> features,
894 int end = features.size() - 1;
895 int matched = features.size();
899 int mid = (start + end) / 2;
900 SequenceFeature entry = features.get(mid);
901 boolean compare = sc.compare(entry);
917 * Answers the number of positional (or non-positional) features stored.
918 * Contact features count as 1.
923 public int getFeatureCount(boolean positional)
927 return nonPositionalFeatures == null ? 0 : nonPositionalFeatures
931 int size = nonNestedFeatures.size();
933 if (contactFeatureStarts != null)
935 // note a contact feature (start/end) counts as one
936 size += contactFeatureStarts.size();
939 if (nestedFeatures != null)
941 size += nestedFeatures.size();
948 * Answers the total length of positional features (or zero if there are
949 * none). Contact features contribute a value of 1 to the total.
953 public int getTotalFeatureLength()
959 * Answers the minimum score held for positional or non-positional features.
960 * This may be Float.NaN if there are no features, are none has a non-NaN
966 public float getMinimumScore(boolean positional)
968 return positional ? positionalMinScore : nonPositionalMinScore;
972 * Answers the maximum score held for positional or non-positional features.
973 * This may be Float.NaN if there are no features, are none has a non-NaN
979 public float getMaximumScore(boolean positional)
981 return positional ? positionalMaxScore : nonPositionalMaxScore;