1 package jalview.datamodel.features;
3 import jalview.datamodel.SequenceFeature;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashSet;
13 * A data store for a set of sequence features that supports efficient lookup of
14 * features overlapping a given range. Intended for (but not limited to) storage
15 * of features for one sequence and feature type.
20 public class FeatureStore
23 * a class providing criteria for performing a binary search of a list
25 abstract static class SearchCriterion
28 * Answers true if the entry passes the search criterion test
33 abstract boolean compare(SequenceFeature entry);
35 static SearchCriterion byStart(final long target)
37 return new SearchCriterion() {
40 boolean compare(SequenceFeature entry)
42 return entry.getBegin() >= target;
47 static SearchCriterion byEnd(final long target)
49 return new SearchCriterion()
53 boolean compare(SequenceFeature entry)
55 return entry.getEnd() >= target;
60 static SearchCriterion byFeature(final ContiguousI to,
61 final Comparator<ContiguousI> rc)
63 return new SearchCriterion()
67 boolean compare(SequenceFeature entry)
69 return rc.compare(entry, to) >= 0;
75 Comparator<ContiguousI> startOrdering = new RangeComparator(true);
77 Comparator<ContiguousI> endOrdering = new RangeComparator(false);
80 * Non-positional features have no (zero) start/end position.
81 * Kept as a separate list in case this criterion changes in future.
83 List<SequenceFeature> nonPositionalFeatures;
86 * An ordered list of features, with the promise that no feature in the list
87 * properly contains any other. This constraint allows bounded linear search
88 * of the list for features overlapping a region.
89 * Contact features are not included in this list.
91 List<SequenceFeature> nonNestedFeatures;
94 * contact features ordered by first contact position
96 List<SequenceFeature> contactFeatureStarts;
99 * contact features ordered by second contact position
101 List<SequenceFeature> contactFeatureEnds;
104 * Nested Containment List is used to hold any features that are nested
105 * within (properly contained by) any other feature. This is a recursive tree
106 * which supports depth-first scan for features overlapping a range.
107 * It is used here as a 'catch-all' fallback for features that cannot be put
108 * into a simple ordered list without invalidating the search methods.
110 NCList<SequenceFeature> nestedFeatures;
113 * Feature groups represented in stored positional features
114 * (possibly including null)
116 Set<String> positionalFeatureGroups;
119 * Feature groups represented in stored non-positional features
120 * (possibly including null)
122 Set<String> nonPositionalFeatureGroups;
125 * the total length of all positional features; contact features count 1 to
126 * the total and 1 to size(), consistent with an average 'feature length' of 1
130 float positionalMinScore;
132 float positionalMaxScore;
134 float nonPositionalMinScore;
136 float nonPositionalMaxScore;
141 public FeatureStore()
143 nonNestedFeatures = new ArrayList<SequenceFeature>();
144 positionalFeatureGroups = new HashSet<String>();
145 nonPositionalFeatureGroups = new HashSet<String>();
146 positionalMinScore = Float.NaN;
147 positionalMaxScore = Float.NaN;
148 nonPositionalMinScore = Float.NaN;
149 nonPositionalMaxScore = Float.NaN;
151 // we only construct nonPositionalFeatures, contactFeatures
152 // or the NCList if we need to
156 * Adds one sequence feature to the store, and returns true, unless the
157 * feature is already contained in the store, in which case this method
158 * returns false. Containment is determined by SequenceFeature.equals()
163 public boolean addFeature(SequenceFeature feature)
166 * keep a record of feature groups
168 if (!feature.isNonPositional())
170 positionalFeatureGroups.add(feature.getFeatureGroup());
173 boolean added = false;
175 if (feature.isContactFeature())
177 added = addContactFeature(feature);
179 else if (feature.isNonPositional())
181 added = addNonPositionalFeature(feature);
185 if (!nonNestedFeatures.contains(feature))
187 added = addNonNestedFeature(feature);
191 * detected a nested feature - put it in the NCList structure
193 added = addNestedFeature(feature);
201 * record the total extent of positional features, to make
202 * getTotalFeatureLength possible; we count the length of a
203 * contact feature as 1
205 totalExtent += getFeatureLength(feature);
208 * record the minimum and maximum score for positional
209 * and non-positional features
211 float score = feature.getScore();
212 if (!Float.isNaN(score))
214 if (feature.isNonPositional())
216 nonPositionalMinScore = min(nonPositionalMinScore, score);
217 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
221 positionalMinScore = min(positionalMinScore, score);
222 positionalMaxScore = max(positionalMaxScore, score);
231 * Answers the 'length' of the feature, counting 0 for non-positional features
232 * and 1 for contact features
237 protected static int getFeatureLength(SequenceFeature feature)
239 if (feature.isNonPositional())
243 if (feature.isContactFeature())
247 return 1 + feature.getEnd() - feature.getBegin();
251 * Adds the feature to the list of non-positional features (with lazy
252 * instantiation of the list if it is null), and returns true. If the
253 * non-positional features already include the new feature (by equality test),
254 * then it is not added, and this method returns false. The feature group is
255 * added to the set of distinct feature groups for non-positional features.
259 protected boolean addNonPositionalFeature(SequenceFeature feature)
261 if (nonPositionalFeatures == null)
263 nonPositionalFeatures = new ArrayList<SequenceFeature>();
265 if (nonPositionalFeatures.contains(feature))
270 nonPositionalFeatures.add(feature);
272 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
278 * Adds one feature to the NCList that can manage nested features (creating
279 * the NCList if necessary), and returns true. If the feature is already
280 * stored in the NCList (by equality test), then it is not added, and this
281 * method returns false.
283 protected synchronized boolean addNestedFeature(SequenceFeature feature)
285 if (nestedFeatures == null)
287 nestedFeatures = new NCList<SequenceFeature>(feature);
290 return nestedFeatures.add(feature, false);
294 * Add a feature to the list of non-nested features, maintaining the ordering
295 * of the list. A check is made for whether the feature is nested in (properly
296 * contained by) an existing feature. If there is no nesting, the feature is
297 * added to the list and the method returns true. If nesting is found, the
298 * feature is not added and the method returns false.
303 protected boolean addNonNestedFeature(SequenceFeature feature)
305 synchronized (nonNestedFeatures)
308 * find the first stored feature which doesn't precede the new one
310 int insertPosition = binarySearch(nonNestedFeatures,
311 SearchCriterion.byFeature(feature, startOrdering));
314 * fail if we detect feature enclosure - of the new feature by
315 * the one preceding it, or of the next feature by the new one
317 if (insertPosition > 0)
319 if (encloses(nonNestedFeatures.get(insertPosition - 1), feature))
324 if (insertPosition < nonNestedFeatures.size())
326 if (encloses(feature, nonNestedFeatures.get(insertPosition)))
333 * checks passed - add the feature
335 nonNestedFeatures.add(insertPosition, feature);
342 * Answers true if range1 properly encloses range2, else false
348 protected boolean encloses(ContiguousI range1, ContiguousI range2)
350 int begin1 = range1.getBegin();
351 int begin2 = range2.getBegin();
352 int end1 = range1.getEnd();
353 int end2 = range2.getEnd();
354 if (begin1 == begin2 && end1 > end2)
358 if (begin1 < begin2 && end1 >= end2)
366 * Add a contact feature to the lists that hold them ordered by start (first
367 * contact) and by end (second contact) position, ensuring the lists remain
368 * ordered, and returns true. If the contact feature lists already contain the
369 * given feature (by test for equality), does not add it and returns false.
374 protected synchronized boolean addContactFeature(SequenceFeature feature)
376 if (contactFeatureStarts == null)
378 contactFeatureStarts = new ArrayList<SequenceFeature>();
380 if (contactFeatureEnds == null)
382 contactFeatureEnds = new ArrayList<SequenceFeature>();
385 // TODO binary search for insertion points!
386 if (contactFeatureStarts.contains(feature))
391 contactFeatureStarts.add(feature);
392 Collections.sort(contactFeatureStarts, startOrdering);
393 contactFeatureEnds.add(feature);
394 Collections.sort(contactFeatureEnds, endOrdering);
400 * Returns a (possibly empty) list of features whose extent overlaps the given
401 * range. The returned list is not ordered. Contact features are included if
402 * either of the contact points lies within the range.
405 * start position of overlap range (inclusive)
407 * end position of overlap range (inclusive)
410 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
412 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
414 findNonNestedFeatures(start, end, result);
416 findContactFeatures(start, end, result);
418 if (nestedFeatures != null)
420 result.addAll(nestedFeatures.findOverlaps(start, end));
427 * Adds contact features to the result list where either the second or the
428 * first contact position lies within the target range
434 protected void findContactFeatures(long from, long to,
435 List<SequenceFeature> result)
437 if (contactFeatureStarts != null)
439 findContactStartFeatures(from, to, result);
441 if (contactFeatureEnds != null)
443 findContactEndFeatures(from, to, result);
448 * Adds to the result list any contact features whose end (second contact
449 * point), but not start (first contact point), lies in the query from-to
456 protected void findContactEndFeatures(long from, long to,
457 List<SequenceFeature> result)
460 * find the first contact feature (if any) that does not lie
461 * entirely before the target range
463 int startPosition = binarySearch(contactFeatureEnds,
464 SearchCriterion.byEnd(from));
465 for (; startPosition < contactFeatureEnds.size(); startPosition++)
467 SequenceFeature sf = contactFeatureEnds.get(startPosition);
468 if (!sf.isContactFeature())
470 System.err.println("Error! non-contact feature type "
471 + sf.getType() + " in contact features list");
475 int begin = sf.getBegin();
476 if (begin >= from && begin <= to)
479 * this feature's first contact position lies in the search range
480 * so we don't include it in results a second time
485 int end = sf.getEnd();
486 if (end >= from && end <= to)
498 * Adds non-nested features to the result list that lie within the target
499 * range. Non-positional features (start=end=0), contact features and nested
500 * features are excluded.
506 protected void findNonNestedFeatures(long from, long to,
507 List<SequenceFeature> result)
509 int startIndex = binarySearch(nonNestedFeatures,
510 SearchCriterion.byEnd(from));
512 findNonNestedFeatures(startIndex, from, to, result);
516 * Scans the list of non-nested features, starting from startIndex, to find
517 * those that overlap the from-to range, and adds them to the result list.
518 * Returns the index of the first feature whose start position is after the
519 * target range (or the length of the whole list if none such feature exists).
527 protected int findNonNestedFeatures(final int startIndex, long from,
528 long to, List<SequenceFeature> result)
531 while (i < nonNestedFeatures.size())
533 SequenceFeature sf = nonNestedFeatures.get(i);
534 if (sf.getBegin() > to)
538 int start = sf.getBegin();
539 int end = sf.getEnd();
540 if (start <= to && end >= from)
550 * Adds contact features whose start position lies in the from-to range to the
557 protected void findContactStartFeatures(long from, long to,
558 List<SequenceFeature> result)
560 int startPosition = binarySearch(contactFeatureStarts,
561 SearchCriterion.byStart(from));
563 for (; startPosition < contactFeatureStarts.size(); startPosition++)
565 SequenceFeature sf = contactFeatureStarts.get(startPosition);
566 if (!sf.isContactFeature())
568 System.err.println("Error! non-contact feature type "
569 + sf.getType() + " in contact features list");
572 int begin = sf.getBegin();
573 if (begin >= from && begin <= to)
581 * Answers a list of all positional features stored, in no guaranteed order
585 public List<SequenceFeature> getPositionalFeatures()
588 * add non-nested features (may be all features for many cases)
590 List<SequenceFeature> result = new ArrayList<SequenceFeature>();
591 result.addAll(nonNestedFeatures);
594 * add any contact features - from the list by start position
596 if (contactFeatureStarts != null)
598 result.addAll(contactFeatureStarts);
602 * add any nested features
604 if (nestedFeatures != null)
606 result.addAll(nestedFeatures.getEntries());
613 * Answers a list of all contact features. If there are none, returns an
614 * immutable empty list.
618 public List<SequenceFeature> getContactFeatures()
620 if (contactFeatureStarts == null)
622 return Collections.emptyList();
624 return new ArrayList<SequenceFeature>(contactFeatureStarts);
628 * Answers a list of all non-positional features. If there are none, returns
629 * an immutable empty list.
633 public List<SequenceFeature> getNonPositionalFeatures()
635 if (nonPositionalFeatures == null)
637 return Collections.emptyList();
639 return new ArrayList<SequenceFeature>(nonPositionalFeatures);
643 * Deletes the given feature from the store, returning true if it was found
644 * (and deleted), else false. This method makes no assumption that the feature
645 * is in the 'expected' place in the store, in case it has been modified since
650 public synchronized boolean delete(SequenceFeature sf)
653 * try the non-nested positional features first
655 boolean removed = nonNestedFeatures.remove(sf);
658 * if not found, try contact positions (and if found, delete
659 * from both lists of contact positions)
661 if (!removed && contactFeatureStarts != null)
663 removed = contactFeatureStarts.remove(sf);
666 contactFeatureEnds.remove(sf);
670 boolean removedNonPositional = false;
673 * if not found, try non-positional features
675 if (!removed && nonPositionalFeatures != null)
677 removedNonPositional = nonPositionalFeatures.remove(sf);
678 removed = removedNonPositional;
682 * if not found, try nested features
684 if (!removed && nestedFeatures != null)
686 removed = nestedFeatures.delete(sf);
698 * Rescan all features to recompute any cached values after an entry has been
701 protected synchronized void rescanAfterDelete()
703 positionalFeatureGroups.clear();
704 nonPositionalFeatureGroups.clear();
706 positionalMinScore = Float.NaN;
707 positionalMaxScore = Float.NaN;
708 nonPositionalMinScore = Float.NaN;
709 nonPositionalMaxScore = Float.NaN;
712 * scan non-positional features for groups and scores
714 for (SequenceFeature sf : getNonPositionalFeatures())
716 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
717 float score = sf.getScore();
718 nonPositionalMinScore = min(nonPositionalMinScore, score);
719 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
723 * scan positional features for groups, scores and extents
725 for (SequenceFeature sf : getPositionalFeatures())
727 positionalFeatureGroups.add(sf.getFeatureGroup());
728 float score = sf.getScore();
729 positionalMinScore = min(positionalMinScore, score);
730 positionalMaxScore = max(positionalMaxScore, score);
731 totalExtent += getFeatureLength(sf);
736 * A helper method to return the minimum of two floats, where a non-NaN value
737 * is treated as 'less than' a NaN value (unlike Math.min which does the
743 protected static float min(float f1, float f2)
747 return Float.isNaN(f2) ? f1 : f2;
751 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
756 * A helper method to return the maximum of two floats, where a non-NaN value
757 * is treated as 'greater than' a NaN value (unlike Math.max which does the
763 protected static float max(float f1, float f2)
767 return Float.isNaN(f2) ? f1 : f2;
771 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
776 * Scans all positional features to check whether the given feature group is
777 * found, and returns true if found, else false
779 * @param featureGroup
782 protected boolean findFeatureGroup(String featureGroup)
784 for (SequenceFeature sf : getPositionalFeatures())
786 String group = sf.getFeatureGroup();
787 if (group == featureGroup
788 || (group != null && group.equals(featureGroup)))
797 * Answers true if this store has no features, else false
801 public boolean isEmpty()
803 boolean hasFeatures = !nonNestedFeatures.isEmpty()
804 || (contactFeatureStarts != null && !contactFeatureStarts
806 || (nonPositionalFeatures != null && !nonPositionalFeatures
808 || (nestedFeatures != null && nestedFeatures.size() > 0);
814 * Answers the set of distinct feature groups stored, possibly including null,
815 * as an unmodifiable view of the set. The parameter determines whether the
816 * groups for positional or for non-positional features are returned.
818 * @param positionalFeatures
821 public Set<String> getFeatureGroups(boolean positionalFeatures)
823 if (positionalFeatures)
825 return Collections.unmodifiableSet(positionalFeatureGroups);
829 return nonPositionalFeatureGroups == null ? Collections
830 .<String> emptySet() : Collections
831 .unmodifiableSet(nonPositionalFeatureGroups);
836 * Performs a binary search of the (sorted) list to find the index of the
837 * first entry which returns true for the given comparator function. Returns
838 * the length of the list if there is no such entry.
844 protected int binarySearch(List<SequenceFeature> features,
848 int end = features.size() - 1;
849 int matched = features.size();
853 int mid = (start + end) / 2;
854 SequenceFeature entry = features.get(mid);
855 boolean compare = sc.compare(entry);
871 * Answers the number of positional (or non-positional) features stored.
872 * Contact features count as 1.
877 public int getFeatureCount(boolean positional)
881 return nonPositionalFeatures == null ? 0 : nonPositionalFeatures
885 int size = nonNestedFeatures.size();
887 if (contactFeatureStarts != null)
889 // note a contact feature (start/end) counts as one
890 size += contactFeatureStarts.size();
893 if (nestedFeatures != null)
895 size += nestedFeatures.size();
902 * Answers the total length of positional features (or zero if there are
903 * none). Contact features contribute a value of 1 to the total.
907 public int getTotalFeatureLength()
913 * Answers the minimum score held for positional or non-positional features.
914 * This may be Float.NaN if there are no features, are none has a non-NaN
920 public float getMinimumScore(boolean positional)
922 return positional ? positionalMinScore : nonPositionalMinScore;
926 * Answers the maximum score held for positional or non-positional features.
927 * This may be Float.NaN if there are no features, are none has a non-NaN
933 public float getMaximumScore(boolean positional)
935 return positional ? positionalMaxScore : nonPositionalMaxScore;