X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=84e77365e48e2daaa514c106344aaff3961a392b;hb=e6798fd04b1d7a35836a2e84deae5a94a35b88b9;hp=cb4bd6fe18d3ac0f42e1ec14fe928186b616dd65;hpb=fdea751663ec46a587cfdf45bfae9ec667043efb;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index cb4bd6f..84e7736 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -1,11 +1,14 @@ package jalview.datamodel.features; +import jalview.datamodel.ContiguousI; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * A data store for a set of sequence features that supports efficient lookup of @@ -17,12 +20,62 @@ import java.util.List; */ public class FeatureStore { - Comparator startOrdering = new RangeComparator(true); + /** + * a class providing criteria for performing a binary search of a list + */ + abstract static class SearchCriterion + { + /** + * Answers true if the entry passes the search criterion test + * + * @param entry + * @return + */ + abstract boolean compare(SequenceFeature entry); + + static SearchCriterion byStart(final long target) + { + return new SearchCriterion() { + + @Override + boolean compare(SequenceFeature entry) + { + return entry.getBegin() >= target; + } + }; + } + + static SearchCriterion byEnd(final long target) + { + return new SearchCriterion() + { + + @Override + boolean compare(SequenceFeature entry) + { + return entry.getEnd() >= target; + } + }; + } + + static SearchCriterion byFeature(final ContiguousI to, + final Comparator rc) + { + return new SearchCriterion() + { - Comparator endOrdering = new RangeComparator(false); + @Override + boolean compare(SequenceFeature entry) + { + return rc.compare(entry, to) >= 0; + } + }; + } + } /* * Non-positional features have no (zero) start/end position. + * Kept as a separate list in case this criterion changes in future. */ List nonPositionalFeatures; @@ -53,12 +106,45 @@ public class FeatureStore */ NCList nestedFeatures; + /* + * Feature groups represented in stored positional features + * (possibly including null) + */ + Set positionalFeatureGroups; + + /* + * Feature groups represented in stored non-positional features + * (possibly including null) + */ + Set nonPositionalFeatureGroups; + + /* + * the total length of all positional features; contact features count 1 to + * the total and 1 to size(), consistent with an average 'feature length' of 1 + */ + int totalExtent; + + float positionalMinScore; + + float positionalMaxScore; + + float nonPositionalMinScore; + + float nonPositionalMaxScore; + /** * Constructor */ public FeatureStore() { nonNestedFeatures = new ArrayList(); + positionalFeatureGroups = new HashSet(); + nonPositionalFeatureGroups = new HashSet(); + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + // we only construct nonPositionalFeatures, contactFeatures // or the NCList if we need to } @@ -73,6 +159,14 @@ public class FeatureStore */ public boolean addFeature(SequenceFeature feature) { + /* + * keep a record of feature groups + */ + if (!feature.isNonPositional()) + { + positionalFeatureGroups.add(feature.getFeatureGroup()); + } + boolean added = false; if (feature.isContactFeature()) @@ -85,7 +179,7 @@ public class FeatureStore } else { - if (!nonNestedFeatures.contains(feature)) + if (!contains(nonNestedFeatures, feature)) { added = addNonNestedFeature(feature); if (!added) @@ -98,14 +192,64 @@ public class FeatureStore } } + if (added) + { + /* + * record the total extent of positional features, to make + * getTotalFeatureLength possible; we count the length of a + * contact feature as 1 + */ + totalExtent += getFeatureLength(feature); + + /* + * record the minimum and maximum score for positional + * and non-positional features + */ + float score = feature.getScore(); + if (!Float.isNaN(score)) + { + if (feature.isNonPositional()) + { + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } + else + { + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + } + } + } + return added; } /** + * Answers the 'length' of the feature, counting 0 for non-positional features + * and 1 for contact features + * + * @param feature + * @return + */ + protected static int getFeatureLength(SequenceFeature feature) + { + if (feature.isNonPositional()) + { + return 0; + } + if (feature.isContactFeature()) + { + return 1; + } + return 1 + feature.getEnd() - feature.getBegin(); + } + + /** * Adds the feature to the list of non-positional features (with lazy * instantiation of the list if it is null), and returns true. If the * non-positional features already include the new feature (by equality test), - * then it is not added, and this method returns false. + * then it is not added, and this method returns false. The feature group is + * added to the set of distinct feature groups for non-positional features. * * @param feature */ @@ -119,7 +263,11 @@ public class FeatureStore { return false; } + nonPositionalFeatures.add(feature); + + nonPositionalFeatureGroups.add(feature.getFeatureGroup()); + return true; } @@ -133,7 +281,7 @@ public class FeatureStore { if (nestedFeatures == null) { - nestedFeatures = new NCList(feature); + nestedFeatures = new NCList<>(feature); return true; } return nestedFeatures.add(feature, false); @@ -145,8 +293,6 @@ public class FeatureStore * contained by) an existing feature. If there is no nesting, the feature is * added to the list and the method returns true. If nesting is found, the * feature is not added and the method returns false. - *

- * Contact features are added at the position of their first contact point * * @param feature * @return @@ -155,7 +301,11 @@ public class FeatureStore { synchronized (nonNestedFeatures) { - int insertPosition = binarySearchForAdd(nonNestedFeatures, feature); + /* + * find the first stored feature which doesn't precede the new one + */ + int insertPosition = binarySearch(nonNestedFeatures, + SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); /* * fail if we detect feature enclosure - of the new feature by @@ -177,16 +327,10 @@ public class FeatureStore } /* - * checks passed - add or append the feature + * checks passed - add the feature */ - if (insertPosition == nonNestedFeatures.size()) - { - nonNestedFeatures.add(feature); - } - else - { - nonNestedFeatures.add(insertPosition, feature); - } + nonNestedFeatures.add(insertPosition, feature); + return true; } } @@ -216,31 +360,6 @@ public class FeatureStore } /** - * Answers the index of the first element in the given list which follows or - * matches the given feature in the sort order. If no such element, answers - * the length of the list. - * - * @param list - * @param feature - * - * @return - */ - protected int binarySearchForAdd(List list, SequenceFeature feature) - { - // TODO binary search! - int i = 0; - while (i < list.size()) - { - if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0) - { - break; - } - i++; - } - return i; - } - - /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain * ordered, and returns true. If the contact feature lists already contain the @@ -260,22 +379,71 @@ public class FeatureStore contactFeatureEnds = new ArrayList(); } - // TODO binary search for insertion points! - if (contactFeatureStarts.contains(feature)) + if (contains(contactFeatureStarts, feature)) { return false; } - contactFeatureStarts.add(feature); - Collections.sort(contactFeatureStarts, startOrdering); + /* + * binary search the sorted list to find the insertion point + */ + int insertPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byFeature(feature, + RangeComparator.BY_START_POSITION)); + contactFeatureStarts.add(insertPosition, feature); + // and resort to mak siccar...just in case insertion point not quite right + Collections.sort(contactFeatureStarts, RangeComparator.BY_START_POSITION); + + insertPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byFeature(feature, + RangeComparator.BY_END_POSITION)); contactFeatureEnds.add(feature); - Collections.sort(contactFeatureEnds, endOrdering); + Collections.sort(contactFeatureEnds, RangeComparator.BY_END_POSITION); return true; } /** - * Returns a (possibly empty) list of entries whose range overlaps the given + * Answers true if the list contains the feature, else false. This method is + * optimised for the condition that the list is sorted on feature start + * position ascending, and will give unreliable results if this does not hold. + * + * @param features + * @param feature + * @return + */ + protected static boolean contains(List features, + SequenceFeature feature) + { + if (features == null || feature == null) + { + return false; + } + + /* + * locate the first entry in the list which does not precede the feature + */ + int pos = binarySearch(features, + SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); + int len = features.size(); + while (pos < len) + { + SequenceFeature sf = features.get(pos); + if (sf.getBegin() > feature.getBegin()) + { + return false; // no match found + } + if (sf.equals(feature)) + { + return true; + } + pos++; + } + return false; + } + + /** + * Returns a (possibly empty) list of features whose extent overlaps the given * range. The returned list is not ordered. Contact features are included if * either of the contact points lies within the range. * @@ -287,7 +455,7 @@ public class FeatureStore */ public List findOverlappingFeatures(long start, long end) { - List result = new ArrayList(); + List result = new ArrayList<>(); findNonNestedFeatures(start, end, result); @@ -303,7 +471,7 @@ public class FeatureStore /** * Adds contact features to the result list where either the second or the - * first contact position lies within the target range. + * first contact position lies within the target range * * @param from * @param to @@ -323,6 +491,10 @@ public class FeatureStore } /** + * Adds to the result list any contact features whose end (second contact + * point), but not start (first contact point), lies in the query from-to + * range + * * @param from * @param to * @param result @@ -330,8 +502,13 @@ public class FeatureStore protected void findContactEndFeatures(long from, long to, List result) { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++) + /* + * find the first contact feature (if any) that does not lie + * entirely before the target range + */ + int startPosition = binarySearch(contactFeatureEnds, + SearchCriterion.byEnd(from)); + for (; startPosition < contactFeatureEnds.size(); startPosition++) { SequenceFeature sf = contactFeatureEnds.get(startPosition); if (!sf.isContactFeature()) @@ -340,6 +517,7 @@ public class FeatureStore + sf.getType() + " in contact features list"); continue; } + int begin = sf.getBegin(); if (begin >= from && begin <= to) { @@ -349,41 +527,22 @@ public class FeatureStore */ continue; } + int end = sf.getEnd(); if (end >= from && end <= to) { result.add(sf); } - } - } - - /** - * Returns the index of the first contact feature found whose end (second - * contact position) is not before the given start position. If no such - * feature is found, returns the length of the contact features list. - * - * @param start - * @return - */ - protected int contactsBinarySearch(long start) - { - // TODO binary search!! - int i = 0; - while (i < contactFeatureEnds.size()) - { - if (contactFeatureEnds.get(i).getEnd() >= start) + if (end > to) { break; } - i++; } - - return i; } /** - * Adds features to the result list that are at a single position which lies - * within the target range. Non-positional features (start=end=0) and contact + * Adds non-nested features to the result list that lie within the target + * range. Non-positional features (start=end=0), contact features and nested * features are excluded. * * @param from @@ -393,7 +552,9 @@ public class FeatureStore protected void findNonNestedFeatures(long from, long to, List result) { - int startIndex = binarySearch(nonNestedFeatures, from); + int startIndex = binarySearch(nonNestedFeatures, + SearchCriterion.byEnd(from)); + findNonNestedFeatures(startIndex, from, to, result); } @@ -410,8 +571,7 @@ public class FeatureStore * @return */ protected int findNonNestedFeatures(final int startIndex, long from, - long to, - List result) + long to, List result) { int i = startIndex; while (i < nonNestedFeatures.size()) @@ -433,37 +593,6 @@ public class FeatureStore } /** - * Performs a binary search of the (sorted) list to find the index of the - * first entry whose end position is not less than the target position (i.e. - * skip all features that properly precede the given position) - * - * @param features - * @param target - * @return - */ - protected int binarySearch(List features, long target) - { - int width = features.size() / 2; - int lastpos = width; - while (width > 0) - { - int end = features.get(lastpos).getEnd(); - width = width / 2; - if (end > target) - { - lastpos -= width; - } - else - { - lastpos += width; - } - } - // todo correct binary search - return lastpos > 1 ? lastpos - 2 : 0; - // return lastpos; - } - - /** * Adds contact features whose start position lies in the from-to range to the * result list * @@ -474,8 +603,10 @@ public class FeatureStore protected void findContactStartFeatures(long from, long to, List result) { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++) + int startPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byStart(from)); + + for (; startPosition < contactFeatureStarts.size(); startPosition++) { SequenceFeature sf = contactFeatureStarts.get(startPosition); if (!sf.isContactFeature()) @@ -493,17 +624,16 @@ public class FeatureStore } /** - * Answers a list of all features stored (including any non-positional - * features), in no guaranteed order + * Answers a list of all positional features stored, in no guaranteed order * * @return */ - public List getFeatures() + public List getPositionalFeatures() { /* * add non-nested features (may be all features for many cases) */ - List result = new ArrayList(); + List result = new ArrayList<>(); result.addAll(nonNestedFeatures); /* @@ -515,14 +645,6 @@ public class FeatureStore } /* - * add any non-positional features - */ - if (nonPositionalFeatures != null) - { - result.addAll(nonPositionalFeatures); - } - - /* * add any nested features */ if (nestedFeatures != null) @@ -545,7 +667,7 @@ public class FeatureStore { return Collections.emptyList(); } - return new ArrayList(contactFeatureStarts); + return new ArrayList<>(contactFeatureStarts); } /** @@ -560,7 +682,7 @@ public class FeatureStore { return Collections.emptyList(); } - return new ArrayList(nonPositionalFeatures); + return new ArrayList<>(nonPositionalFeatures); } /** @@ -571,7 +693,7 @@ public class FeatureStore * * @param sf */ - public boolean delete(SequenceFeature sf) + public synchronized boolean delete(SequenceFeature sf) { /* * try the non-nested positional features first @@ -591,12 +713,15 @@ public class FeatureStore } } + boolean removedNonPositional = false; + /* * if not found, try non-positional features */ if (!removed && nonPositionalFeatures != null) { - removed = nonPositionalFeatures.remove(sf); + removedNonPositional = nonPositionalFeatures.remove(sf); + removed = removedNonPositional; } /* @@ -607,6 +732,305 @@ public class FeatureStore removed = nestedFeatures.delete(sf); } + if (removed) + { + rescanAfterDelete(); + } + return removed; } + + /** + * Rescan all features to recompute any cached values after an entry has been + * deleted. This is expected to be an infrequent event, so performance here is + * not critical. + */ + protected synchronized void rescanAfterDelete() + { + positionalFeatureGroups.clear(); + nonPositionalFeatureGroups.clear(); + totalExtent = 0; + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + + /* + * scan non-positional features for groups and scores + */ + for (SequenceFeature sf : getNonPositionalFeatures()) + { + nonPositionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } + + /* + * scan positional features for groups, scores and extents + */ + for (SequenceFeature sf : getPositionalFeatures()) + { + positionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + totalExtent += getFeatureLength(sf); + } + } + + /** + * A helper method to return the minimum of two floats, where a non-NaN value + * is treated as 'less than' a NaN value (unlike Math.min which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float min(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.min(f1, f2); + } + } + + /** + * A helper method to return the maximum of two floats, where a non-NaN value + * is treated as 'greater than' a NaN value (unlike Math.max which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float max(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.max(f1, f2); + } + } + + /** + * Answers true if this store has no features, else false + * + * @return + */ + public boolean isEmpty() + { + boolean hasFeatures = !nonNestedFeatures.isEmpty() + || (contactFeatureStarts != null && !contactFeatureStarts + .isEmpty()) + || (nonPositionalFeatures != null && !nonPositionalFeatures + .isEmpty()) + || (nestedFeatures != null && nestedFeatures.size() > 0); + + return !hasFeatures; + } + + /** + * Answers the set of distinct feature groups stored, possibly including null, + * as an unmodifiable view of the set. The parameter determines whether the + * groups for positional or for non-positional features are returned. + * + * @param positionalFeatures + * @return + */ + public Set getFeatureGroups(boolean positionalFeatures) + { + if (positionalFeatures) + { + return Collections.unmodifiableSet(positionalFeatureGroups); + } + else + { + return nonPositionalFeatureGroups == null ? Collections + . emptySet() : Collections + .unmodifiableSet(nonPositionalFeatureGroups); + } + } + + /** + * Performs a binary search of the (sorted) list to find the index of the + * first entry which returns true for the given comparator function. Returns + * the length of the list if there is no such entry. + * + * @param features + * @param sc + * @return + */ + protected static int binarySearch(List features, + SearchCriterion sc) + { + int start = 0; + int end = features.size() - 1; + int matched = features.size(); + + while (start <= end) + { + int mid = (start + end) / 2; + SequenceFeature entry = features.get(mid); + boolean compare = sc.compare(entry); + if (compare) + { + matched = mid; + end = mid - 1; + } + else + { + start = mid + 1; + } + } + + return matched; + } + + /** + * Answers the number of positional (or non-positional) features stored. + * Contact features count as 1. + * + * @param positional + * @return + */ + public int getFeatureCount(boolean positional) + { + if (!positional) + { + return nonPositionalFeatures == null ? 0 : nonPositionalFeatures + .size(); + } + + int size = nonNestedFeatures.size(); + + if (contactFeatureStarts != null) + { + // note a contact feature (start/end) counts as one + size += contactFeatureStarts.size(); + } + + if (nestedFeatures != null) + { + size += nestedFeatures.size(); + } + + return size; + } + + /** + * Answers the total length of positional features (or zero if there are + * none). Contact features contribute a value of 1 to the total. + * + * @return + */ + public int getTotalFeatureLength() + { + return totalExtent; + } + + /** + * Answers the minimum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. + * + * @param positional + * @return + */ + public float getMinimumScore(boolean positional) + { + return positional ? positionalMinScore : nonPositionalMinScore; + } + + /** + * Answers the maximum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. + * + * @param positional + * @return + */ + public float getMaximumScore(boolean positional) + { + return positional ? positionalMaxScore : nonPositionalMaxScore; + } + + /** + * Answers a list of all either positional or non-positional features whose + * feature group matches the given group (which may be null) + * + * @param positional + * @param group + * @return + */ + public List getFeaturesForGroup(boolean positional, + String group) + { + List result = new ArrayList<>(); + + /* + * if we know features don't include the target group, no need + * to inspect them for matches + */ + if (positional && !positionalFeatureGroups.contains(group) + || !positional && !nonPositionalFeatureGroups.contains(group)) + { + return result; + } + + List sfs = positional ? getPositionalFeatures() + : getNonPositionalFeatures(); + for (SequenceFeature sf : sfs) + { + String featureGroup = sf.getFeatureGroup(); + if (group == null && featureGroup == null || group != null + && group.equals(featureGroup)) + { + result.add(sf); + } + } + return result; + } + + /** + * Adds the shift value to the start and end of all positional features. + * Returns true if at least one feature was updated, else false. + * + * @param shift + * @return + */ + public synchronized boolean shiftFeatures(int shift) + { + /* + * Because begin and end are final fields (to ensure the data store's + * integrity), we have to delete each feature and re-add it as amended. + * (Although a simple shift of all values would preserve data integrity!) + */ + boolean modified = false; + for (SequenceFeature sf : getPositionalFeatures()) + { + modified = true; + int newBegin = sf.getBegin() + shift; + int newEnd = sf.getEnd() + shift; + + /* + * sanity check: don't shift left of the first residue + */ + if (newEnd > 0) + { + newBegin = Math.max(1, newBegin); + SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup(), sf.getScore()); + addFeature(sf2); + } + delete(sf); + } + return modified; + } }