X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=653d389ce54cb5e23ec69ff3fe65b3fc82aff65d;hb=a30b21bb66cb7faef19bd1c2417be687970babcf;hp=d832f4de226f8ebeaf2c0a7f3fb23b75a2e098fe;hpb=33a853ff07bf9fbdace2744c6c38e28786147e53;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index d832f4d..653d389 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -29,13 +29,84 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -public abstract class FeatureStore implements FeatureStoreI +import intervalstore.api.IntervalStoreI; +import intervalstore.impl.BinarySearcher; +import intervalstore.impl.BinarySearcher.Compare; + +public class FeatureStore { + public enum IntervalStoreType + { + /** + * original NCList-based IntervalStore + */ + INTERVAL_STORE_NCLIST, - /** - * track last start for quick insertion of ordered features + /** + * linked-list IntervalStore + */ + INTERVAL_STORE_LINKED_LIST, + + /** + * NCList as array buffer IntervalStore + */ + INTERVAL_STORE_NCARRAY + } + + /* + * track largest start for quick insertion of ordered features + */ + protected int maxStart = -1; + + protected int maxContactStart = -1; + + /* + * Non-positional features have no (zero) start/end position. + * Kept as a separate list in case this criterion changes in future. + */ + List nonPositionalFeatures; + + /* + * contact features ordered by first contact position + */ + List contactFeatureStarts; + + /* + * contact features ordered by second contact position + */ + List contactFeatureEnds; + + /* + * IntervalStore holds remaining features and provides efficient + * query for features overlapping any given interval + */ + IntervalStoreI features; + + /* + * Feature groups represented in stored positional features + * (possibly including null) + */ + Set positionalFeatureGroups; + + /* + * Feature groups represented in stored non-positional features + * (possibly including null) */ - protected int lastStart = -1, lastContactStart = -1; + Set nonPositionalFeatureGroups; + + /* + * the total length of all positional features; contact features count 1 to + * the total and 1 to size(), consistent with an average 'feature length' of 1 + */ + int totalExtent; + + float positionalMinScore; + + float positionalMaxScore; + + float nonPositionalMinScore; + + float nonPositionalMaxScore; /** * Answers the 'length' of the feature, counting 0 for non-positional features @@ -66,8 +137,7 @@ public abstract class FeatureStore implements FeatureStoreI * @param feature * @return */ - @Override - public boolean listContains(List list, + public static boolean listContains(List list, SequenceFeature feature) { if (list == null || feature == null) @@ -75,40 +145,26 @@ public abstract class FeatureStore implements FeatureStoreI return false; } - return (getEquivalentFeatureIndex(list, feature) >= 0); - } - - /** - * Binary search for the index (>= 0) of a feature in a list. - * - * @param list - * @param feature - * @return index if found; -1 if not - */ - protected int getEquivalentFeatureIndex(List list, - SequenceFeature feature) - { - /* * locate the first entry in the list which does not precede the feature */ int begin = feature.begin; - int pos = findFirstBegin(list, begin); + int pos = BinarySearcher.findFirst(list, true, Compare.GE, begin); int len = list.size(); while (pos < len) { SequenceFeature sf = list.get(pos); if (sf.begin > begin) { - return -1; // no match found + return false; // no match found } if (sf.equals(feature)) { - return pos; + return true; } pos++; } - return -1; + return false; } /** @@ -151,59 +207,21 @@ public abstract class FeatureStore implements FeatureStoreI } } - /* - * Non-positional features have no (zero) start/end position. - * Kept as a separate list in case this criterion changes in future. - */ - List nonPositionalFeatures; - - /* - * contact features ordered by first contact position - */ - List contactFeatureStarts; - - /* - * contact features ordered by second contact position - */ - List contactFeatureEnds; - - /* - * IntervalStore holds remaining features and provides efficient - * query for features overlapping any given interval - */ - Collection features; - - /* - * Feature groups represented in stored positional features - * (possibly including null) - */ - Set positionalFeatureGroups; - - /* - * Feature groups represented in stored non-positional features - * (possibly including null) - */ - Set nonPositionalFeatureGroups; - - /* - * the total length of all positional features; contact features count 1 to - * the total and 1 to size(), consistent with an average 'feature length' of 1 + /** + * Constructor that defaults to using NCList IntervalStore */ - int totalExtent; - - float positionalMinScore; - - float positionalMaxScore; - - float nonPositionalMinScore; - - float nonPositionalMaxScore; + public FeatureStore() + { + this(IntervalStoreType.INTERVAL_STORE_NCLIST); + } /** - * Constructor + * Constructor that allows an alternative IntervalStore implementation to be + * chosen */ - public FeatureStore() + public FeatureStore(IntervalStoreType intervalStoreType) { + features = getIntervalStore(intervalStoreType); positionalFeatureGroups = new HashSet<>(); nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; @@ -211,7 +229,29 @@ public abstract class FeatureStore implements FeatureStoreI nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; - // we only construct nonPositionalFeatures, contactFeatures if we need to + // only construct nonPositionalFeatures or contactFeatures if needed + } + + /** + * Returns a new instance of IntervalStoreI of implementation as selected by + * the type parameter + * + * @param type + * @return + */ + private IntervalStoreI getIntervalStore( + IntervalStoreType type) + { + switch (type) + { + default: + case INTERVAL_STORE_NCLIST: + return new intervalstore.impl.IntervalStore<>(); + case INTERVAL_STORE_NCARRAY: + return new intervalstore.nonc.IntervalStore<>(); + case INTERVAL_STORE_LINKED_LIST: + return new intervalstore.nonc.IntervalStore0<>(); + } } /** @@ -235,8 +275,9 @@ public abstract class FeatureStore implements FeatureStoreI * insert into list sorted by start (first contact position): * binary search the sorted list to find the insertion point */ - contactFeatureStarts.add( - findFirstBegin(contactFeatureStarts, feature.begin), feature); + int insertAt = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, feature.begin); + contactFeatureStarts.add(insertAt, feature); /* * insert into list sorted by end (second contact position): * binary search the sorted list to find the insertion point @@ -255,41 +296,40 @@ public abstract class FeatureStore implements FeatureStoreI * * @param feature */ - - @Override public boolean addFeature(SequenceFeature feature) { - if (contains(feature)) - { - return false; - } - - /* - * keep a record of feature groups - */ - if (!feature.isNonPositional()) - { - positionalFeatureGroups.add(feature.getFeatureGroup()); - } - if (feature.isContactFeature()) { - if (feature.begin > lastContactStart) + if (containsContactFeature(feature)) { - lastContactStart = feature.begin; + return false; + } + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxContactStart) + { + maxContactStart = feature.begin; } addContactFeature(feature); } else if (feature.isNonPositional()) { + if (containsNonPositionalFeature(feature)) + { + return false; + } + addNonPositionalFeature(feature); } else { - addPositionalFeature(feature); - if (feature.begin > lastStart) + if (!features.add(feature, false)) + { + return false; + } + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxStart) { - lastStart = feature.begin; + maxStart = feature.begin; } } @@ -322,6 +362,14 @@ public abstract class FeatureStore implements FeatureStoreI return true; } + /** + * A helper method that adds to the result list any features from the + * collection provided whose feature group matches the specified group + * + * @param group + * @param sfs + * @param result + */ private void addFeaturesForGroup(String group, Collection sfs, List result) { @@ -341,12 +389,6 @@ public abstract class FeatureStore implements FeatureStoreI } /** - * Adds one feature to the IntervalStore that can manage nested features - * (creating the IntervalStore if necessary) - */ - abstract protected void addPositionalFeature(SequenceFeature feature); - - /** * Adds the feature to the list of non-positional features (with lazy * instantiation of the list if it is null), and returns true. The feature * group is added to the set of distinct feature groups for non-positional @@ -376,28 +418,52 @@ public abstract class FeatureStore implements FeatureStoreI * @param feature * @return */ - @Override public boolean contains(SequenceFeature feature) { if (feature.isNonPositional()) { - return nonPositionalFeatures == null ? false - : nonPositionalFeatures.contains(feature); + return containsNonPositionalFeature(feature); } if (feature.isContactFeature()) { - return contactFeatureStarts != null - && feature.begin <= lastContactStart - && listContains(contactFeatureStarts, feature); + return containsContactFeature(feature); } - return features == null || feature.begin > lastStart ? false - : containsFeature(feature); + return containsPositionalFeature(feature); } + private boolean containsPositionalFeature(SequenceFeature feature) + { + return features == null || feature.begin > maxStart ? false + : features.contains(feature); + } - abstract protected boolean containsFeature(SequenceFeature feature); + /** + * Answers true if this store already contains a contact feature equal to the + * given feature (by {@code SequenceFeature.equals()} test), else false + * + * @param feature + * @return + */ + private boolean containsContactFeature(SequenceFeature feature) + { + return contactFeatureStarts != null && feature.begin <= maxContactStart + && listContains(contactFeatureStarts, feature); + } + + /** + * Answers true if this store already contains a non-positional feature equal + * to the given feature (by {@code SequenceFeature.equals()} test), else false + * + * @param feature + * @return + */ + private boolean containsNonPositionalFeature(SequenceFeature feature) + { + return nonPositionalFeatures == null ? false + : nonPositionalFeatures.contains(feature); + } /** * Deletes the given feature from the store, returning true if it was found @@ -407,8 +473,6 @@ public abstract class FeatureStore implements FeatureStoreI * * @param sf */ - - @Override public synchronized boolean delete(SequenceFeature sf) { boolean removed = false; @@ -439,7 +503,7 @@ public abstract class FeatureStore implements FeatureStoreI */ if (!removed && features != null) { - removed = findAndRemoveNonContactFeature(sf); + removed = features.remove(sf); } if (removed) @@ -450,44 +514,62 @@ public abstract class FeatureStore implements FeatureStoreI return removed; } - abstract protected boolean findAndRemoveNonContactFeature(SequenceFeature sf); - - abstract protected void findContactFeatures(long from, long to, - List result); + public List findFeatures(long start, long end) + { + return findFeatures(start, end, null); + } - abstract protected int findFirstBegin(List list, - long pos); + /** + * Returns a (possibly empty) list of features whose extent overlaps the given + * range. The returned list is not ordered. Contact features are included if + * either of the contact points lies within the range. If the {@code result} + * parameter is not null, new entries are added to this list and the (possibly + * extended) list returned. + * + * @param start + * start position of overlap range (inclusive) + * @param end + * end position of overlap range (inclusive) + * @param result + * @return + */ + public List findFeatures(long start, long end, + List result) + { + if (result == null) + { + result = new ArrayList<>(); + } - abstract protected int findFirstEnd(List list, long pos); + findContactFeatures(start, end, result); + features.findOverlaps(start, end, result); - @Override - public List findOverlappingFeatures(long start, long end) - { - return findOverlappingFeatures(start, end, null); + return result; } - @Override + /** + * Returns a (possibly empty) list of stored contact features + * + * @return + */ public List getContactFeatures() { - return getContactFeatures(new ArrayList<>()); + List result = new ArrayList<>(); + getContactFeatures(result); + return result; } /** - * Answers a list of all contact features. If there are none, returns an - * immutable empty list. + * Adds any stored contact features to the result list * * @return */ - - @Override - public List getContactFeatures( - List result) + public void getContactFeatures(List result) { if (contactFeatureStarts != null) { result.addAll(contactFeatureStarts); } - return result; } /** @@ -497,8 +579,6 @@ public abstract class FeatureStore implements FeatureStoreI * @param positional * @return */ - - @Override public int getFeatureCount(boolean positional) { if (!positional) @@ -507,9 +587,19 @@ public abstract class FeatureStore implements FeatureStoreI : nonPositionalFeatures.size(); } - return (contactFeatureStarts == null ? 0 : contactFeatureStarts.size()) - + features.size(); + int size = 0; + + if (contactFeatureStarts != null) + { + // note a contact feature (start/end) counts as one + size += contactFeatureStarts.size(); + } + if (features != null) + { + size += features.size(); + } + return size; } /** @@ -520,8 +610,6 @@ public abstract class FeatureStore implements FeatureStoreI * @param positionalFeatures * @return */ - - @Override public Set getFeatureGroups(boolean positionalFeatures) { if (positionalFeatures) @@ -536,12 +624,6 @@ public abstract class FeatureStore implements FeatureStoreI } } - @Override - public Collection getFeatures() - { - return features; - } - /** * Answers a list of all either positional or non-positional features whose * feature group matches the given group (which may be null) @@ -550,8 +632,6 @@ public abstract class FeatureStore implements FeatureStoreI * @param group * @return */ - - @Override public List getFeaturesForGroup(boolean positional, String group) { @@ -587,8 +667,6 @@ public abstract class FeatureStore implements FeatureStoreI * @param positional * @return */ - - @Override public float getMaximumScore(boolean positional) { return positional ? positionalMaxScore : nonPositionalMaxScore; @@ -602,54 +680,55 @@ public abstract class FeatureStore implements FeatureStoreI * @param positional * @return */ - - @Override public float getMinimumScore(boolean positional) { return positional ? positionalMinScore : nonPositionalMinScore; } - @Override + /** + * Answers a (possibly empty) list of all non-positional features + * + * @return + */ public List getNonPositionalFeatures() { - return getNonPositionalFeatures(new ArrayList<>()); + List result = new ArrayList<>(); + getNonPositionalFeatures(result); + return result; } /** - * Answers a list of all non-positional features. If there are none, returns - * an immutable empty list. + * Adds any stored non-positional features to the result list * * @return */ - - @Override - public List getNonPositionalFeatures( - List result) + public void getNonPositionalFeatures(List result) { if (nonPositionalFeatures != null) { result.addAll(nonPositionalFeatures); } - return result; } - @Override + /** + * Returns a (possibly empty) list of all positional features stored + * + * @return + */ public List getPositionalFeatures() { - return getPositionalFeatures(new ArrayList<>()); + List result = new ArrayList<>(); + getPositionalFeatures(result); + + return result; } /** - * Answers a list of all positional features stored, in no guaranteed order - * - * @return + * Adds all positional features stored to the result list, in no guaranteed + * order, and with no check for duplicates */ - - @Override - public List getPositionalFeatures( - List result) + public void getPositionalFeatures(List result) { - /* * add any contact features - from the list by start position */ @@ -665,8 +744,6 @@ public abstract class FeatureStore implements FeatureStoreI { result.addAll(features); } - - return result; } /** @@ -675,8 +752,6 @@ public abstract class FeatureStore implements FeatureStoreI * * @return */ - - @Override public int getTotalFeatureLength() { return totalExtent; @@ -687,15 +762,13 @@ public abstract class FeatureStore implements FeatureStoreI * * @return */ - - @Override public boolean isEmpty() { boolean hasFeatures = (contactFeatureStarts != null && !contactFeatureStarts.isEmpty()) || (nonPositionalFeatures != null && !nonPositionalFeatures.isEmpty()) - || features.size() > 0; + || (features != null && features.size() > 0); return !hasFeatures; } @@ -719,10 +792,9 @@ public abstract class FeatureStore implements FeatureStoreI */ if (nonPositionalFeatures != null) { - List list = nonPositionalFeatures; - for (int i = 0, n = list.size(); i < n; i++) + for (int i = 0, n = nonPositionalFeatures.size(); i < n; i++) { - SequenceFeature sf = list.get(i); + SequenceFeature sf = nonPositionalFeatures.get(i); nonPositionalFeatureGroups.add(sf.getFeatureGroup()); float score = sf.getScore(); nonPositionalMinScore = min(nonPositionalMinScore, score); @@ -730,14 +802,17 @@ public abstract class FeatureStore implements FeatureStoreI } } - /* - * scan positional features for groups, scores and extents - */ - rescanPositional(contactFeatureStarts); rescanPositional(features); } + /** + * Scans the given features and updates cached feature groups, minimum and + * maximum feature score, and total feature extent (length) for positional + * features + * + * @param sfs + */ private void rescanPositional(Collection sfs) { if (sfs == null) @@ -763,8 +838,6 @@ public abstract class FeatureStore implements FeatureStoreI * @param shiftBy * @return */ - - @Override public synchronized boolean shiftFeatures(int fromPosition, int shiftBy) { /* @@ -799,4 +872,135 @@ public abstract class FeatureStore implements FeatureStoreI return modified; } + /** + * Answers the position (0, 1...) in the list of the first entry whose end + * position is not less than {@ pos}. If no such entry is found, answers the + * length of the list. + * + * @param list + * @param pos + * @return + */ + protected int findFirstEnd(List list, long pos) + { + return BinarySearcher.findFirst(list, false, Compare.GE, (int) pos); + } + + /** + * Adds contact features to the result list where either the second or the + * first contact position lies within the target range + * + * @param from + * @param to + * @param result + */ + protected void findContactFeatures(long from, long to, + List result) + { + if (contactFeatureStarts != null) + { + findContactStartOverlaps(from, to, result); + findContactEndOverlaps(from, to, result); + } + } + + /** + * Adds to the result list any contact features whose end (second contact + * point), but not start (first contact point), lies in the query from-to + * range + * + * @param from + * @param to + * @param result + */ + private void findContactEndOverlaps(long from, long to, + List result) + { + /* + * find the first contact feature (if any) + * whose end point is not before the target range + */ + int index = findFirstEnd(contactFeatureEnds, from); + + int n = contactFeatureEnds.size(); + while (index < n) + { + SequenceFeature sf = contactFeatureEnds.get(index); + if (!sf.isContactFeature()) + { + System.err.println("Error! non-contact feature type " + sf.getType() + + " in contact features list"); + index++; + continue; + } + + int begin = sf.getBegin(); + if (begin >= from && begin <= to) + { + /* + * this feature's first contact position lies in the search range + * so we don't include it in results a second time + */ + index++; + continue; + } + + if (sf.getEnd() > to) + { + /* + * this feature (and all following) has end point after the target range + */ + break; + } + + /* + * feature has end >= from and end <= to + * i.e. contact end point lies within overlap search range + */ + result.add(sf); + index++; + } + } + + /** + * Adds contact features whose start position lies in the from-to range to the + * result list + * + * @param from + * @param to + * @param result + */ + private void findContactStartOverlaps(long from, long to, + List result) + { + int index = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, (int) from); + + while (index < contactFeatureStarts.size()) + { + SequenceFeature sf = contactFeatureStarts.get(index); + if (!sf.isContactFeature()) + { + System.err.println("Error! non-contact feature " + sf.toString() + + " in contact features list"); + index++; + continue; + } + if (sf.getBegin() > to) + { + /* + * this feature's start (and all following) follows the target range + */ + break; + } + + /* + * feature has begin >= from and begin <= to + * i.e. contact start point lies within overlap search range + */ + result.add(sf); + index++; + } + } + }