X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=653d389ce54cb5e23ec69ff3fe65b3fc82aff65d;hb=a30b21bb66cb7faef19bd1c2417be687970babcf;hp=686ac26cf17b48ceaa9c1bbcbb5bf6173ce584ee;hpb=fbf6a124e2ffe81bcde747fda6f004748eb3b479;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index 686ac26..653d389 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -23,6 +23,7 @@ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -30,18 +31,35 @@ import java.util.Set; import intervalstore.api.IntervalStoreI; import intervalstore.impl.BinarySearcher; -import intervalstore.impl.IntervalStore; +import intervalstore.impl.BinarySearcher.Compare; -/** - * A data store for a set of sequence features that supports efficient lookup of - * features overlapping a given range. Intended for (but not limited to) storage - * of features for one sequence and feature type. - * - * @author gmcarstairs - * - */ public class FeatureStore { + public enum IntervalStoreType + { + /** + * original NCList-based IntervalStore + */ + INTERVAL_STORE_NCLIST, + + /** + * linked-list IntervalStore + */ + INTERVAL_STORE_LINKED_LIST, + + /** + * NCList as array buffer IntervalStore + */ + INTERVAL_STORE_NCARRAY + } + + /* + * track largest start for quick insertion of ordered features + */ + protected int maxStart = -1; + + protected int maxContactStart = -1; + /* * Non-positional features have no (zero) start/end position. * Kept as a separate list in case this criterion changes in future. @@ -90,16 +108,120 @@ public class FeatureStore float nonPositionalMaxScore; - private SequenceFeature[] temp = new SequenceFeature[3]; + /** + * Answers the 'length' of the feature, counting 0 for non-positional features + * and 1 for contact features + * + * @param feature + * @return + */ + protected static int getFeatureLength(SequenceFeature feature) + { + if (feature.isNonPositional()) + { + return 0; + } + if (feature.isContactFeature()) + { + return 1; + } + return 1 + feature.getEnd() - feature.getBegin(); + } + + /** + * Answers true if the list contains the feature, else false. This method is + * optimised for the condition that the list is sorted on feature start + * position ascending, and will give unreliable results if this does not hold. + * + * @param list + * @param feature + * @return + */ + public static boolean listContains(List list, + SequenceFeature feature) + { + if (list == null || feature == null) + { + return false; + } + + /* + * locate the first entry in the list which does not precede the feature + */ + int begin = feature.begin; + int pos = BinarySearcher.findFirst(list, true, Compare.GE, begin); + int len = list.size(); + while (pos < len) + { + SequenceFeature sf = list.get(pos); + if (sf.begin > begin) + { + return false; // no match found + } + if (sf.equals(feature)) + { + return true; + } + pos++; + } + return false; + } + + /** + * A helper method to return the maximum of two floats, where a non-NaN value + * is treated as 'greater than' a NaN value (unlike Math.max which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float max(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.max(f1, f2); + } + } - private boolean isTainted; + /** + * A helper method to return the minimum of two floats, where a non-NaN value + * is treated as 'less than' a NaN value (unlike Math.min which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float min(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.min(f1, f2); + } + } /** - * Constructor + * Constructor that defaults to using NCList IntervalStore */ public FeatureStore() { - features = new IntervalStore<>(); + this(IntervalStoreType.INTERVAL_STORE_NCLIST); + } + + /** + * Constructor that allows an alternative IntervalStore implementation to be + * chosen + */ + public FeatureStore(IntervalStoreType intervalStoreType) + { + features = getIntervalStore(intervalStoreType); positionalFeatureGroups = new HashSet<>(); nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; @@ -107,43 +229,108 @@ public class FeatureStore nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; - // we only construct nonPositionalFeatures, contactFeatures if we need to + // only construct nonPositionalFeatures or contactFeatures if needed } /** - * Adds one sequence feature to the store, and returns true, unless the - * feature is already contained in the store, in which case this method - * returns false. Containment is determined by SequenceFeature.equals() - * comparison. + * Returns a new instance of IntervalStoreI of implementation as selected by + * the type parameter + * + * @param type + * @return + */ + private IntervalStoreI getIntervalStore( + IntervalStoreType type) + { + switch (type) + { + default: + case INTERVAL_STORE_NCLIST: + return new intervalstore.impl.IntervalStore<>(); + case INTERVAL_STORE_NCARRAY: + return new intervalstore.nonc.IntervalStore<>(); + case INTERVAL_STORE_LINKED_LIST: + return new intervalstore.nonc.IntervalStore0<>(); + } + } + + /** + * Add a contact feature to the lists that hold them ordered by start (first + * contact) and by end (second contact) position, ensuring the lists remain + * ordered, and returns true. This method allows duplicate features to be + * added, so test before calling to avoid this. * * @param feature + * @return */ - public boolean addFeature(SequenceFeature feature) + protected synchronized boolean addContactFeature(SequenceFeature feature) { - if (contains(feature)) + if (contactFeatureStarts == null) { - return false; + contactFeatureStarts = new ArrayList<>(); + contactFeatureEnds = new ArrayList<>(); } /* - * keep a record of feature groups + * insert into list sorted by start (first contact position): + * binary search the sorted list to find the insertion point */ - if (!feature.isNonPositional()) - { - positionalFeatureGroups.add(feature.getFeatureGroup()); - } + int insertAt = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, feature.begin); + contactFeatureStarts.add(insertAt, feature); + /* + * insert into list sorted by end (second contact position): + * binary search the sorted list to find the insertion point + */ + contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end), + feature); + + return true; + } + /** + * Adds one sequence feature to the store, and returns true, unless the + * feature is already contained in the store, in which case this method + * returns false. Containment is determined by SequenceFeature.equals() + * comparison. + * + * @param feature + */ + public boolean addFeature(SequenceFeature feature) + { if (feature.isContactFeature()) { + if (containsContactFeature(feature)) + { + return false; + } + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxContactStart) + { + maxContactStart = feature.begin; + } addContactFeature(feature); } else if (feature.isNonPositional()) { + if (containsNonPositionalFeature(feature)) + { + return false; + } + addNonPositionalFeature(feature); } else { - addNestedFeature(feature); + if (!features.add(feature, false)) + { + return false; + } + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxStart) + { + maxStart = feature.begin; + } } /* @@ -176,48 +363,29 @@ public class FeatureStore } /** - * Answers true if this store contains the given feature (testing by - * SequenceFeature.equals), else false - * - * @param feature - * @return - */ - public boolean contains(SequenceFeature feature) - { - if (feature.isNonPositional()) - { - return nonPositionalFeatures == null ? false : nonPositionalFeatures - .contains(feature); - } - - if (feature.isContactFeature()) - { - return contactFeatureStarts == null ? false : listContains( - contactFeatureStarts, feature); - } - - return features == null ? false : features - .contains(feature); - } - - /** - * Answers the 'length' of the feature, counting 0 for non-positional features - * and 1 for contact features + * A helper method that adds to the result list any features from the + * collection provided whose feature group matches the specified group * - * @param feature - * @return + * @param group + * @param sfs + * @param result */ - protected static int getFeatureLength(SequenceFeature feature) + private void addFeaturesForGroup(String group, + Collection sfs, List result) { - if (feature.isNonPositional()) + if (sfs == null) { - return 0; + return; } - if (feature.isContactFeature()) + for (SequenceFeature sf : sfs) { - return 1; + String featureGroup = sf.getFeatureGroup(); + if (group == null && featureGroup == null + || group != null && group.equals(featureGroup)) + { + result.add(sf); + } } - return 1 + feature.getEnd() - feature.getBegin(); } /** @@ -244,302 +412,58 @@ public class FeatureStore } /** - * Adds one feature to the IntervalStore that can manage nested features - * (creating the IntervalStore if necessary) - */ - protected synchronized void addNestedFeature(SequenceFeature feature) - { - if (features == null) - { - features = new IntervalStore<>(); - } - features.add(feature); - isTainted = true; - } - - /** - * Add a contact feature to the lists that hold them ordered by start (first - * contact) and by end (second contact) position, ensuring the lists remain - * ordered, and returns true. This method allows duplicate features to be - * added, so test before calling to avoid this. + * Answers true if this store contains the given feature (testing by + * SequenceFeature.equals), else false * * @param feature * @return */ - protected synchronized boolean addContactFeature(SequenceFeature feature) + public boolean contains(SequenceFeature feature) { - if (contactFeatureStarts == null) + if (feature.isNonPositional()) { - contactFeatureStarts = new ArrayList<>(); + return containsNonPositionalFeature(feature); } - if (contactFeatureEnds == null) + + if (feature.isContactFeature()) { - contactFeatureEnds = new ArrayList<>(); + return containsContactFeature(feature); } - /* - * insert into list sorted by start (first contact position): - * binary search the sorted list to find the insertion point - */ - int insertPosition = BinarySearcher.findFirst(contactFeatureStarts, - f -> f.getBegin() >= feature.getBegin()); - contactFeatureStarts.add(insertPosition, feature); - + return containsPositionalFeature(feature); + } - /* - * insert into list sorted by end (second contact position): - * binary search the sorted list to find the insertion point - */ - insertPosition = BinarySearcher.findFirst(contactFeatureEnds, - f -> f.getEnd() >= feature.getEnd()); - contactFeatureEnds.add(insertPosition, feature); + private boolean containsPositionalFeature(SequenceFeature feature) + { + return features == null || feature.begin > maxStart ? false + : features.contains(feature); + } - return true; + /** + * Answers true if this store already contains a contact feature equal to the + * given feature (by {@code SequenceFeature.equals()} test), else false + * + * @param feature + * @return + */ + private boolean containsContactFeature(SequenceFeature feature) + { + return contactFeatureStarts != null && feature.begin <= maxContactStart + && listContains(contactFeatureStarts, feature); } /** - * Answers true if the list contains the feature, else false. This method is - * optimised for the condition that the list is sorted on feature start - * position ascending, and will give unreliable results if this does not hold. + * Answers true if this store already contains a non-positional feature equal + * to the given feature (by {@code SequenceFeature.equals()} test), else false * - * @param features * @param feature * @return */ - protected static boolean listContains(List features, - SequenceFeature feature) + private boolean containsNonPositionalFeature(SequenceFeature feature) { - if (features == null || feature == null) - { - return false; - } - - /* - * locate the first entry in the list which does not precede the feature - */ - // int pos = binarySearch(features, - // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); - int pos = BinarySearcher.findFirst(features, - val -> val.getBegin() >= feature.getBegin()); - int len = features.size(); - while (pos < len) - { - SequenceFeature sf = features.get(pos); - if (sf.getBegin() > feature.getBegin()) - { - return false; // no match found - } - if (sf.equals(feature)) - { - return true; - } - pos++; - } - return false; - } - - /** - * Returns a (possibly empty) list of features whose extent overlaps the given - * range. The returned list is not ordered. Contact features are included if - * either of the contact points lies within the range. - * - * @param start - * start position of overlap range (inclusive) - * @param end - * end position of overlap range (inclusive) - * @return - */ - public List findOverlappingFeatures(long start, long end) - { - List result = new ArrayList<>(); - - findContactFeatures(start, end, result); - - if (features != null) - { - result.addAll(features.findOverlaps(start, end)); - } - - return result; - } - - /** - * Adds contact features to the result list where either the second or the - * first contact position lies within the target range - * - * @param from - * @param to - * @param result - */ - protected void findContactFeatures(long from, long to, - List result) - { - if (contactFeatureStarts != null) - { - findContactStartOverlaps(from, to, result); - } - if (contactFeatureEnds != null) - { - findContactEndOverlaps(from, to, result); - } - } - - /** - * Adds to the result list any contact features whose end (second contact - * point), but not start (first contact point), lies in the query from-to - * range - * - * @param from - * @param to - * @param result - */ - protected void findContactEndOverlaps(long from, long to, - List result) - { - /* - * find the first contact feature (if any) - * whose end point is not before the target range - */ - int index = BinarySearcher.findFirst(contactFeatureEnds, - f -> f.getEnd() >= from); - - while (index < contactFeatureEnds.size()) - { - SequenceFeature sf = contactFeatureEnds.get(index); - if (!sf.isContactFeature()) - { - System.err.println("Error! non-contact feature type " - + sf.getType() + " in contact features list"); - index++; - continue; - } - - int begin = sf.getBegin(); - if (begin >= from && begin <= to) - { - /* - * this feature's first contact position lies in the search range - * so we don't include it in results a second time - */ - index++; - continue; - } - - if (sf.getEnd() > to) - { - /* - * this feature (and all following) has end point after the target range - */ - break; - } - - /* - * feature has end >= from and end <= to - * i.e. contact end point lies within overlap search range - */ - result.add(sf); - index++; - } - } - - /** - * Adds contact features whose start position lies in the from-to range to the - * result list - * - * @param from - * @param to - * @param result - */ - protected void findContactStartOverlaps(long from, long to, - List result) - { - int index = BinarySearcher.findFirst(contactFeatureStarts, - f -> f.getBegin() >= from); - - while (index < contactFeatureStarts.size()) - { - SequenceFeature sf = contactFeatureStarts.get(index); - if (!sf.isContactFeature()) - { - System.err.println("Error! non-contact feature " + sf.toString() - + " in contact features list"); - index++; - continue; - } - if (sf.getBegin() > to) - { - /* - * this feature's start (and all following) follows the target range - */ - break; - } - - /* - * feature has begin >= from and begin <= to - * i.e. contact start point lies within overlap search range - */ - result.add(sf); - index++; - } - } - - /** - * Answers a list of all positional features stored, in no guaranteed order - * - * @return - */ - public List getPositionalFeatures() - { - List result = new ArrayList<>(); - - /* - * add any contact features - from the list by start position - */ - if (contactFeatureStarts != null) - { - result.addAll(contactFeatureStarts); - } - - /* - * add any nested features - */ - if (features != null) - { - result.addAll(features); - } - - return result; - } - - /** - * Answers a list of all contact features. If there are none, returns an - * immutable empty list. - * - * @return - */ - public List getContactFeatures() - { - if (contactFeatureStarts == null) - { - return Collections.emptyList(); - } - return new ArrayList<>(contactFeatureStarts); - } - - /** - * Answers a list of all non-positional features. If there are none, returns - * an immutable empty list. - * - * @return - */ - public List getNonPositionalFeatures() - { - if (nonPositionalFeatures == null) - { - return Collections.emptyList(); - } - return new ArrayList<>(nonPositionalFeatures); - } + return nonPositionalFeatures == null ? false + : nonPositionalFeatures.contains(feature); + } /** * Deletes the given feature from the store, returning true if it was found @@ -566,15 +490,12 @@ public class FeatureStore } } - boolean removedNonPositional = false; - /* * if not found, try non-positional features */ if (!removed && nonPositionalFeatures != null) { - removedNonPositional = nonPositionalFeatures.remove(sf); - removed = removedNonPositional; + removed = nonPositionalFeatures.remove(sf); } /* @@ -593,100 +514,92 @@ public class FeatureStore return removed; } + public List findFeatures(long start, long end) + { + return findFeatures(start, end, null); + } + /** - * Rescan all features to recompute any cached values after an entry has been - * deleted. This is expected to be an infrequent event, so performance here is - * not critical. + * Returns a (possibly empty) list of features whose extent overlaps the given + * range. The returned list is not ordered. Contact features are included if + * either of the contact points lies within the range. If the {@code result} + * parameter is not null, new entries are added to this list and the (possibly + * extended) list returned. + * + * @param start + * start position of overlap range (inclusive) + * @param end + * end position of overlap range (inclusive) + * @param result + * @return */ - protected synchronized void rescanAfterDelete() + public List findFeatures(long start, long end, + List result) { - positionalFeatureGroups.clear(); - nonPositionalFeatureGroups.clear(); - totalExtent = 0; - positionalMinScore = Float.NaN; - positionalMaxScore = Float.NaN; - nonPositionalMinScore = Float.NaN; - nonPositionalMaxScore = Float.NaN; - isTainted = true; - /* - * scan non-positional features for groups and scores - */ - for (SequenceFeature sf : getNonPositionalFeatures()) + if (result == null) { - nonPositionalFeatureGroups.add(sf.getFeatureGroup()); - float score = sf.getScore(); - nonPositionalMinScore = min(nonPositionalMinScore, score); - nonPositionalMaxScore = max(nonPositionalMaxScore, score); + result = new ArrayList<>(); } - /* - * scan positional features for groups, scores and extents - */ - for (SequenceFeature sf : getPositionalFeatures()) - { - positionalFeatureGroups.add(sf.getFeatureGroup()); - float score = sf.getScore(); - positionalMinScore = min(positionalMinScore, score); - positionalMaxScore = max(positionalMaxScore, score); - totalExtent += getFeatureLength(sf); - } + findContactFeatures(start, end, result); + features.findOverlaps(start, end, result); + + return result; } /** - * A helper method to return the minimum of two floats, where a non-NaN value - * is treated as 'less than' a NaN value (unlike Math.min which does the - * opposite) + * Returns a (possibly empty) list of stored contact features * - * @param f1 - * @param f2 + * @return */ - protected static float min(float f1, float f2) + public List getContactFeatures() { - if (Float.isNaN(f1)) - { - return Float.isNaN(f2) ? f1 : f2; - } - else - { - return Float.isNaN(f2) ? f1 : Math.min(f1, f2); - } + List result = new ArrayList<>(); + getContactFeatures(result); + return result; } /** - * A helper method to return the maximum of two floats, where a non-NaN value - * is treated as 'greater than' a NaN value (unlike Math.max which does the - * opposite) + * Adds any stored contact features to the result list * - * @param f1 - * @param f2 + * @return */ - protected static float max(float f1, float f2) + public void getContactFeatures(List result) { - if (Float.isNaN(f1)) - { - return Float.isNaN(f2) ? f1 : f2; - } - else + if (contactFeatureStarts != null) { - return Float.isNaN(f2) ? f1 : Math.max(f1, f2); + result.addAll(contactFeatureStarts); } } /** - * Answers true if this store has no features, else false + * Answers the number of positional (or non-positional) features stored. + * Contact features count as 1. * + * @param positional * @return */ - public boolean isEmpty() + public int getFeatureCount(boolean positional) { - boolean hasFeatures = (contactFeatureStarts != null - && !contactFeatureStarts - .isEmpty()) - || (nonPositionalFeatures != null && !nonPositionalFeatures - .isEmpty()) - || (features != null && features.size() > 0); + if (!positional) + { + return nonPositionalFeatures == null ? 0 + : nonPositionalFeatures.size(); + } + + int size = 0; + + if (contactFeatureStarts != null) + { + // note a contact feature (start/end) counts as one + size += contactFeatureStarts.size(); + } - return !hasFeatures; + if (features != null) + { + size += features.size(); + } + return size; } /** @@ -705,52 +618,58 @@ public class FeatureStore } else { - return nonPositionalFeatureGroups == null ? Collections - . emptySet() : Collections - .unmodifiableSet(nonPositionalFeatureGroups); + return nonPositionalFeatureGroups == null + ? Collections. emptySet() + : Collections.unmodifiableSet(nonPositionalFeatureGroups); } } /** - * Answers the number of positional (or non-positional) features stored. - * Contact features count as 1. + * Answers a list of all either positional or non-positional features whose + * feature group matches the given group (which may be null) * * @param positional + * @param group * @return */ - public int getFeatureCount(boolean positional) + public List getFeaturesForGroup(boolean positional, + String group) { - if (!positional) + List result = new ArrayList<>(); + + /* + * if we know features don't include the target group, no need + * to inspect them for matches + */ + if (positional && !positionalFeatureGroups.contains(group) + || !positional && !nonPositionalFeatureGroups.contains(group)) { - return nonPositionalFeatures == null ? 0 : nonPositionalFeatures - .size(); + return result; } - int size = 0; - - if (contactFeatureStarts != null) + if (positional) { - // note a contact feature (start/end) counts as one - size += contactFeatureStarts.size(); + addFeaturesForGroup(group, contactFeatureStarts, result); + addFeaturesForGroup(group, features, result); } - - if (features != null) + else { - size += features.size(); + addFeaturesForGroup(group, nonPositionalFeatures, result); } - - return size; + return result; } /** - * Answers the total length of positional features (or zero if there are - * none). Contact features contribute a value of 1 to the total. + * Answers the maximum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. * + * @param positional * @return */ - public int getTotalFeatureLength() + public float getMaximumScore(boolean positional) { - return totalExtent; + return positional ? positionalMaxScore : nonPositionalMaxScore; } /** @@ -767,53 +686,147 @@ public class FeatureStore } /** - * Answers the maximum score held for positional or non-positional features. - * This may be Float.NaN if there are no features, are none has a non-NaN - * score. + * Answers a (possibly empty) list of all non-positional features * - * @param positional * @return */ - public float getMaximumScore(boolean positional) + public List getNonPositionalFeatures() { - return positional ? positionalMaxScore : nonPositionalMaxScore; + List result = new ArrayList<>(); + getNonPositionalFeatures(result); + return result; } /** - * Answers a list of all either positional or non-positional features whose - * feature group matches the given group (which may be null) + * Adds any stored non-positional features to the result list * - * @param positional - * @param group * @return */ - public List getFeaturesForGroup(boolean positional, - String group) + public void getNonPositionalFeatures(List result) + { + if (nonPositionalFeatures != null) + { + result.addAll(nonPositionalFeatures); + } + } + + /** + * Returns a (possibly empty) list of all positional features stored + * + * @return + */ + public List getPositionalFeatures() { List result = new ArrayList<>(); + getPositionalFeatures(result); + + return result; + } + /** + * Adds all positional features stored to the result list, in no guaranteed + * order, and with no check for duplicates + */ + public void getPositionalFeatures(List result) + { /* - * if we know features don't include the target group, no need - * to inspect them for matches + * add any contact features - from the list by start position */ - if (positional && !positionalFeatureGroups.contains(group) - || !positional && !nonPositionalFeatureGroups.contains(group)) + if (contactFeatureStarts != null) { - return result; + result.addAll(contactFeatureStarts); } - List sfs = positional ? getPositionalFeatures() - : getNonPositionalFeatures(); - for (SequenceFeature sf : sfs) + /* + * add any nested features + */ + if (features != null) { - String featureGroup = sf.getFeatureGroup(); - if (group == null && featureGroup == null || group != null - && group.equals(featureGroup)) + result.addAll(features); + } + } + + /** + * Answers the total length of positional features (or zero if there are + * none). Contact features contribute a value of 1 to the total. + * + * @return + */ + public int getTotalFeatureLength() + { + return totalExtent; + } + + /** + * Answers true if this store has no features, else false + * + * @return + */ + public boolean isEmpty() + { + boolean hasFeatures = (contactFeatureStarts != null + && !contactFeatureStarts.isEmpty()) + || (nonPositionalFeatures != null + && !nonPositionalFeatures.isEmpty()) + || (features != null && features.size() > 0); + + return !hasFeatures; + } + + /** + * Rescan all features to recompute any cached values after an entry has been + * deleted. This is expected to be an infrequent event, so performance here is + * not critical. + */ + protected synchronized void rescanAfterDelete() + { + positionalFeatureGroups.clear(); + nonPositionalFeatureGroups.clear(); + totalExtent = 0; + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + /* + * scan non-positional features for groups and scores + */ + if (nonPositionalFeatures != null) + { + for (int i = 0, n = nonPositionalFeatures.size(); i < n; i++) { - result.add(sf); + SequenceFeature sf = nonPositionalFeatures.get(i); + nonPositionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); } } - return result; + + rescanPositional(contactFeatureStarts); + rescanPositional(features); + } + + /** + * Scans the given features and updates cached feature groups, minimum and + * maximum feature score, and total feature extent (length) for positional + * features + * + * @param sfs + */ + private void rescanPositional(Collection sfs) + { + if (sfs == null) + { + return; + } + for (SequenceFeature sf : sfs) + { + positionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + totalExtent += getFeatureLength(sf); + } } /** @@ -833,8 +846,10 @@ public class FeatureStore * (Although a simple shift of all values would preserve data integrity!) */ boolean modified = false; - for (SequenceFeature sf : getPositionalFeatures()) + List list = getPositionalFeatures(); + for (int i = 0, n = list.size(); i < n; i++) { + SequenceFeature sf = list.get(i); if (sf.getBegin() >= fromPosition) { modified = true; @@ -858,104 +873,133 @@ public class FeatureStore } /** - * Find all features containing this position. - * Uses isTainted field to know when to reconstruct its temporary array. + * Answers the position (0, 1...) in the list of the first entry whose end + * position is not less than {@ pos}. If no such entry is found, answers the + * length of the list. * + * @param list * @param pos - * @return list of SequenceFeatures - * @author Bob Hanson 2019.07.30 + * @return */ - public void findOverlappingFeatures(int pos, List result) + protected int findFirstEnd(List list, long pos) { + return BinarySearcher.findFirst(list, false, Compare.GE, (int) pos); + } + /** + * Adds contact features to the result list where either the second or the + * first contact position lies within the target range + * + * @param from + * @param to + * @param result + */ + protected void findContactFeatures(long from, long to, + List result) + { if (contactFeatureStarts != null) { - findContacts(contactFeatureStarts, pos, result, true); - findContacts(contactFeatureEnds, pos, result, false); - } - if (features != null) - { - int n = features.size(); - if (isTainted) - { - isTainted = false; - if (temp.length < n) - { - temp = new SequenceFeature[n << 1]; - } - features.toArray(temp); - } - findOverlaps(temp, n, pos, result); + findContactStartOverlaps(from, to, result); + findContactEndOverlaps(from, to, result); } } /** - * Binary search for contact start or end at a given (Overview) position. + * Adds to the result list any contact features whose end (second contact + * point), but not start (first contact point), lies in the query from-to + * range * - * @param l - * @param pos + * @param from + * @param to * @param result - * @param isStart - * - * @author Bob Hanson 2019.07.30 */ - private static void findContacts(List l, int pos, - List result, boolean isStart) + private void findContactEndOverlaps(long from, long to, + List result) { - int low = 0; - int high = l.size() - 1; - while (low <= high) + /* + * find the first contact feature (if any) + * whose end point is not before the target range + */ + int index = findFirstEnd(contactFeatureEnds, from); + + int n = contactFeatureEnds.size(); + while (index < n) { - int mid = (low + high) >>> 1; - SequenceFeature f = l.get(mid); - switch (Long.signum((isStart ? f.begin : f.end) - pos)) + SequenceFeature sf = contactFeatureEnds.get(index); + if (!sf.isContactFeature()) { - case -1: - low = mid + 1; + System.err.println("Error! non-contact feature type " + sf.getType() + + " in contact features list"); + index++; continue; - case 1: - high = mid - 1; + } + + int begin = sf.getBegin(); + if (begin >= from && begin <= to) + { + /* + * this feature's first contact position lies in the search range + * so we don't include it in results a second time + */ + index++; continue; - case 0: - int m = mid; - result.add(f); - // could be "5" in 12345556788 ? - while (++mid <= high && (f = l.get(mid)) != null - && (isStart ? f.begin : f.end) == pos) - { - result.add(f); - } - while (--m >= low && (f = l.get(m)) != null - && (isStart ? f.begin : f.end) == pos) - { - result.add(f); - } - return; } + + if (sf.getEnd() > to) + { + /* + * this feature (and all following) has end point after the target range + */ + break; + } + + /* + * feature has end >= from and end <= to + * i.e. contact end point lies within overlap search range + */ + result.add(sf); + index++; } } /** - * Brute force point-interval overlap test + * Adds contact features whose start position lies in the from-to range to the + * result list * - * @param features - * @param n - * @param pos + * @param from + * @param to * @param result */ - private static void findOverlaps(SequenceFeature[] features, int n, - int pos, + private void findContactStartOverlaps(long from, long to, List result) { - // BH I know, brute force. We need a single-position overlap - // method for IntervalStore, I think. - for (int i = n; --i >= 0;) + int index = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, (int) from); + + while (index < contactFeatureStarts.size()) { - SequenceFeature f = features[i]; - if (f.begin <= pos && f.end >= pos) + SequenceFeature sf = contactFeatureStarts.get(index); + if (!sf.isContactFeature()) + { + System.err.println("Error! non-contact feature " + sf.toString() + + " in contact features list"); + index++; + continue; + } + if (sf.getBegin() > to) { - result.add(f); + /* + * this feature's start (and all following) follows the target range + */ + break; } + + /* + * feature has begin >= from and begin <= to + * i.e. contact start point lies within overlap search range + */ + result.add(sf); + index++; } }