X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=653d389ce54cb5e23ec69ff3fe65b3fc82aff65d;hb=a30b21bb66cb7faef19bd1c2417be687970babcf;hp=bd94c8a5dea1411bcf83d42a3e8ade101493fa8e;hpb=51728d3951398f9c12d7017c776953f17322cc68;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index bd94c8a..653d389 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -1,32 +1,70 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; -import java.util.Comparator; +import java.util.HashSet; import java.util.List; +import java.util.Set; + +import intervalstore.api.IntervalStoreI; +import intervalstore.impl.BinarySearcher; +import intervalstore.impl.BinarySearcher.Compare; -/** - * A data store for a set of sequence features that supports efficient lookup of - * features overlapping a given range. - * - * @author gmcarstairs - * - */ public class FeatureStore { - Comparator startOrdering = new RangeComparator(true); + public enum IntervalStoreType + { + /** + * original NCList-based IntervalStore + */ + INTERVAL_STORE_NCLIST, + + /** + * linked-list IntervalStore + */ + INTERVAL_STORE_LINKED_LIST, + + /** + * NCList as array buffer IntervalStore + */ + INTERVAL_STORE_NCARRAY + } + + /* + * track largest start for quick insertion of ordered features + */ + protected int maxStart = -1; - Comparator endOrdering = new RangeComparator(false); + protected int maxContactStart = -1; /* - * An ordered list of features, with the promise that no feature in the list - * properly contains any other. This constraint allows bounded linear search - * of the list for features overlapping a region. - * Contact features are not included in this list. + * Non-positional features have no (zero) start/end position. + * Kept as a separate list in case this criterion changes in future. */ - List nonNestedFeatures; + List nonPositionalFeatures; /* * contact features ordered by first contact position @@ -39,379 +77,889 @@ public class FeatureStore List contactFeatureEnds; /* - * Nested Containment List is used to hold any features that are nested - * within (properly contained by) any other feature. This is a recursive tree - * which supports depth-first scan for features overlapping a range. - * It is used here as a 'catch-all' fallback for features that cannot be put - * into a simple ordered list without invalidating the search methods. + * IntervalStore holds remaining features and provides efficient + * query for features overlapping any given interval + */ + IntervalStoreI features; + + /* + * Feature groups represented in stored positional features + * (possibly including null) + */ + Set positionalFeatureGroups; + + /* + * Feature groups represented in stored non-positional features + * (possibly including null) */ - NCList nestedFeatures; + Set nonPositionalFeatureGroups; + + /* + * the total length of all positional features; contact features count 1 to + * the total and 1 to size(), consistent with an average 'feature length' of 1 + */ + int totalExtent; + + float positionalMinScore; + + float positionalMaxScore; + + float nonPositionalMinScore; + + float nonPositionalMaxScore; /** - * Constructor + * Answers the 'length' of the feature, counting 0 for non-positional features + * and 1 for contact features + * + * @param feature + * @return */ - public FeatureStore() + protected static int getFeatureLength(SequenceFeature feature) { - nonNestedFeatures = new ArrayList(); - // we only construct contactFeatures and the NCList if we need to + if (feature.isNonPositional()) + { + return 0; + } + if (feature.isContactFeature()) + { + return 1; + } + return 1 + feature.getEnd() - feature.getBegin(); } /** - * Add one entry to the data store + * Answers true if the list contains the feature, else false. This method is + * optimised for the condition that the list is sorted on feature start + * position ascending, and will give unreliable results if this does not hold. * + * @param list * @param feature + * @return */ - public void addFeature(SequenceFeature feature) + public static boolean listContains(List list, + SequenceFeature feature) { - if (feature.isContactFeature()) + if (list == null || feature == null) { - addContactFeature(feature); + return false; } - else + + /* + * locate the first entry in the list which does not precede the feature + */ + int begin = feature.begin; + int pos = BinarySearcher.findFirst(list, true, Compare.GE, begin); + int len = list.size(); + while (pos < len) { - boolean added = addNonNestedFeature(feature); - if (!added) + SequenceFeature sf = list.get(pos); + if (sf.begin > begin) { - /* - * detected a nested feature - put it in the NCList structure - */ - addNestedFeature(feature); + return false; // no match found + } + if (sf.equals(feature)) + { + return true; } + pos++; } + return false; } /** - * Adds one feature to the NCList that can manage nested features (creating - * the NCList if necessary) + * A helper method to return the maximum of two floats, where a non-NaN value + * is treated as 'greater than' a NaN value (unlike Math.max which does the + * opposite) + * + * @param f1 + * @param f2 */ - protected synchronized void addNestedFeature(SequenceFeature feature) + protected static float max(float f1, float f2) { - if (nestedFeatures == null) + if (Float.isNaN(f1)) { - nestedFeatures = new NCList(feature); + return Float.isNaN(f2) ? f1 : f2; } else { - nestedFeatures.add(feature); + return Float.isNaN(f2) ? f1 : Math.max(f1, f2); } } /** - * Add a feature to the list of non-nested features, maintaining the ordering - * of the list. A check is made for whether the feature is nested in (properly - * contained by) an existing feature. If there is no nesting, the feature is - * added to the list and the method returns true. If nesting is found, the - * feature is not added and the method returns false. - *

- * Contact features are added at the position of their first contact point + * A helper method to return the minimum of two floats, where a non-NaN value + * is treated as 'less than' a NaN value (unlike Math.min which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float min(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.min(f1, f2); + } + } + + /** + * Constructor that defaults to using NCList IntervalStore + */ + public FeatureStore() + { + this(IntervalStoreType.INTERVAL_STORE_NCLIST); + } + + /** + * Constructor that allows an alternative IntervalStore implementation to be + * chosen + */ + public FeatureStore(IntervalStoreType intervalStoreType) + { + features = getIntervalStore(intervalStoreType); + positionalFeatureGroups = new HashSet<>(); + nonPositionalFeatureGroups = new HashSet<>(); + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + + // only construct nonPositionalFeatures or contactFeatures if needed + } + + /** + * Returns a new instance of IntervalStoreI of implementation as selected by + * the type parameter + * + * @param type + * @return + */ + private IntervalStoreI getIntervalStore( + IntervalStoreType type) + { + switch (type) + { + default: + case INTERVAL_STORE_NCLIST: + return new intervalstore.impl.IntervalStore<>(); + case INTERVAL_STORE_NCARRAY: + return new intervalstore.nonc.IntervalStore<>(); + case INTERVAL_STORE_LINKED_LIST: + return new intervalstore.nonc.IntervalStore0<>(); + } + } + + /** + * Add a contact feature to the lists that hold them ordered by start (first + * contact) and by end (second contact) position, ensuring the lists remain + * ordered, and returns true. This method allows duplicate features to be + * added, so test before calling to avoid this. * * @param feature * @return */ - protected boolean addNonNestedFeature(SequenceFeature feature) + protected synchronized boolean addContactFeature(SequenceFeature feature) { - synchronized (nonNestedFeatures) + if (contactFeatureStarts == null) { - int insertPosition = binarySearchForAdd(nonNestedFeatures, feature); + contactFeatureStarts = new ArrayList<>(); + contactFeatureEnds = new ArrayList<>(); + } - /* - * fail if we detect feature enclosure - of the new feature by - * the one preceding it, or of the next feature by the new one - */ - if (insertPosition > 0) + /* + * insert into list sorted by start (first contact position): + * binary search the sorted list to find the insertion point + */ + int insertAt = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, feature.begin); + contactFeatureStarts.add(insertAt, feature); + /* + * insert into list sorted by end (second contact position): + * binary search the sorted list to find the insertion point + */ + contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end), + feature); + + return true; + } + + /** + * Adds one sequence feature to the store, and returns true, unless the + * feature is already contained in the store, in which case this method + * returns false. Containment is determined by SequenceFeature.equals() + * comparison. + * + * @param feature + */ + public boolean addFeature(SequenceFeature feature) + { + if (feature.isContactFeature()) + { + if (containsContactFeature(feature)) { - if (encloses(nonNestedFeatures.get(insertPosition - 1), feature)) - { - return false; - } + return false; } - if (insertPosition < nonNestedFeatures.size()) + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxContactStart) { - if (encloses(feature, nonNestedFeatures.get(insertPosition))) - { - return false; - } + maxContactStart = feature.begin; + } + addContactFeature(feature); + } + else if (feature.isNonPositional()) + { + if (containsNonPositionalFeature(feature)) + { + return false; } - /* - * checks passed - add or append the feature - */ - if (insertPosition == nonNestedFeatures.size()) + addNonPositionalFeature(feature); + } + else + { + if (!features.add(feature, false)) + { + return false; + } + positionalFeatureGroups.add(feature.getFeatureGroup()); + if (feature.begin > maxStart) { - nonNestedFeatures.add(feature); + maxStart = feature.begin; + } + } + + /* + * record the total extent of positional features, to make + * getTotalFeatureLength possible; we count the length of a + * contact feature as 1 + */ + totalExtent += getFeatureLength(feature); + + /* + * record the minimum and maximum score for positional + * and non-positional features + */ + float score = feature.getScore(); + if (!Float.isNaN(score)) + { + if (feature.isNonPositional()) + { + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); } else { - nonNestedFeatures.add(insertPosition, feature); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); } - return true; } + + return true; } /** - * Answers true if range1 properly encloses range2, else false + * A helper method that adds to the result list any features from the + * collection provided whose feature group matches the specified group * - * @param range1 - * @param range2 - * @return + * @param group + * @param sfs + * @param result */ - protected boolean encloses(ContiguousI range1, ContiguousI range2) + private void addFeaturesForGroup(String group, + Collection sfs, List result) { - int begin1 = range1.getBegin(); - int begin2 = range2.getBegin(); - int end1 = range1.getEnd(); - int end2 = range2.getEnd(); - if (begin1 == begin2 && end1 > end2) + if (sfs == null) { - return true; + return; } - if (begin1 < begin2 && end1 >= end2) + for (SequenceFeature sf : sfs) { - return true; + String featureGroup = sf.getFeatureGroup(); + if (group == null && featureGroup == null + || group != null && group.equals(featureGroup)) + { + result.add(sf); + } } - return false; } /** - * Answers the index of the first element in the given list which follows or - * matches the given feature in the sort order. If no such element, answers - * the length of the list. + * Adds the feature to the list of non-positional features (with lazy + * instantiation of the list if it is null), and returns true. The feature + * group is added to the set of distinct feature groups for non-positional + * features. This method allows duplicate features, so test before calling to + * prevent this. * - * @param list * @param feature + */ + protected boolean addNonPositionalFeature(SequenceFeature feature) + { + if (nonPositionalFeatures == null) + { + nonPositionalFeatures = new ArrayList<>(); + } + + nonPositionalFeatures.add(feature); + + nonPositionalFeatureGroups.add(feature.getFeatureGroup()); + + return true; + } + + /** + * Answers true if this store contains the given feature (testing by + * SequenceFeature.equals), else false * + * @param feature * @return */ - protected int binarySearchForAdd(List list, SequenceFeature feature) + public boolean contains(SequenceFeature feature) { - // TODO binary search! - int i = 0; - while (i < list.size()) + if (feature.isNonPositional()) { - if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0) - { - break; - } - i++; + return containsNonPositionalFeature(feature); + } + + if (feature.isContactFeature()) + { + return containsContactFeature(feature); } - return i; + + return containsPositionalFeature(feature); + } + + private boolean containsPositionalFeature(SequenceFeature feature) + { + return features == null || feature.begin > maxStart ? false + : features.contains(feature); } /** - * Add a contact feature to the lists that hold them ordered by start (first - * contact) and by end (second contact) position, ensuring the lists remain - * ordered + * Answers true if this store already contains a contact feature equal to the + * given feature (by {@code SequenceFeature.equals()} test), else false + * + * @param feature + * @return + */ + private boolean containsContactFeature(SequenceFeature feature) + { + return contactFeatureStarts != null && feature.begin <= maxContactStart + && listContains(contactFeatureStarts, feature); + } + + /** + * Answers true if this store already contains a non-positional feature equal + * to the given feature (by {@code SequenceFeature.equals()} test), else false * * @param feature + * @return */ - protected synchronized void addContactFeature(SequenceFeature feature) + private boolean containsNonPositionalFeature(SequenceFeature feature) { - // TODO binary search for insertion points! - if (contactFeatureStarts == null) + return nonPositionalFeatures == null ? false + : nonPositionalFeatures.contains(feature); + } + + /** + * Deletes the given feature from the store, returning true if it was found + * (and deleted), else false. This method makes no assumption that the feature + * is in the 'expected' place in the store, in case it has been modified since + * it was added. + * + * @param sf + */ + public synchronized boolean delete(SequenceFeature sf) + { + boolean removed = false; + + /* + * try contact positions (and if found, delete + * from both lists of contact positions) + */ + if (!removed && contactFeatureStarts != null) + { + removed = contactFeatureStarts.remove(sf); + if (removed) + { + contactFeatureEnds.remove(sf); + } + } + + /* + * if not found, try non-positional features + */ + if (!removed && nonPositionalFeatures != null) + { + removed = nonPositionalFeatures.remove(sf); + } + + /* + * if not found, try nested features + */ + if (!removed && features != null) { - contactFeatureStarts = new ArrayList(); + removed = features.remove(sf); } - if (contactFeatureEnds == null) + + if (removed) { - contactFeatureEnds = new ArrayList(); + rescanAfterDelete(); } - contactFeatureStarts.add(feature); - Collections.sort(contactFeatureStarts, startOrdering); - contactFeatureEnds.add(feature); - Collections.sort(contactFeatureEnds, endOrdering); + + return removed; + } + + public List findFeatures(long start, long end) + { + return findFeatures(start, end, null); } /** - * Returns a (possibly empty) list of entries whose range overlaps the given + * Returns a (possibly empty) list of features whose extent overlaps the given * range. The returned list is not ordered. Contact features are included if - * either of the contact points lies within the range. + * either of the contact points lies within the range. If the {@code result} + * parameter is not null, new entries are added to this list and the (possibly + * extended) list returned. * * @param start * start position of overlap range (inclusive) * @param end * end position of overlap range (inclusive) + * @param result * @return */ - public List findOverlappingFeatures(long start, long end) + public List findFeatures(long start, long end, + List result) { - List result = new ArrayList(); - - findNonNestedFeatures(start, end, result); + if (result == null) + { + result = new ArrayList<>(); + } findContactFeatures(start, end, result); + features.findOverlaps(start, end, result); + + return result; + } - if (nestedFeatures != null) + /** + * Returns a (possibly empty) list of stored contact features + * + * @return + */ + public List getContactFeatures() + { + List result = new ArrayList<>(); + getContactFeatures(result); + return result; + } + + /** + * Adds any stored contact features to the result list + * + * @return + */ + public void getContactFeatures(List result) + { + if (contactFeatureStarts != null) + { + result.addAll(contactFeatureStarts); + } + } + + /** + * Answers the number of positional (or non-positional) features stored. + * Contact features count as 1. + * + * @param positional + * @return + */ + public int getFeatureCount(boolean positional) + { + if (!positional) + { + return nonPositionalFeatures == null ? 0 + : nonPositionalFeatures.size(); + } + + int size = 0; + + if (contactFeatureStarts != null) + { + // note a contact feature (start/end) counts as one + size += contactFeatureStarts.size(); + } + + if (features != null) + { + size += features.size(); + } + return size; + } + + /** + * Answers the set of distinct feature groups stored, possibly including null, + * as an unmodifiable view of the set. The parameter determines whether the + * groups for positional or for non-positional features are returned. + * + * @param positionalFeatures + * @return + */ + public Set getFeatureGroups(boolean positionalFeatures) + { + if (positionalFeatures) { - result.addAll(nestedFeatures.findOverlaps(start, end)); + return Collections.unmodifiableSet(positionalFeatureGroups); } + else + { + return nonPositionalFeatureGroups == null + ? Collections. emptySet() + : Collections.unmodifiableSet(nonPositionalFeatureGroups); + } + } + /** + * Answers a list of all either positional or non-positional features whose + * feature group matches the given group (which may be null) + * + * @param positional + * @param group + * @return + */ + public List getFeaturesForGroup(boolean positional, + String group) + { + List result = new ArrayList<>(); + + /* + * if we know features don't include the target group, no need + * to inspect them for matches + */ + if (positional && !positionalFeatureGroups.contains(group) + || !positional && !nonPositionalFeatureGroups.contains(group)) + { + return result; + } + + if (positional) + { + addFeaturesForGroup(group, contactFeatureStarts, result); + addFeaturesForGroup(group, features, result); + } + else + { + addFeaturesForGroup(group, nonPositionalFeatures, result); + } return result; } /** - * Adds contact features to the result list where either the second or the - * first contact position lies within the target range. + * Answers the maximum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. * - * @param from - * @param to - * @param result + * @param positional + * @return */ - protected void findContactFeatures(long from, long to, - List result) + public float getMaximumScore(boolean positional) + { + return positional ? positionalMaxScore : nonPositionalMaxScore; + } + + /** + * Answers the minimum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. + * + * @param positional + * @return + */ + public float getMinimumScore(boolean positional) + { + return positional ? positionalMinScore : nonPositionalMinScore; + } + + /** + * Answers a (possibly empty) list of all non-positional features + * + * @return + */ + public List getNonPositionalFeatures() + { + List result = new ArrayList<>(); + getNonPositionalFeatures(result); + return result; + } + + /** + * Adds any stored non-positional features to the result list + * + * @return + */ + public void getNonPositionalFeatures(List result) + { + if (nonPositionalFeatures != null) + { + result.addAll(nonPositionalFeatures); + } + } + + /** + * Returns a (possibly empty) list of all positional features stored + * + * @return + */ + public List getPositionalFeatures() { + List result = new ArrayList<>(); + getPositionalFeatures(result); + + return result; + } + + /** + * Adds all positional features stored to the result list, in no guaranteed + * order, and with no check for duplicates + */ + public void getPositionalFeatures(List result) + { + /* + * add any contact features - from the list by start position + */ if (contactFeatureStarts != null) { - findContactStartFeatures(from, to, result); + result.addAll(contactFeatureStarts); } - if (contactFeatureEnds != null) + + /* + * add any nested features + */ + if (features != null) { - findContactEndFeatures(from, to, result); + result.addAll(features); } } /** - * @param from - * @param to - * @param result + * Answers the total length of positional features (or zero if there are + * none). Contact features contribute a value of 1 to the total. + * + * @return */ - protected void findContactEndFeatures(long from, long to, - List result) + public int getTotalFeatureLength() + { + return totalExtent; + } + + /** + * Answers true if this store has no features, else false + * + * @return + */ + public boolean isEmpty() { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++) + boolean hasFeatures = (contactFeatureStarts != null + && !contactFeatureStarts.isEmpty()) + || (nonPositionalFeatures != null + && !nonPositionalFeatures.isEmpty()) + || (features != null && features.size() > 0); + + return !hasFeatures; + } + + /** + * Rescan all features to recompute any cached values after an entry has been + * deleted. This is expected to be an infrequent event, so performance here is + * not critical. + */ + protected synchronized void rescanAfterDelete() + { + positionalFeatureGroups.clear(); + nonPositionalFeatureGroups.clear(); + totalExtent = 0; + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + /* + * scan non-positional features for groups and scores + */ + if (nonPositionalFeatures != null) { - SequenceFeature sf = contactFeatureEnds.get(startPosition); - if (!sf.isContactFeature()) + for (int i = 0, n = nonPositionalFeatures.size(); i < n; i++) { - System.err.println("Error! non-contact feature type " - + sf.getType() + " in contact features list"); - continue; - } - int begin = sf.getBegin(); - if (begin >= from && begin <= to) - { - /* - * this feature's first contact position lies in the search range - * so we don't include it in results a second time - */ - continue; - } - int end = sf.getEnd(); - if (end >= from && end <= to) - { - result.add(sf); + SequenceFeature sf = nonPositionalFeatures.get(i); + nonPositionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); } } + + rescanPositional(contactFeatureStarts); + rescanPositional(features); + } + + /** + * Scans the given features and updates cached feature groups, minimum and + * maximum feature score, and total feature extent (length) for positional + * features + * + * @param sfs + */ + private void rescanPositional(Collection sfs) + { + if (sfs == null) + { + return; + } + for (SequenceFeature sf : sfs) + { + positionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + totalExtent += getFeatureLength(sf); + } } /** - * Returns the index of the first contact feature found whose end (second - * contact position) is not before the given start position. If no such - * feature is found, returns the length of the contact features list. + * Adds the shift amount to the start and end of all positional features whose + * start position is at or after fromPosition. Returns true if at least one + * feature was shifted, else false. * - * @param start + * @param fromPosition + * @param shiftBy * @return */ - protected int contactsBinarySearch(long start) + public synchronized boolean shiftFeatures(int fromPosition, int shiftBy) { - // TODO binary search!! - int i = 0; - while (i < contactFeatureEnds.size()) + /* + * Because begin and end are final fields (to ensure the data store's + * integrity), we have to delete each feature and re-add it as amended. + * (Although a simple shift of all values would preserve data integrity!) + */ + boolean modified = false; + List list = getPositionalFeatures(); + for (int i = 0, n = list.size(); i < n; i++) { - if (contactFeatureEnds.get(i).getEnd() >= start) + SequenceFeature sf = list.get(i); + if (sf.getBegin() >= fromPosition) { - break; + modified = true; + int newBegin = sf.getBegin() + shiftBy; + int newEnd = sf.getEnd() + shiftBy; + + /* + * sanity check: don't shift left of the first residue + */ + if (newEnd > 0) + { + newBegin = Math.max(1, newBegin); + SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup(), sf.getScore()); + addFeature(sf2); + } + delete(sf); } - i++; } - - return i; + return modified; } /** - * Adds features to the result list that are at a single position which lies - * within the target range. Non-positional features (start=end=0) and contact - * features are excluded. + * Answers the position (0, 1...) in the list of the first entry whose end + * position is not less than {@ pos}. If no such entry is found, answers the + * length of the list. * - * @param from - * @param to - * @param result + * @param list + * @param pos + * @return */ - protected void findNonNestedFeatures(long from, long to, - List result) + protected int findFirstEnd(List list, long pos) { - int startIndex = binarySearch(nonNestedFeatures, from); - findNonNestedFeatures(startIndex, from, to, result); + return BinarySearcher.findFirst(list, false, Compare.GE, (int) pos); } /** - * Scans the list of non-nested features, starting from startIndex, to find - * those that overlap the from-to range, and adds them to the result list. - * Returns the index of the first feature whose start position is after the - * target range (or the length of the whole list if none such feature exists). + * Adds contact features to the result list where either the second or the + * first contact position lies within the target range * - * @param startIndex * @param from * @param to * @param result - * @return */ - protected int findNonNestedFeatures(final int startIndex, long from, - long to, + protected void findContactFeatures(long from, long to, List result) { - int i = startIndex; - while (i < nonNestedFeatures.size()) + if (contactFeatureStarts != null) { - SequenceFeature sf = nonNestedFeatures.get(i); - if (sf.getBegin() > to) - { - break; - } - int start = sf.getBegin(); - int end = sf.getEnd(); - if (sf.isContactFeature()) - { - end = start; - } - if (start <= to && end >= from) - { - result.add(sf); - } - i++; + findContactStartOverlaps(from, to, result); + findContactEndOverlaps(from, to, result); } - return i; } /** - * Performs a binary search of the (sorted) list to find the index of the - * first entry whose end position is not less than the target position (i.e. - * skip all features that properly precede the given position) + * Adds to the result list any contact features whose end (second contact + * point), but not start (first contact point), lies in the query from-to + * range * - * @param features - * @param target - * @return + * @param from + * @param to + * @param result */ - protected int binarySearch(List features, long target) + private void findContactEndOverlaps(long from, long to, + List result) { - int width = features.size() / 2; - int lastpos = width; - while (width > 0) + /* + * find the first contact feature (if any) + * whose end point is not before the target range + */ + int index = findFirstEnd(contactFeatureEnds, from); + + int n = contactFeatureEnds.size(); + while (index < n) { - int end = features.get(lastpos).getEnd(); - width = width / 2; - if (end > target) + SequenceFeature sf = contactFeatureEnds.get(index); + if (!sf.isContactFeature()) { - lastpos -= width; + System.err.println("Error! non-contact feature type " + sf.getType() + + " in contact features list"); + index++; + continue; } - else + + int begin = sf.getBegin(); + if (begin >= from && begin <= to) + { + /* + * this feature's first contact position lies in the search range + * so we don't include it in results a second time + */ + index++; + continue; + } + + if (sf.getEnd() > to) { - lastpos += width; + /* + * this feature (and all following) has end point after the target range + */ + break; } + + /* + * feature has end >= from and end <= to + * i.e. contact end point lies within overlap search range + */ + result.add(sf); + index++; } - // todo correct binary search - return lastpos > 1 ? lastpos - 2 : 0; - // return lastpos; } /** @@ -422,24 +970,37 @@ public class FeatureStore * @param to * @param result */ - protected void findContactStartFeatures(long from, long to, + private void findContactStartOverlaps(long from, long to, List result) { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++) + int index = BinarySearcher.findFirst(contactFeatureStarts, true, + Compare.GE, (int) from); + + while (index < contactFeatureStarts.size()) { - SequenceFeature sf = contactFeatureStarts.get(startPosition); + SequenceFeature sf = contactFeatureStarts.get(index); if (!sf.isContactFeature()) { - System.err.println("Error! non-contact feature type " - + sf.getType() + " in contact features list"); + System.err.println("Error! non-contact feature " + sf.toString() + + " in contact features list"); + index++; continue; } - int begin = sf.getBegin(); - if (begin >= from && begin <= to) + if (sf.getBegin() > to) { - result.add(sf); + /* + * this feature's start (and all following) follows the target range + */ + break; } + + /* + * feature has begin >= from and begin <= to + * i.e. contact start point lies within overlap search range + */ + result.add(sf); + index++; } } + }