X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=fe575e0186e0ee76ed8cd259effc80c8b38d99f9;hb=99b6c1ca13af92cd8867971986a05664b0cd787c;hp=716eb04c88f55f6f4e39e530b6a129a920e7ed44;hpb=8a5b4942a0625dd64429291729d89438fccfd804;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index 716eb04..fe575e0 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -23,27 +23,114 @@ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; -import intervalstore.api.IntervalStoreI; -import intervalstore.impl.BinarySearcher; -import intervalstore.impl.IntervalStore; - -/** - * A data store for a set of sequence features that supports efficient lookup of - * features overlapping a given range. Intended for (but not limited to) storage - * of features for one sequence and feature type. - * - * @author gmcarstairs - * - */ -public class FeatureStore +public abstract class FeatureStore implements FeatureStoreI { + + /** + * Answers the 'length' of the feature, counting 0 for non-positional features + * and 1 for contact features + * + * @param feature + * @return + */ + protected static int getFeatureLength(SequenceFeature feature) + { + if (feature.isNonPositional()) + { + return 0; + } + if (feature.isContactFeature()) + { + return 1; + } + return 1 + feature.getEnd() - feature.getBegin(); + } + + /** + * Answers true if the list contains the feature, else false. This method is + * optimised for the condition that the list is sorted on feature start + * position ascending, and will give unreliable results if this does not hold. + * + * @param features + * @param feature + * @return + */ + @Override + public boolean listContains(List features, + SequenceFeature feature) + { + if (features == null || feature == null) + { + return false; + } + + /* + * locate the first entry in the list which does not precede the feature + */ + int pos = findFirstBegin(features, feature.begin); + int len = features.size(); + while (pos < len) + { + SequenceFeature sf = features.get(pos); + if (sf.getBegin() > feature.getBegin()) + { + return false; // no match found + } + if (sf.equals(feature)) + { + return true; + } + pos++; + } + return false; + } + + /** + * A helper method to return the maximum of two floats, where a non-NaN value + * is treated as 'greater than' a NaN value (unlike Math.max which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float max(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.max(f1, f2); + } + } + + /** + * A helper method to return the minimum of two floats, where a non-NaN value + * is treated as 'less than' a NaN value (unlike Math.min which does the + * opposite) + * + * @param f1 + * @param f2 + */ + protected static float min(float f1, float f2) + { + if (Float.isNaN(f1)) + { + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.min(f1, f2); + } + } + /* * Non-positional features have no (zero) start/end position. * Kept as a separate list in case this criterion changes in future. @@ -64,7 +151,7 @@ public class FeatureStore * IntervalStore holds remaining features and provides efficient * query for features overlapping any given interval */ - IntervalStoreI features; + Collection features; /* * Feature groups represented in stored positional features @@ -92,15 +179,11 @@ public class FeatureStore float nonPositionalMaxScore; - private ArrayList featuresList; - /** * Constructor */ public FeatureStore() { - features = new IntervalStore<>(); - featuresList = new ArrayList<>(); positionalFeatureGroups = new HashSet<>(); nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; @@ -112,6 +195,39 @@ public class FeatureStore } /** + * Add a contact feature to the lists that hold them ordered by start (first + * contact) and by end (second contact) position, ensuring the lists remain + * ordered, and returns true. This method allows duplicate features to be + * added, so test before calling to avoid this. + * + * @param feature + * @return + */ + protected synchronized boolean addContactFeature(SequenceFeature feature) + { + if (contactFeatureStarts == null) + { + contactFeatureStarts = new ArrayList<>(); + contactFeatureEnds = new ArrayList<>(); + } + + /* + * insert into list sorted by start (first contact position): + * binary search the sorted list to find the insertion point + */ + contactFeatureStarts.add( + findFirstBegin(contactFeatureStarts, feature.begin), feature); + /* + * insert into list sorted by end (second contact position): + * binary search the sorted list to find the insertion point + */ + contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end), + feature); + + return true; + } + + /** * Adds one sequence feature to the store, and returns true, unless the * feature is already contained in the store, in which case this method * returns false. Containment is determined by SequenceFeature.equals() @@ -120,6 +236,7 @@ public class FeatureStore * @param feature */ + @Override public boolean addFeature(SequenceFeature feature) { if (contains(feature)) @@ -177,48 +294,31 @@ public class FeatureStore return true; } - /** - * Answers true if this store contains the given feature (testing by - * SequenceFeature.equals), else false - * - * @param feature - * @return - */ - public boolean contains(SequenceFeature feature) + private void addFeaturesForGroup(String group, + Collection sfs, List result) { - if (feature.isNonPositional()) + if (sfs == null) { - return nonPositionalFeatures == null ? false - : nonPositionalFeatures.contains(feature); + return; } - - if (feature.isContactFeature()) + for (SequenceFeature sf : sfs) { - return contactFeatureStarts == null ? false - : listContains(contactFeatureStarts, feature); + String featureGroup = sf.getFeatureGroup(); + if (group == null && featureGroup == null + || group != null && group.equals(featureGroup)) + { + result.add(sf); + } } - - return features == null ? false : features.contains(feature); } /** - * Answers the 'length' of the feature, counting 0 for non-positional features - * and 1 for contact features - * - * @param feature - * @return + * Adds one feature to the IntervalStore that can manage nested features + * (creating the IntervalStore if necessary) */ - protected static int getFeatureLength(SequenceFeature feature) + protected synchronized void addNestedFeature(SequenceFeature feature) { - if (feature.isNonPositional()) - { - return 0; - } - if (feature.isContactFeature()) - { - return 1; - } - return 1 + feature.getEnd() - feature.getBegin(); + features.add(feature); } /** @@ -245,508 +345,299 @@ public class FeatureStore } /** - * Adds one feature to the IntervalStore that can manage nested features - * (creating the IntervalStore if necessary) - */ - protected synchronized void addNestedFeature(SequenceFeature feature) - { - if (features == null) - { - features = new IntervalStore<>(); - } - features.add(feature); - featuresList.add(feature); - } - - /** - * Add a contact feature to the lists that hold them ordered by start (first - * contact) and by end (second contact) position, ensuring the lists remain - * ordered, and returns true. This method allows duplicate features to be - * added, so test before calling to avoid this. + * Answers true if this store contains the given feature (testing by + * SequenceFeature.equals), else false * * @param feature * @return */ - protected synchronized boolean addContactFeature(SequenceFeature feature) + @Override + public boolean contains(SequenceFeature feature) { - if (contactFeatureStarts == null) + if (feature.isNonPositional()) { - contactFeatureStarts = new ArrayList<>(); + return nonPositionalFeatures == null ? false + : nonPositionalFeatures.contains(feature); } - if (contactFeatureEnds == null) + + if (feature.isContactFeature()) { - contactFeatureEnds = new ArrayList<>(); + return contactFeatureStarts != null + && listContains(contactFeatureStarts, feature); } - /* - * insert into list sorted by start (first contact position): - * binary search the sorted list to find the insertion point - */ - int insertPosition = BinarySearcher.findFirst(contactFeatureStarts, - f -> f.getBegin() >= feature.getBegin()); - contactFeatureStarts.add(insertPosition, feature); - - /* - * insert into list sorted by end (second contact position): - * binary search the sorted list to find the insertion point - */ - insertPosition = BinarySearcher.findFirst(contactFeatureEnds, - f -> f.getEnd() >= feature.getEnd()); - contactFeatureEnds.add(insertPosition, feature); - - return true; + return features == null ? false : features.contains(feature); } /** - * Answers true if the list contains the feature, else false. This method is - * optimised for the condition that the list is sorted on feature start - * position ascending, and will give unreliable results if this does not hold. + * Deletes the given feature from the store, returning true if it was found + * (and deleted), else false. This method makes no assumption that the feature + * is in the 'expected' place in the store, in case it has been modified since + * it was added. * - * @param features - * @param feature - * @return + * @param sf */ - protected static boolean listContains(List features, - SequenceFeature feature) + + @Override + public synchronized boolean delete(SequenceFeature sf) { - if (features == null || feature == null) - { - return false; - } + boolean removed = false; /* - * locate the first entry in the list which does not precede the feature + * try contact positions (and if found, delete + * from both lists of contact positions) */ - // int pos = binarySearch(features, - // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); - int pos = BinarySearcher.findFirst(features, - val -> val.getBegin() >= feature.getBegin()); - int len = features.size(); - while (pos < len) + if (!removed && contactFeatureStarts != null) { - SequenceFeature sf = features.get(pos); - if (sf.getBegin() > feature.getBegin()) - { - return false; // no match found - } - if (sf.equals(feature)) + removed = contactFeatureStarts.remove(sf); + if (removed) { - return true; + contactFeatureEnds.remove(sf); } - pos++; } - return false; - } - /** - * Returns a (possibly empty) list of features whose extent overlaps the given - * range. The returned list is not ordered. Contact features are included if - * either of the contact points lies within the range. - * - * @param start - * start position of overlap range (inclusive) - * @param end - * end position of overlap range (inclusive) - * @return - */ + boolean removedNonPositional = false; - public List findOverlappingFeatures(long start, long end) - { - List result = new ArrayList<>(); - - findContactFeatures(start, end, result); - - if (features != null) + /* + * if not found, try non-positional features + */ + if (!removed && nonPositionalFeatures != null) { - result.addAll(features.findOverlaps(start, end)); + removedNonPositional = nonPositionalFeatures.remove(sf); + removed = removedNonPositional; } - return result; - } - - /** - * Adds contact features to the result list where either the second or the - * first contact position lies within the target range - * - * @param from - * @param to - * @param result - */ - protected void findContactFeatures(long from, long to, - List result) - { - if (contactFeatureStarts != null) + /* + * if not found, try nested features + */ + if (!removed && features != null) { - findContactStartOverlaps(from, to, result); + removed = features.remove(sf); } - if (contactFeatureEnds != null) + + if (removed) { - findContactEndOverlaps(from, to, result); + rescanAfterDelete(); } - } - /** - * Adds to the result list any contact features whose end (second contact - * point), but not start (first contact point), lies in the query from-to - * range - * - * @param from - * @param to - * @param result - */ - protected void findContactEndOverlaps(long from, long to, - List result) - { - /* - * find the first contact feature (if any) - * whose end point is not before the target range - */ - int index = BinarySearcher.findFirst(contactFeatureEnds, - f -> f.getEnd() >= from); + return removed; + } - while (index < contactFeatureEnds.size()) - { - SequenceFeature sf = contactFeatureEnds.get(index); - if (!sf.isContactFeature()) - { - System.err.println("Error! non-contact feature type " + sf.getType() - + " in contact features list"); - index++; - continue; - } + abstract protected void findContactFeatures(long from, long to, + List result); - int begin = sf.getBegin(); - if (begin >= from && begin <= to) - { - /* - * this feature's first contact position lies in the search range - * so we don't include it in results a second time - */ - index++; - continue; - } + abstract protected int findFirstBegin(List list, + long pos); - if (sf.getEnd() > to) - { - /* - * this feature (and all following) has end point after the target range - */ - break; - } + abstract protected int findFirstEnd(List list, long pos); - /* - * feature has end >= from and end <= to - * i.e. contact end point lies within overlap search range - */ - result.add(sf); - index++; - } + @Override + public List findOverlappingFeatures(long start, long end) + { + return findOverlappingFeatures(start, end, null); } - /** - * Adds contact features whose start position lies in the from-to range to the - * result list - * - * @param from - * @param to - * @param result - */ - protected void findContactStartOverlaps(long from, long to, - List result) + @Override + public List getContactFeatures() { - int index = BinarySearcher.findFirst(contactFeatureStarts, - f -> f.getBegin() >= from); - - while (index < contactFeatureStarts.size()) - { - SequenceFeature sf = contactFeatureStarts.get(index); - if (!sf.isContactFeature()) - { - System.err.println("Error! non-contact feature " + sf.toString() - + " in contact features list"); - index++; - continue; - } - if (sf.getBegin() > to) - { - /* - * this feature's start (and all following) follows the target range - */ - break; - } - - /* - * feature has begin >= from and begin <= to - * i.e. contact start point lies within overlap search range - */ - result.add(sf); - index++; - } + return getContactFeatures(new ArrayList<>()); } /** - * Answers a list of all positional features stored, in no guaranteed order + * Answers a list of all contact features. If there are none, returns an + * immutable empty list. * * @return */ - public List getPositionalFeatures() + @Override + public List getContactFeatures( + List result) { - List result = new ArrayList<>(); - - /* - * add any contact features - from the list by start position - */ if (contactFeatureStarts != null) { result.addAll(contactFeatureStarts); } - - /* - * add any nested features - */ - if (features != null) - { - result.addAll(features); - } - return result; } /** - * Answers a list of all contact features. If there are none, returns an - * immutable empty list. + * Answers the number of positional (or non-positional) features stored. + * Contact features count as 1. * + * @param positional * @return */ - public List getContactFeatures() + @Override + public int getFeatureCount(boolean positional) { - if (contactFeatureStarts == null) + if (!positional) { - return Collections.emptyList(); + return nonPositionalFeatures == null ? 0 + : nonPositionalFeatures.size(); } - return new ArrayList<>(contactFeatureStarts); - } - /** - * Answers a list of all non-positional features. If there are none, returns - * an immutable empty list. - * - * @return - */ + return (contactFeatureStarts == null ? 0 : contactFeatureStarts.size()) + + features.size(); - public List getNonPositionalFeatures() - { - if (nonPositionalFeatures == null) - { - return Collections.emptyList(); - } - return new ArrayList<>(nonPositionalFeatures); } /** - * Deletes the given feature from the store, returning true if it was found - * (and deleted), else false. This method makes no assumption that the feature - * is in the 'expected' place in the store, in case it has been modified since - * it was added. + * Answers the set of distinct feature groups stored, possibly including null, + * as an unmodifiable view of the set. The parameter determines whether the + * groups for positional or for non-positional features are returned. * - * @param sf + * @param positionalFeatures + * @return */ - public synchronized boolean delete(SequenceFeature sf) + @Override + public Set getFeatureGroups(boolean positionalFeatures) { - boolean removed = false; - - /* - * try contact positions (and if found, delete - * from both lists of contact positions) - */ - if (!removed && contactFeatureStarts != null) - { - removed = contactFeatureStarts.remove(sf); - if (removed) - { - contactFeatureEnds.remove(sf); - } - } - - boolean removedNonPositional = false; - - /* - * if not found, try non-positional features - */ - if (!removed && nonPositionalFeatures != null) - { - removedNonPositional = nonPositionalFeatures.remove(sf); - removed = removedNonPositional; - } - - /* - * if not found, try nested features - */ - if (!removed && features != null) + if (positionalFeatures) { - removed = features.remove(sf); - featuresList.remove(sf); + return Collections.unmodifiableSet(positionalFeatureGroups); } - - if (removed) + else { - rescanAfterDelete(); + return nonPositionalFeatureGroups == null + ? Collections. emptySet() + : Collections.unmodifiableSet(nonPositionalFeatureGroups); } + } - return removed; + @Override + public Collection getFeatures() + { + return features; } /** - * Rescan all features to recompute any cached values after an entry has been - * deleted. This is expected to be an infrequent event, so performance here is - * not critical. + * Answers a list of all either positional or non-positional features whose + * feature group matches the given group (which may be null) + * + * @param positional + * @param group + * @return */ - protected synchronized void rescanAfterDelete() + + @Override + public List getFeaturesForGroup(boolean positional, + String group) { - positionalFeatureGroups.clear(); - nonPositionalFeatureGroups.clear(); - totalExtent = 0; - positionalMinScore = Float.NaN; - positionalMaxScore = Float.NaN; - nonPositionalMinScore = Float.NaN; - nonPositionalMaxScore = Float.NaN; - /* - * scan non-positional features for groups and scores - */ - for (SequenceFeature sf : getNonPositionalFeatures()) - { - nonPositionalFeatureGroups.add(sf.getFeatureGroup()); - float score = sf.getScore(); - nonPositionalMinScore = min(nonPositionalMinScore, score); - nonPositionalMaxScore = max(nonPositionalMaxScore, score); - } + List result = new ArrayList<>(); /* - * scan positional features for groups, scores and extents + * if we know features don't include the target group, no need + * to inspect them for matches */ - for (SequenceFeature sf : getPositionalFeatures()) + if (positional && !positionalFeatureGroups.contains(group) + || !positional && !nonPositionalFeatureGroups.contains(group)) { - positionalFeatureGroups.add(sf.getFeatureGroup()); - float score = sf.getScore(); - positionalMinScore = min(positionalMinScore, score); - positionalMaxScore = max(positionalMaxScore, score); - totalExtent += getFeatureLength(sf); + return result; } - } - /** - * A helper method to return the minimum of two floats, where a non-NaN value - * is treated as 'less than' a NaN value (unlike Math.min which does the - * opposite) - * - * @param f1 - * @param f2 - */ - protected static float min(float f1, float f2) - { - if (Float.isNaN(f1)) + if (positional) { - return Float.isNaN(f2) ? f1 : f2; + addFeaturesForGroup(group, contactFeatureStarts, result); + addFeaturesForGroup(group, features, result); } else { - return Float.isNaN(f2) ? f1 : Math.min(f1, f2); + addFeaturesForGroup(group, nonPositionalFeatures, result); } + return result; } /** - * A helper method to return the maximum of two floats, where a non-NaN value - * is treated as 'greater than' a NaN value (unlike Math.max which does the - * opposite) + * Answers the maximum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. * - * @param f1 - * @param f2 + * @param positional + * @return */ - protected static float max(float f1, float f2) + + @Override + public float getMaximumScore(boolean positional) { - if (Float.isNaN(f1)) - { - return Float.isNaN(f2) ? f1 : f2; - } - else - { - return Float.isNaN(f2) ? f1 : Math.max(f1, f2); - } + return positional ? positionalMaxScore : nonPositionalMaxScore; } /** - * Answers true if this store has no features, else false + * Answers the minimum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. * + * @param positional * @return */ - public boolean isEmpty() + @Override + public float getMinimumScore(boolean positional) { - boolean hasFeatures = (contactFeatureStarts != null - && !contactFeatureStarts.isEmpty()) - || (nonPositionalFeatures != null - && !nonPositionalFeatures.isEmpty()) - || (features != null && features.size() > 0); + return positional ? positionalMinScore : nonPositionalMinScore; + } - return !hasFeatures; + @Override + public List getNonPositionalFeatures() + { + return getNonPositionalFeatures(new ArrayList<>()); } /** - * Answers the set of distinct feature groups stored, possibly including null, - * as an unmodifiable view of the set. The parameter determines whether the - * groups for positional or for non-positional features are returned. + * Answers a list of all non-positional features. If there are none, returns + * an immutable empty list. * - * @param positionalFeatures * @return */ - public Set getFeatureGroups(boolean positionalFeatures) + @Override + public List getNonPositionalFeatures( + List result) { - if (positionalFeatures) - { - return Collections.unmodifiableSet(positionalFeatureGroups); - } - else + if (nonPositionalFeatures != null) { - return nonPositionalFeatureGroups == null - ? Collections. emptySet() - : Collections.unmodifiableSet(nonPositionalFeatureGroups); + result.addAll(nonPositionalFeatures); } + return result; + } + + @Override + public List getPositionalFeatures() + { + return getPositionalFeatures(new ArrayList<>()); } /** - * Answers the number of positional (or non-positional) features stored. - * Contact features count as 1. + * Answers a list of all positional features stored, in no guaranteed order * - * @param positional * @return */ - public int getFeatureCount(boolean positional) + @Override + public List getPositionalFeatures( + List result) { - if (!positional) - { - return nonPositionalFeatures == null ? 0 - : nonPositionalFeatures.size(); - } - - int size = 0; + /* + * add any contact features - from the list by start position + */ if (contactFeatureStarts != null) { - // note a contact feature (start/end) counts as one - size += contactFeatureStarts.size(); + result.addAll(contactFeatureStarts); } + /* + * add any nested features + */ if (features != null) { - size += features.size(); + result.addAll(features); } - return size; + return result; } /** @@ -756,75 +647,80 @@ public class FeatureStore * @return */ + @Override public int getTotalFeatureLength() { return totalExtent; } /** - * Answers the minimum score held for positional or non-positional features. - * This may be Float.NaN if there are no features, are none has a non-NaN - * score. + * Answers true if this store has no features, else false * - * @param positional * @return */ - public float getMinimumScore(boolean positional) + @Override + public boolean isEmpty() { - return positional ? positionalMinScore : nonPositionalMinScore; - } - - /** - * Answers the maximum score held for positional or non-positional features. - * This may be Float.NaN if there are no features, are none has a non-NaN - * score. - * - * @param positional - * @return - */ + boolean hasFeatures = (contactFeatureStarts != null + && !contactFeatureStarts.isEmpty()) + || (nonPositionalFeatures != null + && !nonPositionalFeatures.isEmpty()) + || features.size() > 0; - public float getMaximumScore(boolean positional) - { - return positional ? positionalMaxScore : nonPositionalMaxScore; + return !hasFeatures; } /** - * Answers a list of all either positional or non-positional features whose - * feature group matches the given group (which may be null) - * - * @param positional - * @param group - * @return + * Rescan all features to recompute any cached values after an entry has been + * deleted. This is expected to be an infrequent event, so performance here is + * not critical. */ - - public List getFeaturesForGroup(boolean positional, - String group) + protected synchronized void rescanAfterDelete() { - List result = new ArrayList<>(); - + positionalFeatureGroups.clear(); + nonPositionalFeatureGroups.clear(); + totalExtent = 0; + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; /* - * if we know features don't include the target group, no need - * to inspect them for matches + * scan non-positional features for groups and scores */ - if (positional && !positionalFeatureGroups.contains(group) - || !positional && !nonPositionalFeatureGroups.contains(group)) + if (nonPositionalFeatures != null) { - return result; + for (SequenceFeature sf : nonPositionalFeatures) + { + nonPositionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } } - List sfs = positional ? getPositionalFeatures() - : getNonPositionalFeatures(); + /* + * scan positional features for groups, scores and extents + */ + + rescanPositional(contactFeatureStarts); + rescanPositional(features); + } + + private void rescanPositional(Collection sfs) + { + if (sfs == null) + { + return; + } for (SequenceFeature sf : sfs) { - String featureGroup = sf.getFeatureGroup(); - if (group == null && featureGroup == null - || group != null && group.equals(featureGroup)) - { - result.add(sf); - } + positionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + totalExtent += getFeatureLength(sf); } - return result; } /** @@ -837,6 +733,7 @@ public class FeatureStore * @return */ + @Override public synchronized boolean shiftFeatures(int fromPosition, int shiftBy) { /* @@ -869,301 +766,4 @@ public class FeatureStore return modified; } - /////////////////////// added by Bob Hanson /////////////////////// - - // The following methods use a linked list of containment in features - // rather than IntervalStore. Implemented only for OverviewPanel, because - // only that makes calls for start == end in feature overlap requests. - // - // - // There are two parts --- initialization, and overlap searching. - // - // Initialization involves two steps: - // - // (1) sorting of features by start position using a standard Array.sort with - // Comparator. - // (2) linking of features, effectively nesting them. - // - // Searching also involves two steps: - // - // (1) binary search for a position within the sorted features array. - // (2) traversing the linked lists with an end check to read out the - // overlapped features at this position. - // - // All of this is done with very simple standard methods. - - // single public method: - - /** - * Find all features containing this position. - * - * @param pos - * @return list of SequenceFeatures - * @author Bob Hanson 2019.07.30 - */ - - public List findOverlappingFeatures(int pos, - List result) - { - if (result == null) - { - result = new ArrayList<>(); - } - - if (contactFeatureStarts != null) - { - findContacts(contactFeatureStarts, pos, result, true); - findContacts(contactFeatureEnds, pos, result, false); - } - if (featuresList != null) - { - findOverlaps(featuresList, pos, result); - } - return result; - } - - // Initialization - - /* - * contact features ordered by first contact position - */ - private SequenceFeature[] orderedFeatureStarts; - - private void rebuildArrays(int n) - { - if (startComp == null) - { - startComp = new StartComparator(); - } - orderedFeatureStarts = new SequenceFeature[n]; - for (int i = n; --i >= 0;) - { - SequenceFeature sf = featuresList.get(i); - sf.index = i; // for debugging only - orderedFeatureStarts[i] = sf; - } - Arrays.sort(orderedFeatureStarts, startComp); - linkFeatures(orderedFeatureStarts); - } - - /** - * just a standard Comparator - */ - private static StartComparator startComp; - - class StartComparator implements Comparator - { - - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) - { - int p1 = o1.begin; - int p2 = o2.begin; - return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0); - } - - } - - /** - * Run through the sorted sequence array once, building the containedBy linked - * list references. Does a check first to make sure there is actually - * something out there that is overlapping. A null for sf.containedBy means - * there are no overlaps for this feature. - * - * @param intervals - */ - private void linkFeatures(SequenceFeature[] intervals) - { - if (intervals.length < 2) - { - return; - } - int maxEnd = intervals[0].end; - for (int i = 1, n = intervals.length; i < n; i++) - { - SequenceFeature sf = intervals[i]; - if (sf.begin <= maxEnd) - { - sf.containedBy = getContainedBy(intervals[i - 1], sf); - } - if (sf.end > maxEnd) - { - maxEnd = sf.end; - } - } - } - - /** - * Since we are traversing the sorted feature array in a forward direction, - * all elements prior to the one we are working on have been fully linked. All - * we are doing is following those links until we find the first array feature - * with a containedBy element that has an end >= our begin point. It is - * generally a very short list -- maybe one or two depths. But it might be - * more than that. - * - * @param sf - * @param sf0 - * @return - */ - private SequenceFeature getContainedBy(SequenceFeature sf, - SequenceFeature sf0) - { - int begin = sf0.begin; - while (sf != null) - { - if (begin <= sf.end) - { - System.out.println("\nFS found " + sf0.index + ":" + sf0 - + "\nFS in " + sf.index + ":" + sf); - return sf; - } - sf = sf.containedBy; - } - return null; - } - - // search-stage methods - - /** - * Binary search for contact start or end at a given (Overview) position. - * - * @param l - * @param pos - * @param result - * @param isStart - * - * @author Bob Hanson 2019.07.30 - */ - private static void findContacts(List l, int pos, - List result, boolean isStart) - { - int low = 0; - int high = l.size() - 1; - while (low <= high) - { - int mid = (low + high) >>> 1; - SequenceFeature f = l.get(mid); - switch (Long.signum((isStart ? f.begin : f.end) - pos)) - { - case -1: - low = mid + 1; - continue; - case 1: - high = mid - 1; - continue; - case 0: - int m = mid; - result.add(f); - // could be "5" in 12345556788 ? - while (++mid <= high && (f = l.get(mid)) != null - && (isStart ? f.begin : f.end) == pos) - { - result.add(f); - } - while (--m >= low && (f = l.get(m)) != null - && (isStart ? f.begin : f.end) == pos) - { - result.add(f); - } - return; - } - } - } - - /** - * Find all overlaps; special case when there is only one feature. The - * required array of start-sorted SequenceFeature is created lazily. - * - * @param features - * @param pos - * @param result - */ - private void findOverlaps(List features, int pos, - List result) - { - int n = featuresList.size(); - if (n == 1) - { - checkOne(featuresList.get(0), pos, result); - return; - } - if (orderedFeatureStarts == null) - { - rebuildArrays(n); - } - - // (1) Find the closest feature to this position. - - SequenceFeature sf = findClosestFeature(orderedFeatureStarts, pos); - - // (2) Traverse the containedBy field, checking for overlap. - - while (sf != null) - { - if (sf.end >= pos) - { - result.add(sf); - } - sf = sf.containedBy; - } - } - - /** - * Quick check when we only have one feature. - * - * @param sf - * @param pos - * @param result - */ - private void checkOne(SequenceFeature sf, int pos, - List result) - { - if (sf.begin <= pos && sf.end >= pos) - { - result.add(sf); - } - return; - } - - /** - * A binary search identical to the one used for contact start/end, but here - * we return the feature itself. Unlike Collection.BinarySearch, all we have - * to be is close, not exact, and we make sure if there is a string of - * identical starts, then we slide to the end so that we can check all of - * them. - * - * @param l - * @param pos - * @return - */ - private SequenceFeature findClosestFeature(SequenceFeature[] l, int pos) - { - int low = 0; - int high = l.length - 1; - while (low <= high) - { - int mid = (low + high) >>> 1; - SequenceFeature f = l[mid]; - switch (Long.signum(f.begin - pos)) - { - case -1: - low = mid + 1; - continue; - case 1: - high = mid - 1; - continue; - case 0: - - while (++mid <= high && l[mid].begin == pos) - { - ; - } - return l[--mid]; - } - } - // -1 here? - return (high < 0 || low >= l.length ? null : l[high]); - } - - }