From 523b07fc514fc3936fac484dd6cfce6563ef0e83 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 17 Apr 2017 13:37:39 +0100 Subject: [PATCH] JAL-2446 tidied up / generalised binary search in FeatureStore --- src/jalview/datamodel/features/FeatureStore.java | 227 +++++++++++--------- .../datamodel/features/FeatureStoreTest.java | 2 +- 2 files changed, 128 insertions(+), 101 deletions(-) diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index 9f61871..ddee921 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -19,6 +19,59 @@ import java.util.Set; */ public class FeatureStore { + /** + * a class providing criteria for performing a binary search of a list + */ + abstract static class SearchCriterion + { + /** + * Answers true if the entry passes the search criterion test + * + * @param entry + * @return + */ + abstract boolean compare(SequenceFeature entry); + + static SearchCriterion byStart(final long target) + { + return new SearchCriterion() { + + @Override + boolean compare(SequenceFeature entry) + { + return entry.getBegin() >= target; + } + }; + } + + static SearchCriterion byEnd(final long target) + { + return new SearchCriterion() + { + + @Override + boolean compare(SequenceFeature entry) + { + return entry.getEnd() >= target; + } + }; + } + + static SearchCriterion byFeature(final ContiguousI to, + final Comparator rc) + { + return new SearchCriterion() + { + + @Override + boolean compare(SequenceFeature entry) + { + return rc.compare(entry, to) >= 0; + } + }; + } + } + Comparator startOrdering = new RangeComparator(true); Comparator endOrdering = new RangeComparator(false); @@ -90,6 +143,9 @@ public class FeatureStore */ public boolean addFeature(SequenceFeature feature) { + /* + * keep a record of feature groups + */ if (!feature.isNonPositional()) { positionalFeatureGroups.add(feature.getFeatureGroup()); @@ -173,8 +229,6 @@ public class FeatureStore * contained by) an existing feature. If there is no nesting, the feature is * added to the list and the method returns true. If nesting is found, the * feature is not added and the method returns false. - *

- * Contact features are added at the position of their first contact point * * @param feature * @return @@ -183,7 +237,11 @@ public class FeatureStore { synchronized (nonNestedFeatures) { - int insertPosition = binarySearchForAdd(nonNestedFeatures, feature); + /* + * find the first stored feature which doesn't precede the new one + */ + int insertPosition = binarySearch(nonNestedFeatures, + SearchCriterion.byFeature(feature, startOrdering)); /* * fail if we detect feature enclosure - of the new feature by @@ -198,6 +256,8 @@ public class FeatureStore } if (insertPosition < nonNestedFeatures.size()) { + // FIXME may have to test more than one feature here + // e.g. add [40-60] to [20-42, 30-50, 45-55] if (encloses(feature, nonNestedFeatures.get(insertPosition))) { return false; @@ -205,16 +265,10 @@ public class FeatureStore } /* - * checks passed - add or append the feature + * checks passed - add the feature */ - if (insertPosition == nonNestedFeatures.size()) - { - nonNestedFeatures.add(feature); - } - else - { - nonNestedFeatures.add(insertPosition, feature); - } + nonNestedFeatures.add(insertPosition, feature); + return true; } } @@ -244,31 +298,6 @@ public class FeatureStore } /** - * Answers the index of the first element in the given list which follows or - * matches the given feature in the sort order. If no such element, answers - * the length of the list. - * - * @param list - * @param feature - * - * @return - */ - protected int binarySearchForAdd(List list, SequenceFeature feature) - { - // TODO binary search! - int i = 0; - while (i < list.size()) - { - if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0) - { - break; - } - i++; - } - return i; - } - - /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain * ordered, and returns true. If the contact feature lists already contain the @@ -303,7 +332,7 @@ public class FeatureStore } /** - * Returns a (possibly empty) list of entries whose range overlaps the given + * Returns a (possibly empty) list of features whose extent overlaps the given * range. The returned list is not ordered. Contact features are included if * either of the contact points lies within the range. * @@ -331,7 +360,7 @@ public class FeatureStore /** * Adds contact features to the result list where either the second or the - * first contact position lies within the target range. + * first contact position lies within the target range * * @param from * @param to @@ -351,6 +380,10 @@ public class FeatureStore } /** + * Adds to the result list any contact features whose end (second contact + * point), but not start (first contact point), lies in the query from-to + * range + * * @param from * @param to * @param result @@ -358,8 +391,13 @@ public class FeatureStore protected void findContactEndFeatures(long from, long to, List result) { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++) + /* + * find the first contact feature (if any) that does not lie + * entirely before the target range + */ + int startPosition = binarySearch(contactFeatureEnds, + SearchCriterion.byEnd(from)); + for (; startPosition < contactFeatureEnds.size(); startPosition++) { SequenceFeature sf = contactFeatureEnds.get(startPosition); if (!sf.isContactFeature()) @@ -368,6 +406,7 @@ public class FeatureStore + sf.getType() + " in contact features list"); continue; } + int begin = sf.getBegin(); if (begin >= from && begin <= to) { @@ -377,41 +416,22 @@ public class FeatureStore */ continue; } + int end = sf.getEnd(); if (end >= from && end <= to) { result.add(sf); } - } - } - - /** - * Returns the index of the first contact feature found whose end (second - * contact position) is not before the given start position. If no such - * feature is found, returns the length of the contact features list. - * - * @param start - * @return - */ - protected int contactsBinarySearch(long start) - { - // TODO binary search!! - int i = 0; - while (i < contactFeatureEnds.size()) - { - if (contactFeatureEnds.get(i).getEnd() >= start) + if (end > to) { break; } - i++; } - - return i; } /** - * Adds features to the result list that are at a single position which lies - * within the target range. Non-positional features (start=end=0) and contact + * Adds non-nested features to the result list that lie within the target + * range. Non-positional features (start=end=0), contact features and nested * features are excluded. * * @param from @@ -421,7 +441,9 @@ public class FeatureStore protected void findNonNestedFeatures(long from, long to, List result) { - int startIndex = binarySearch(nonNestedFeatures, from); + int startIndex = binarySearch(nonNestedFeatures, + SearchCriterion.byEnd(from)); + findNonNestedFeatures(startIndex, from, to, result); } @@ -438,8 +460,7 @@ public class FeatureStore * @return */ protected int findNonNestedFeatures(final int startIndex, long from, - long to, - List result) + long to, List result) { int i = startIndex; while (i < nonNestedFeatures.size()) @@ -461,37 +482,6 @@ public class FeatureStore } /** - * Performs a binary search of the (sorted) list to find the index of the - * first entry whose end position is not less than the target position (i.e. - * skip all features that properly precede the given position) - * - * @param features - * @param target - * @return - */ - protected int binarySearch(List features, long target) - { - int width = features.size() / 2; - int lastpos = width; - while (width > 0) - { - int end = features.get(lastpos).getEnd(); - width = width / 2; - if (end > target) - { - lastpos -= width; - } - else - { - lastpos += width; - } - } - // todo correct binary search - return lastpos > 1 ? lastpos - 2 : 0; - // return lastpos; - } - - /** * Adds contact features whose start position lies in the from-to range to the * result list * @@ -502,8 +492,10 @@ public class FeatureStore protected void findContactStartFeatures(long from, long to, List result) { - // TODO binary search for startPosition - for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++) + int startPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byStart(from)); + + for (; startPosition < contactFeatureStarts.size(); startPosition++) { SequenceFeature sf = contactFeatureStarts.get(startPosition); if (!sf.isContactFeature()) @@ -731,4 +723,39 @@ public class FeatureStore .unmodifiableSet(nonPositionalFeatureGroups); } } + + /** + * Performs a binary search of the (sorted) list to find the index of the + * first entry which returns true for the given comparator function. Returns + * the length of the list if there is no such entry. + * + * @param features + * @param sc + * @return + */ + protected int binarySearch(List features, + SearchCriterion sc) + { + int start = 0; + int end = features.size() - 1; + int matched = features.size(); + + while (start <= end) + { + int mid = (start + end) / 2; + SequenceFeature entry = features.get(mid); + boolean compare = sc.compare(entry); + if (compare) + { + matched = mid; + end = mid - 1; + } + else + { + start = mid + 1; + } + } + + return matched; + } } diff --git a/test/jalview/datamodel/features/FeatureStoreTest.java b/test/jalview/datamodel/features/FeatureStoreTest.java index fa63637..8aabc56 100644 --- a/test/jalview/datamodel/features/FeatureStoreTest.java +++ b/test/jalview/datamodel/features/FeatureStoreTest.java @@ -420,7 +420,7 @@ public class FeatureStoreTest fs.addFeature(sf3); assertTrue(fs.delete(sf1)); // FeatureStore should now only contain features in the NCList - assertEquals(fs.nonNestedFeatures.size(), 0); + assertTrue(fs.nonNestedFeatures.isEmpty()); assertEquals(fs.nestedFeatures.size(), 2); assertFalse(fs.isEmpty()); assertTrue(fs.delete(sf2)); -- 1.7.10.2