X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=bd6bd1e37514a925e506842d988b6763abf444ce;hb=3d4ead4880755ec949de0300c232544ba965e60d;hp=c6a49dbbc56b3a39d26cf26f3ea2080d23953d64;hpb=c8863c027ddace3961405ed0051f104f1ec99581;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index c6a49db..bd6bd1e 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -1,14 +1,41 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; +import intervalstore.api.IntervalStoreI; +import intervalstore.impl.BinarySearcher; +import intervalstore.impl.IntervalStore; + + /** * A data store for a set of sequence features that supports efficient lookup of * features overlapping a given range. Intended for (but not limited to) storage @@ -19,59 +46,6 @@ import java.util.Set; */ public class FeatureStore { - /** - * a class providing criteria for performing a binary search of a list - */ - abstract static class SearchCriterion - { - /** - * Answers true if the entry passes the search criterion test - * - * @param entry - * @return - */ - abstract boolean compare(SequenceFeature entry); - - static SearchCriterion byStart(final long target) - { - return new SearchCriterion() { - - @Override - boolean compare(SequenceFeature entry) - { - return entry.getBegin() >= target; - } - }; - } - - static SearchCriterion byEnd(final long target) - { - return new SearchCriterion() - { - - @Override - boolean compare(SequenceFeature entry) - { - return entry.getEnd() >= target; - } - }; - } - - static SearchCriterion byFeature(final ContiguousI to, - final Comparator rc) - { - return new SearchCriterion() - { - - @Override - boolean compare(SequenceFeature entry) - { - return rc.compare(entry, to) >= 0; - } - }; - } - } - /* * Non-positional features have no (zero) start/end position. * Kept as a separate list in case this criterion changes in future. @@ -79,14 +53,6 @@ public class FeatureStore List nonPositionalFeatures; /* - * An ordered list of features, with the promise that no feature in the list - * properly contains any other. This constraint allows bounded linear search - * of the list for features overlapping a region. - * Contact features are not included in this list. - */ - List nonNestedFeatures; - - /* * contact features ordered by first contact position */ List contactFeatureStarts; @@ -97,13 +63,10 @@ public class FeatureStore List contactFeatureEnds; /* - * Nested Containment List is used to hold any features that are nested - * within (properly contained by) any other feature. This is a recursive tree - * which supports depth-first scan for features overlapping a range. - * It is used here as a 'catch-all' fallback for features that cannot be put - * into a simple ordered list without invalidating the search methods. + * IntervalStore holds remaining features and provides efficient + * query for features overlapping any given interval */ - NCList nestedFeatures; + IntervalStoreI features; /* * Feature groups represented in stored positional features @@ -131,21 +94,23 @@ public class FeatureStore float nonPositionalMaxScore; + private ArrayList featuresList; + /** * Constructor */ public FeatureStore() { - nonNestedFeatures = new ArrayList(); - positionalFeatureGroups = new HashSet(); - nonPositionalFeatureGroups = new HashSet(); + features = new IntervalStore<>(); + featuresList = new ArrayList<>(); + positionalFeatureGroups = new HashSet<>(); + nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; positionalMaxScore = Float.NaN; nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; - // we only construct nonPositionalFeatures, contactFeatures - // or the NCList if we need to + // we only construct nonPositionalFeatures, contactFeatures if we need to } /** @@ -156,8 +121,14 @@ public class FeatureStore * * @param feature */ + public boolean addFeature(SequenceFeature feature) { + if (contains(feature)) + { + return false; + } + /* * keep a record of feature groups */ @@ -166,61 +137,70 @@ public class FeatureStore positionalFeatureGroups.add(feature.getFeatureGroup()); } - boolean added = false; - if (feature.isContactFeature()) { - added = addContactFeature(feature); + addContactFeature(feature); } else if (feature.isNonPositional()) { - added = addNonPositionalFeature(feature); + addNonPositionalFeature(feature); } else { - if (!contains(nonNestedFeatures, feature)) + addNestedFeature(feature); + } + + /* + * record the total extent of positional features, to make + * getTotalFeatureLength possible; we count the length of a + * contact feature as 1 + */ + totalExtent += getFeatureLength(feature); + + /* + * record the minimum and maximum score for positional + * and non-positional features + */ + float score = feature.getScore(); + if (!Float.isNaN(score)) + { + if (feature.isNonPositional()) { - added = addNonNestedFeature(feature); - if (!added) - { - /* - * detected a nested feature - put it in the NCList structure - */ - added = addNestedFeature(feature); - } + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } + else + { + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); } } - if (added) + return true; + } + + /** + * Answers true if this store contains the given feature (testing by + * SequenceFeature.equals), else false + * + * @param feature + * @return + */ + public boolean contains(SequenceFeature feature) + { + if (feature.isNonPositional()) { - /* - * record the total extent of positional features, to make - * getTotalFeatureLength possible; we count the length of a - * contact feature as 1 - */ - totalExtent += getFeatureLength(feature); + return nonPositionalFeatures == null ? false + : nonPositionalFeatures.contains(feature); + } - /* - * record the minimum and maximum score for positional - * and non-positional features - */ - float score = feature.getScore(); - if (!Float.isNaN(score)) - { - if (feature.isNonPositional()) - { - nonPositionalMinScore = min(nonPositionalMinScore, score); - nonPositionalMaxScore = max(nonPositionalMaxScore, score); - } - else - { - positionalMinScore = min(positionalMinScore, score); - positionalMaxScore = max(positionalMaxScore, score); - } - } + if (feature.isContactFeature()) + { + return contactFeatureStarts == null ? false + : listContains(contactFeatureStarts, feature); } - return added; + return features == null ? false : features.contains(feature); } /** @@ -245,10 +225,10 @@ public class FeatureStore /** * Adds the feature to the list of non-positional features (with lazy - * instantiation of the list if it is null), and returns true. If the - * non-positional features already include the new feature (by equality test), - * then it is not added, and this method returns false. The feature group is - * added to the set of distinct feature groups for non-positional features. + * instantiation of the list if it is null), and returns true. The feature + * group is added to the set of distinct feature groups for non-positional + * features. This method allows duplicate features, so test before calling to + * prevent this. * * @param feature */ @@ -256,11 +236,7 @@ public class FeatureStore { if (nonPositionalFeatures == null) { - nonPositionalFeatures = new ArrayList(); - } - if (nonPositionalFeatures.contains(feature)) - { - return false; + nonPositionalFeatures = new ArrayList<>(); } nonPositionalFeatures.add(feature); @@ -271,98 +247,24 @@ public class FeatureStore } /** - * Adds one feature to the NCList that can manage nested features (creating - * the NCList if necessary), and returns true. If the feature is already - * stored in the NCList (by equality test), then it is not added, and this - * method returns false. - */ - protected synchronized boolean addNestedFeature(SequenceFeature feature) - { - if (nestedFeatures == null) - { - nestedFeatures = new NCList(feature); - return true; - } - return nestedFeatures.add(feature, false); - } - - /** - * Add a feature to the list of non-nested features, maintaining the ordering - * of the list. A check is made for whether the feature is nested in (properly - * contained by) an existing feature. If there is no nesting, the feature is - * added to the list and the method returns true. If nesting is found, the - * feature is not added and the method returns false. - * - * @param feature - * @return - */ - protected boolean addNonNestedFeature(SequenceFeature feature) - { - synchronized (nonNestedFeatures) - { - /* - * find the first stored feature which doesn't precede the new one - */ - int insertPosition = binarySearch(nonNestedFeatures, - SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); - - /* - * fail if we detect feature enclosure - of the new feature by - * the one preceding it, or of the next feature by the new one - */ - if (insertPosition > 0) - { - if (encloses(nonNestedFeatures.get(insertPosition - 1), feature)) - { - return false; - } - } - if (insertPosition < nonNestedFeatures.size()) - { - if (encloses(feature, nonNestedFeatures.get(insertPosition))) - { - return false; - } - } - - /* - * checks passed - add the feature - */ - nonNestedFeatures.add(insertPosition, feature); - - return true; - } - } - - /** - * Answers true if range1 properly encloses range2, else false - * - * @param range1 - * @param range2 - * @return + * Adds one feature to the IntervalStore that can manage nested features + * (creating the IntervalStore if necessary) */ - protected boolean encloses(ContiguousI range1, ContiguousI range2) + protected synchronized void addNestedFeature(SequenceFeature feature) { - int begin1 = range1.getBegin(); - int begin2 = range2.getBegin(); - int end1 = range1.getEnd(); - int end2 = range2.getEnd(); - if (begin1 == begin2 && end1 > end2) - { - return true; - } - if (begin1 < begin2 && end1 >= end2) + if (features == null) { - return true; + features = new IntervalStore<>(); } - return false; + features.add(feature); + featuresList.add(feature); } /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain - * ordered, and returns true. If the contact feature lists already contain the - * given feature (by test for equality), does not add it and returns false. + * ordered, and returns true. This method allows duplicate features to be + * added, so test before calling to avoid this. * * @param feature * @return @@ -371,33 +273,28 @@ public class FeatureStore { if (contactFeatureStarts == null) { - contactFeatureStarts = new ArrayList(); + contactFeatureStarts = new ArrayList<>(); } if (contactFeatureEnds == null) { - contactFeatureEnds = new ArrayList(); - } - - if (contains(contactFeatureStarts, feature)) - { - return false; + contactFeatureEnds = new ArrayList<>(); } /* + * insert into list sorted by start (first contact position): * binary search the sorted list to find the insertion point */ - int insertPosition = binarySearch(contactFeatureStarts, - SearchCriterion.byFeature(feature, - RangeComparator.BY_START_POSITION)); + int insertPosition = BinarySearcher.findFirst(contactFeatureStarts, + f -> f.getBegin() >= feature.getBegin()); contactFeatureStarts.add(insertPosition, feature); - // and resort to mak siccar...just in case insertion point not quite right - Collections.sort(contactFeatureStarts, RangeComparator.BY_START_POSITION); - insertPosition = binarySearch(contactFeatureStarts, - SearchCriterion.byFeature(feature, - RangeComparator.BY_END_POSITION)); - contactFeatureEnds.add(feature); - Collections.sort(contactFeatureEnds, RangeComparator.BY_END_POSITION); + /* + * insert into list sorted by end (second contact position): + * binary search the sorted list to find the insertion point + */ + insertPosition = BinarySearcher.findFirst(contactFeatureEnds, + f -> f.getEnd() >= feature.getEnd()); + contactFeatureEnds.add(insertPosition, feature); return true; } @@ -411,7 +308,7 @@ public class FeatureStore * @param feature * @return */ - protected static boolean contains(List features, + protected static boolean listContains(List features, SequenceFeature feature) { if (features == null || feature == null) @@ -422,8 +319,10 @@ public class FeatureStore /* * locate the first entry in the list which does not precede the feature */ - int pos = binarySearch(features, - SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); + // int pos = binarySearch(features, + // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); + int pos = BinarySearcher.findFirst(features, + val -> val.getBegin() >= feature.getBegin()); int len = features.size(); while (pos < len) { @@ -452,17 +351,16 @@ public class FeatureStore * end position of overlap range (inclusive) * @return */ + public List findOverlappingFeatures(long start, long end) { - List result = new ArrayList(); - - findNonNestedFeatures(start, end, result); + List result = new ArrayList<>(); findContactFeatures(start, end, result); - if (nestedFeatures != null) + if (features != null) { - result.addAll(nestedFeatures.findOverlaps(start, end)); + result.addAll(features.findOverlaps(start, end)); } return result; @@ -481,11 +379,11 @@ public class FeatureStore { if (contactFeatureStarts != null) { - findContactStartFeatures(from, to, result); + findContactStartOverlaps(from, to, result); } if (contactFeatureEnds != null) { - findContactEndFeatures(from, to, result); + findContactEndOverlaps(from, to, result); } } @@ -498,22 +396,24 @@ public class FeatureStore * @param to * @param result */ - protected void findContactEndFeatures(long from, long to, + protected void findContactEndOverlaps(long from, long to, List result) { /* - * find the first contact feature (if any) that does not lie - * entirely before the target range + * find the first contact feature (if any) + * whose end point is not before the target range */ - int startPosition = binarySearch(contactFeatureEnds, - SearchCriterion.byEnd(from)); - for (; startPosition < contactFeatureEnds.size(); startPosition++) + int index = BinarySearcher.findFirst(contactFeatureEnds, + f -> f.getEnd() >= from); + + while (index < contactFeatureEnds.size()) { - SequenceFeature sf = contactFeatureEnds.get(startPosition); + SequenceFeature sf = contactFeatureEnds.get(index); if (!sf.isContactFeature()) { - System.err.println("Error! non-contact feature type " - + sf.getType() + " in contact features list"); + System.err.println("Error! non-contact feature type " + sf.getType() + + " in contact features list"); + index++; continue; } @@ -524,71 +424,25 @@ public class FeatureStore * this feature's first contact position lies in the search range * so we don't include it in results a second time */ + index++; continue; } - int end = sf.getEnd(); - if (end >= from && end <= to) - { - result.add(sf); - } - if (end > to) + if (sf.getEnd() > to) { + /* + * this feature (and all following) has end point after the target range + */ break; } - } - } - - /** - * Adds non-nested features to the result list that lie within the target - * range. Non-positional features (start=end=0), contact features and nested - * features are excluded. - * - * @param from - * @param to - * @param result - */ - protected void findNonNestedFeatures(long from, long to, - List result) - { - int startIndex = binarySearch(nonNestedFeatures, - SearchCriterion.byEnd(from)); - findNonNestedFeatures(startIndex, from, to, result); - } - - /** - * Scans the list of non-nested features, starting from startIndex, to find - * those that overlap the from-to range, and adds them to the result list. - * Returns the index of the first feature whose start position is after the - * target range (or the length of the whole list if none such feature exists). - * - * @param startIndex - * @param from - * @param to - * @param result - * @return - */ - protected int findNonNestedFeatures(final int startIndex, long from, - long to, List result) - { - int i = startIndex; - while (i < nonNestedFeatures.size()) - { - SequenceFeature sf = nonNestedFeatures.get(i); - if (sf.getBegin() > to) - { - break; - } - int start = sf.getBegin(); - int end = sf.getEnd(); - if (start <= to && end >= from) - { - result.add(sf); - } - i++; + /* + * feature has end >= from and end <= to + * i.e. contact end point lies within overlap search range + */ + result.add(sf); + index++; } - return i; } /** @@ -599,26 +453,36 @@ public class FeatureStore * @param to * @param result */ - protected void findContactStartFeatures(long from, long to, + protected void findContactStartOverlaps(long from, long to, List result) { - int startPosition = binarySearch(contactFeatureStarts, - SearchCriterion.byStart(from)); + int index = BinarySearcher.findFirst(contactFeatureStarts, + f -> f.getBegin() >= from); - for (; startPosition < contactFeatureStarts.size(); startPosition++) + while (index < contactFeatureStarts.size()) { - SequenceFeature sf = contactFeatureStarts.get(startPosition); + SequenceFeature sf = contactFeatureStarts.get(index); if (!sf.isContactFeature()) { - System.err.println("Error! non-contact feature type " - + sf.getType() + " in contact features list"); + System.err.println("Error! non-contact feature " + sf.toString() + + " in contact features list"); + index++; continue; } - int begin = sf.getBegin(); - if (begin >= from && begin <= to) + if (sf.getBegin() > to) { - result.add(sf); + /* + * this feature's start (and all following) follows the target range + */ + break; } + + /* + * feature has begin >= from and begin <= to + * i.e. contact start point lies within overlap search range + */ + result.add(sf); + index++; } } @@ -627,13 +491,10 @@ public class FeatureStore * * @return */ + public List getPositionalFeatures() { - /* - * add non-nested features (may be all features for many cases) - */ - List result = new ArrayList(); - result.addAll(nonNestedFeatures); + List result = new ArrayList<>(); /* * add any contact features - from the list by start position @@ -646,9 +507,9 @@ public class FeatureStore /* * add any nested features */ - if (nestedFeatures != null) + if (features != null) { - result.addAll(nestedFeatures.getEntries()); + result.addAll(features); } return result; @@ -660,13 +521,14 @@ public class FeatureStore * * @return */ + public List getContactFeatures() { if (contactFeatureStarts == null) { return Collections.emptyList(); } - return new ArrayList(contactFeatureStarts); + return new ArrayList<>(contactFeatureStarts); } /** @@ -675,13 +537,14 @@ public class FeatureStore * * @return */ + public List getNonPositionalFeatures() { if (nonPositionalFeatures == null) { return Collections.emptyList(); } - return new ArrayList(nonPositionalFeatures); + return new ArrayList<>(nonPositionalFeatures); } /** @@ -692,15 +555,13 @@ public class FeatureStore * * @param sf */ + public synchronized boolean delete(SequenceFeature sf) { - /* - * try the non-nested positional features first - */ - boolean removed = nonNestedFeatures.remove(sf); + boolean removed = false; /* - * if not found, try contact positions (and if found, delete + * try contact positions (and if found, delete * from both lists of contact positions) */ if (!removed && contactFeatureStarts != null) @@ -726,9 +587,10 @@ public class FeatureStore /* * if not found, try nested features */ - if (!removed && nestedFeatures != null) + if (!removed && features != null) { - removed = nestedFeatures.delete(sf); + removed = features.remove(sf); + featuresList.remove(sf); } if (removed) @@ -753,7 +615,6 @@ public class FeatureStore positionalMaxScore = Float.NaN; nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; - /* * scan non-positional features for groups and scores */ @@ -819,39 +680,18 @@ public class FeatureStore } /** - * Scans all positional features to check whether the given feature group is - * found, and returns true if found, else false - * - * @param featureGroup - * @return - */ - protected boolean findFeatureGroup(String featureGroup) - { - for (SequenceFeature sf : getPositionalFeatures()) - { - String group = sf.getFeatureGroup(); - if (group == featureGroup - || (group != null && group.equals(featureGroup))) - { - return true; - } - } - return false; - } - - /** * Answers true if this store has no features, else false * * @return */ + public boolean isEmpty() { - boolean hasFeatures = !nonNestedFeatures.isEmpty() - || (contactFeatureStarts != null && !contactFeatureStarts - .isEmpty()) - || (nonPositionalFeatures != null && !nonPositionalFeatures - .isEmpty()) - || (nestedFeatures != null && nestedFeatures.size() > 0); + boolean hasFeatures = (contactFeatureStarts != null + && !contactFeatureStarts.isEmpty()) + || (nonPositionalFeatures != null + && !nonPositionalFeatures.isEmpty()) + || (features != null && features.size() > 0); return !hasFeatures; } @@ -864,6 +704,7 @@ public class FeatureStore * @param positionalFeatures * @return */ + public Set getFeatureGroups(boolean positionalFeatures) { if (positionalFeatures) @@ -872,45 +713,10 @@ public class FeatureStore } else { - return nonPositionalFeatureGroups == null ? Collections - . emptySet() : Collections - .unmodifiableSet(nonPositionalFeatureGroups); - } - } - - /** - * Performs a binary search of the (sorted) list to find the index of the - * first entry which returns true for the given comparator function. Returns - * the length of the list if there is no such entry. - * - * @param features - * @param sc - * @return - */ - protected static int binarySearch(List features, - SearchCriterion sc) - { - int start = 0; - int end = features.size() - 1; - int matched = features.size(); - - while (start <= end) - { - int mid = (start + end) / 2; - SequenceFeature entry = features.get(mid); - boolean compare = sc.compare(entry); - if (compare) - { - matched = mid; - end = mid - 1; - } - else - { - start = mid + 1; - } + return nonPositionalFeatureGroups == null + ? Collections. emptySet() + : Collections.unmodifiableSet(nonPositionalFeatureGroups); } - - return matched; } /** @@ -920,15 +726,16 @@ public class FeatureStore * @param positional * @return */ + public int getFeatureCount(boolean positional) { if (!positional) { - return nonPositionalFeatures == null ? 0 : nonPositionalFeatures - .size(); + return nonPositionalFeatures == null ? 0 + : nonPositionalFeatures.size(); } - int size = nonNestedFeatures.size(); + int size = 0; if (contactFeatureStarts != null) { @@ -936,9 +743,9 @@ public class FeatureStore size += contactFeatureStarts.size(); } - if (nestedFeatures != null) + if (features != null) { - size += nestedFeatures.size(); + size += features.size(); } return size; @@ -950,6 +757,7 @@ public class FeatureStore * * @return */ + public int getTotalFeatureLength() { return totalExtent; @@ -963,6 +771,7 @@ public class FeatureStore * @param positional * @return */ + public float getMinimumScore(boolean positional) { return positional ? positionalMinScore : nonPositionalMinScore; @@ -976,6 +785,7 @@ public class FeatureStore * @param positional * @return */ + public float getMaximumScore(boolean positional) { return positional ? positionalMaxScore : nonPositionalMaxScore; @@ -989,10 +799,11 @@ public class FeatureStore * @param group * @return */ + public List getFeaturesForGroup(boolean positional, String group) { - List result = new ArrayList(); + List result = new ArrayList<>(); /* * if we know features don't include the target group, no need @@ -1009,12 +820,298 @@ public class FeatureStore for (SequenceFeature sf : sfs) { String featureGroup = sf.getFeatureGroup(); - if (group == null && featureGroup == null || group != null - && group.equals(featureGroup)) + if (group == null && featureGroup == null + || group != null && group.equals(featureGroup)) { result.add(sf); } } return result; } + + /** + * Adds the shift amount to the start and end of all positional features whose + * start position is at or after fromPosition. Returns true if at least one + * feature was shifted, else false. + * + * @param fromPosition + * @param shiftBy + * @return + */ + + public synchronized boolean shiftFeatures(int fromPosition, int shiftBy) + { + /* + * Because begin and end are final fields (to ensure the data store's + * integrity), we have to delete each feature and re-add it as amended. + * (Although a simple shift of all values would preserve data integrity!) + */ + boolean modified = false; + for (SequenceFeature sf : getPositionalFeatures()) + { + if (sf.getBegin() >= fromPosition) + { + modified = true; + int newBegin = sf.getBegin() + shiftBy; + int newEnd = sf.getEnd() + shiftBy; + + /* + * sanity check: don't shift left of the first residue + */ + if (newEnd > 0) + { + newBegin = Math.max(1, newBegin); + SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup(), sf.getScore()); + addFeature(sf2); + } + delete(sf); + } + } + return modified; + } + + /** + * Find all features containing this position. + * + * @param pos + * @return list of SequenceFeatures + * @author Bob Hanson 2019.07.30 + */ + + public List findOverlappingFeatures(int pos, + List result) + { + if (result == null) + { + result = new ArrayList<>(); + } + + if (contactFeatureStarts != null) + { + findContacts(contactFeatureStarts, pos, result, true); + findContacts(contactFeatureEnds, pos, result, false); + } + if (featuresList != null) + { + findOverlaps(featuresList, pos, result); + } + return result; + } + + /** + * Binary search for contact start or end at a given (Overview) position. + * + * @param l + * @param pos + * @param result + * @param isStart + * + * @author Bob Hanson 2019.07.30 + */ + private static void findContacts(List l, int pos, + List result, boolean isStart) + { + int low = 0; + int high = l.size() - 1; + while (low <= high) + { + int mid = (low + high) >>> 1; + SequenceFeature f = l.get(mid); + switch (Long.signum((isStart ? f.begin : f.end) - pos)) + { + case -1: + low = mid + 1; + continue; + case 1: + high = mid - 1; + continue; + case 0: + int m = mid; + result.add(f); + // could be "5" in 12345556788 ? + while (++mid <= high && (f = l.get(mid)) != null + && (isStart ? f.begin : f.end) == pos) + { + result.add(f); + } + while (--m >= low && (f = l.get(m)) != null + && (isStart ? f.begin : f.end) == pos) + { + result.add(f); + } + return; + } + } + } + + BitSet bs = new BitSet(); + + /** + * Double binary sort with bitset correlation + * + * + * @param features + * @param pos + * @param result + */ + private void findOverlaps(List features, int pos, + List result) + { + int n = featuresList.size(); + if (n == 1) + { + checkOne(featuresList.get(0), pos, result); + return; + } + if (orderedFeatureStarts == null) + { + rebuildArrays(n); + } + SequenceFeature sf = findClosestFeature(orderedFeatureStarts, pos); + while (sf != null) { + if (sf.end >= pos) + { + result.add(sf); + } + sf = sf.containedBy; + } + } + + private void linkFeatures(SequenceFeature[] intervals) + { + if (intervals.length < 2) + { + return; + } + int maxEnd = intervals[0].end; + for (int i = 1, n = intervals.length; i < n; i++) + { + SequenceFeature ithis = intervals[i]; + if (ithis.begin <= maxEnd) + { + ithis.containedBy = getContainedBy(intervals[i - 1], ithis); + } + if (ithis.end > maxEnd) + { + maxEnd = ithis.end; + } + } + } + + private SequenceFeature getContainedBy(SequenceFeature sf, + SequenceFeature sf0) + { + int begin = sf0.begin; + while (sf != null) + { + if (begin <= sf.end) + { + System.out.println("\nFS found " + sf0.index + ":" + sf0 + + "\nFS in " + sf.index + ":" + sf); + return sf; + } + sf = sf.containedBy; + } + return null; + } + + private SequenceFeature findClosestFeature(SequenceFeature[] l, int pos) + { + int low = 0; + int high = l.length - 1; + while (low <= high) + { + int mid = (low + high) >>> 1; + SequenceFeature f = l[mid]; + switch (Long.signum(f.begin - pos)) + { + case -1: + low = mid + 1; + continue; + case 1: + high = mid - 1; + continue; + case 0: + + while (++mid <= high && l[mid].begin == pos) + { + ; + } + mid--; + return l[mid]; + } + } + // -1 here? + return (high < 0 || low >= l.length ? null : l[high]); + } + + private void checkOne(SequenceFeature sf, int pos, + List result) + { + if (sf.begin <= pos && sf.end >= pos) + { + result.add(sf); + } + return; + } + + /* + * contact features ordered by first contact position + */ + private SequenceFeature[] orderedFeatureStarts; + + private void rebuildArrays(int n) + { + if (startComp == null) + { + startComp = new StartComparator(); + } + orderedFeatureStarts = new SequenceFeature[n]; + + for (int i = n; --i >= 0;) + { + SequenceFeature sf = featuresList.get(i); + sf.index = i; + orderedFeatureStarts[i] = sf; + } + Arrays.sort(orderedFeatureStarts, startComp); + linkFeatures(orderedFeatureStarts); + } + + class StartComparator implements Comparator + { + + int pos; + + @Override + public int compare(SequenceFeature o1, SequenceFeature o2) + { + int p1 = o1.begin; + int p2 = o2.begin; + return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0); + } + + } + + static StartComparator startComp; + + // class EndComparator implements Comparator + // { + // + // int pos; + // + // @Override + // public int compare(SequenceFeature o1, SequenceFeature o2) + // { + // int p1 = o1.end; + // int p2 = o2.end; + // int val = (p1 < p2 ? 1 : p1 > p2 ? -1 : 0); + // return val; + // } + // + // } + // + // static EndComparator endComp; + }