package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; /** * A data store for a set of sequence features that supports efficient lookup of * features overlapping a given range. * * @author gmcarstairs * */ public class FeatureStore { Comparator startOrdering = new RangeComparator(true); Comparator endOrdering = new RangeComparator(false); /* * An ordered list of features, with the promise that no feature in the list * properly contains any other. This constraint allows bounded linear search * of the list for features overlapping a region. * Contact features are not included in this list. */ List nonNestedFeatures; /* * contact features ordered by first contact position */ List contactFeatureStarts; /* * contact features ordered by second contact position */ List contactFeatureEnds; /* * Nested Containment List is used to hold any features that are nested * within (properly contained by) any other feature. This is a recursive tree * which supports depth-first scan for features overlapping a range. * It is used here as a 'catch-all' fallback for features that cannot be put * into a simple ordered list without invalidating the search methods. */ NCList nestedFeatures; /** * Constructor */ public FeatureStore() { nonNestedFeatures = new ArrayList(); // we only construct contactFeatures and the NCList if we need to } /** * Add one entry to the data store * * @param feature */ public void addFeature(SequenceFeature feature) { if (feature.isContactFeature()) { addContactFeature(feature); } else { boolean added = addNonNestedFeature(feature); if (!added) { /* * detected a nested feature - put it in the NCList structure */ addNestedFeature(feature); } } } /** * Adds one feature to the NCList that can manage nested features (creating * the NCList if necessary) */ protected synchronized void addNestedFeature(SequenceFeature feature) { if (nestedFeatures == null) { nestedFeatures = new NCList(feature); } else { nestedFeatures.add(feature); } } /** * Add a feature to the list of non-nested features, maintaining the ordering * of the list. A check is made for whether the feature is nested in (properly * contained by) an existing feature. If there is no nesting, the feature is * added to the list and the method returns true. If nesting is found, the * feature is not added and the method returns false. *

* Contact features are added at the position of their first contact point * * @param feature * @return */ protected boolean addNonNestedFeature(SequenceFeature feature) { synchronized (nonNestedFeatures) { int insertPosition = binarySearchForAdd(nonNestedFeatures, feature); /* * fail if we detect feature enclosure - of the new feature by * the one preceding it, or of the next feature by the new one */ if (insertPosition > 0) { if (encloses(nonNestedFeatures.get(insertPosition - 1), feature)) { return false; } } if (insertPosition < nonNestedFeatures.size()) { if (encloses(feature, nonNestedFeatures.get(insertPosition))) { return false; } } /* * checks passed - add or append the feature */ if (insertPosition == nonNestedFeatures.size()) { nonNestedFeatures.add(feature); } else { nonNestedFeatures.add(insertPosition, feature); } return true; } } /** * Answers true if range1 properly encloses range2, else false * * @param range1 * @param range2 * @return */ protected boolean encloses(ContiguousI range1, ContiguousI range2) { int begin1 = range1.getBegin(); int begin2 = range2.getBegin(); int end1 = range1.getEnd(); int end2 = range2.getEnd(); if (begin1 == begin2 && end1 > end2) { return true; } if (begin1 < begin2 && end1 >= end2) { return true; } return false; } /** * Answers the index of the first element in the given list which follows or * matches the given feature in the sort order. If no such element, answers * the length of the list. * * @param list * @param feature * * @return */ protected int binarySearchForAdd(List list, SequenceFeature feature) { // TODO binary search! int i = 0; while (i < list.size()) { if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0) { break; } i++; } return i; } /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain * ordered * * @param feature */ protected synchronized void addContactFeature(SequenceFeature feature) { // TODO binary search for insertion points! if (contactFeatureStarts == null) { contactFeatureStarts = new ArrayList(); } if (contactFeatureEnds == null) { contactFeatureEnds = new ArrayList(); } contactFeatureStarts.add(feature); Collections.sort(contactFeatureStarts, startOrdering); contactFeatureEnds.add(feature); Collections.sort(contactFeatureEnds, endOrdering); } /** * Returns a (possibly empty) list of entries whose range overlaps the given * range. The returned list is not ordered. Contact features are included if * either of the contact points lies within the range. * * @param start * start position of overlap range (inclusive) * @param end * end position of overlap range (inclusive) * @return */ public List findOverlappingFeatures(long start, long end) { List result = new ArrayList(); findNonNestedFeatures(start, end, result); findContactFeatures(start, end, result); if (nestedFeatures != null) { result.addAll(nestedFeatures.findOverlaps(start, end)); } return result; } /** * Adds contact features to the result list where either the second or the * first contact position lies within the target range. * * @param from * @param to * @param result */ protected void findContactFeatures(long from, long to, List result) { if (contactFeatureStarts != null) { findContactStartFeatures(from, to, result); } if (contactFeatureEnds != null) { findContactEndFeatures(from, to, result); } } /** * @param from * @param to * @param result */ protected void findContactEndFeatures(long from, long to, List result) { // TODO binary search for startPosition for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++) { SequenceFeature sf = contactFeatureEnds.get(startPosition); if (!sf.isContactFeature()) { System.err.println("Error! non-contact feature type " + sf.getType() + " in contact features list"); continue; } int begin = sf.getBegin(); if (begin >= from && begin <= to) { /* * this feature's first contact position lies in the search range * so we don't include it in results a second time */ continue; } int end = sf.getEnd(); if (end >= from && end <= to) { result.add(sf); } } } /** * Returns the index of the first contact feature found whose end (second * contact position) is not before the given start position. If no such * feature is found, returns the length of the contact features list. * * @param start * @return */ protected int contactsBinarySearch(long start) { // TODO binary search!! int i = 0; while (i < contactFeatureEnds.size()) { if (contactFeatureEnds.get(i).getEnd() >= start) { break; } i++; } return i; } /** * Adds features to the result list that are at a single position which lies * within the target range. Non-positional features (start=end=0) and contact * features are excluded. * * @param from * @param to * @param result */ protected void findNonNestedFeatures(long from, long to, List result) { int startIndex = binarySearch(nonNestedFeatures, from); findNonNestedFeatures(startIndex, from, to, result); } /** * Scans the list of non-nested features, starting from startIndex, to find * those that overlap the from-to range, and adds them to the result list. * Returns the index of the first feature whose start position is after the * target range (or the length of the whole list if none such feature exists). * * @param startIndex * @param from * @param to * @param result * @return */ protected int findNonNestedFeatures(final int startIndex, long from, long to, List result) { int i = startIndex; while (i < nonNestedFeatures.size()) { SequenceFeature sf = nonNestedFeatures.get(i); if (sf.getBegin() > to) { break; } int start = sf.getBegin(); int end = sf.getEnd(); if (sf.isContactFeature()) { end = start; } if (start <= to && end >= from) { result.add(sf); } i++; } return i; } /** * Performs a binary search of the (sorted) list to find the index of the * first entry whose end position is not less than the target position (i.e. * skip all features that properly precede the given position) * * @param features * @param target * @return */ protected int binarySearch(List features, long target) { int width = features.size() / 2; int lastpos = width; while (width > 0) { int end = features.get(lastpos).getEnd(); width = width / 2; if (end > target) { lastpos -= width; } else { lastpos += width; } } // todo correct binary search return lastpos > 1 ? lastpos - 2 : 0; // return lastpos; } /** * Adds contact features whose start position lies in the from-to range to the * result list * * @param from * @param to * @param result */ protected void findContactStartFeatures(long from, long to, List result) { // TODO binary search for startPosition for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++) { SequenceFeature sf = contactFeatureStarts.get(startPosition); if (!sf.isContactFeature()) { System.err.println("Error! non-contact feature type " + sf.getType() + " in contact features list"); continue; } int begin = sf.getBegin(); if (begin >= from && begin <= to) { result.add(sf); } } } }