/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; import intervalstore.api.IntervalStoreI; import intervalstore.impl.BinarySearcher; import intervalstore.impl.IntervalStore; /** * A data store for a set of sequence features that supports efficient lookup of * features overlapping a given range. Intended for (but not limited to) storage * of features for one sequence and feature type. * * @author gmcarstairs * */ public class FeatureStore { /* * Non-positional features have no (zero) start/end position. * Kept as a separate list in case this criterion changes in future. */ List nonPositionalFeatures; /* * contact features ordered by first contact position */ List contactFeatureStarts; /* * contact features ordered by second contact position */ List contactFeatureEnds; /* * IntervalStore holds remaining features and provides efficient * query for features overlapping any given interval */ IntervalStoreI features; /* * Feature groups represented in stored positional features * (possibly including null) */ Set positionalFeatureGroups; /* * Feature groups represented in stored non-positional features * (possibly including null) */ Set nonPositionalFeatureGroups; /* * the total length of all positional features; contact features count 1 to * the total and 1 to size(), consistent with an average 'feature length' of 1 */ int totalExtent; float positionalMinScore; float positionalMaxScore; float nonPositionalMinScore; float nonPositionalMaxScore; private ArrayList featuresList; /** * Constructor */ public FeatureStore() { features = new IntervalStore<>(); featuresList = new ArrayList<>(); positionalFeatureGroups = new HashSet<>(); nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; positionalMaxScore = Float.NaN; nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; // we only construct nonPositionalFeatures, contactFeatures if we need to } /** * Adds one sequence feature to the store, and returns true, unless the * feature is already contained in the store, in which case this method * returns false. Containment is determined by SequenceFeature.equals() * comparison. * * @param feature */ public boolean addFeature(SequenceFeature feature) { if (contains(feature)) { return false; } /* * keep a record of feature groups */ if (!feature.isNonPositional()) { positionalFeatureGroups.add(feature.getFeatureGroup()); } if (feature.isContactFeature()) { addContactFeature(feature); } else if (feature.isNonPositional()) { addNonPositionalFeature(feature); } else { addNestedFeature(feature); } /* * record the total extent of positional features, to make * getTotalFeatureLength possible; we count the length of a * contact feature as 1 */ totalExtent += getFeatureLength(feature); /* * record the minimum and maximum score for positional * and non-positional features */ float score = feature.getScore(); if (!Float.isNaN(score)) { if (feature.isNonPositional()) { nonPositionalMinScore = min(nonPositionalMinScore, score); nonPositionalMaxScore = max(nonPositionalMaxScore, score); } else { positionalMinScore = min(positionalMinScore, score); positionalMaxScore = max(positionalMaxScore, score); } } return true; } /** * Answers true if this store contains the given feature (testing by * SequenceFeature.equals), else false * * @param feature * @return */ public boolean contains(SequenceFeature feature) { if (feature.isNonPositional()) { return nonPositionalFeatures == null ? false : nonPositionalFeatures.contains(feature); } if (feature.isContactFeature()) { return contactFeatureStarts == null ? false : listContains(contactFeatureStarts, feature); } return features == null ? false : features.contains(feature); } /** * Answers the 'length' of the feature, counting 0 for non-positional features * and 1 for contact features * * @param feature * @return */ protected static int getFeatureLength(SequenceFeature feature) { if (feature.isNonPositional()) { return 0; } if (feature.isContactFeature()) { return 1; } return 1 + feature.getEnd() - feature.getBegin(); } /** * Adds the feature to the list of non-positional features (with lazy * instantiation of the list if it is null), and returns true. The feature * group is added to the set of distinct feature groups for non-positional * features. This method allows duplicate features, so test before calling to * prevent this. * * @param feature */ protected boolean addNonPositionalFeature(SequenceFeature feature) { if (nonPositionalFeatures == null) { nonPositionalFeatures = new ArrayList<>(); } nonPositionalFeatures.add(feature); nonPositionalFeatureGroups.add(feature.getFeatureGroup()); return true; } /** * Adds one feature to the IntervalStore that can manage nested features * (creating the IntervalStore if necessary) */ protected synchronized void addNestedFeature(SequenceFeature feature) { if (features == null) { features = new IntervalStore<>(); } features.add(feature); featuresList.add(feature); } /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain * ordered, and returns true. This method allows duplicate features to be * added, so test before calling to avoid this. * * @param feature * @return */ protected synchronized boolean addContactFeature(SequenceFeature feature) { if (contactFeatureStarts == null) { contactFeatureStarts = new ArrayList<>(); } if (contactFeatureEnds == null) { contactFeatureEnds = new ArrayList<>(); } /* * insert into list sorted by start (first contact position): * binary search the sorted list to find the insertion point */ int insertPosition = BinarySearcher.findFirst(contactFeatureStarts, f -> f.getBegin() >= feature.getBegin()); contactFeatureStarts.add(insertPosition, feature); /* * insert into list sorted by end (second contact position): * binary search the sorted list to find the insertion point */ insertPosition = BinarySearcher.findFirst(contactFeatureEnds, f -> f.getEnd() >= feature.getEnd()); contactFeatureEnds.add(insertPosition, feature); return true; } /** * Answers true if the list contains the feature, else false. This method is * optimised for the condition that the list is sorted on feature start * position ascending, and will give unreliable results if this does not hold. * * @param features * @param feature * @return */ protected static boolean listContains(List features, SequenceFeature feature) { if (features == null || feature == null) { return false; } /* * locate the first entry in the list which does not precede the feature */ // int pos = binarySearch(features, // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); int pos = BinarySearcher.findFirst(features, val -> val.getBegin() >= feature.getBegin()); int len = features.size(); while (pos < len) { SequenceFeature sf = features.get(pos); if (sf.getBegin() > feature.getBegin()) { return false; // no match found } if (sf.equals(feature)) { return true; } pos++; } return false; } /** * Returns a (possibly empty) list of features whose extent overlaps the given * range. The returned list is not ordered. Contact features are included if * either of the contact points lies within the range. * * @param start * start position of overlap range (inclusive) * @param end * end position of overlap range (inclusive) * @return */ public List findOverlappingFeatures(long start, long end) { List result = new ArrayList<>(); findContactFeatures(start, end, result); if (features != null) { result.addAll(features.findOverlaps(start, end)); } return result; } /** * Adds contact features to the result list where either the second or the * first contact position lies within the target range * * @param from * @param to * @param result */ protected void findContactFeatures(long from, long to, List result) { if (contactFeatureStarts != null) { findContactStartOverlaps(from, to, result); } if (contactFeatureEnds != null) { findContactEndOverlaps(from, to, result); } } /** * Adds to the result list any contact features whose end (second contact * point), but not start (first contact point), lies in the query from-to * range * * @param from * @param to * @param result */ protected void findContactEndOverlaps(long from, long to, List result) { /* * find the first contact feature (if any) * whose end point is not before the target range */ int index = BinarySearcher.findFirst(contactFeatureEnds, f -> f.getEnd() >= from); while (index < contactFeatureEnds.size()) { SequenceFeature sf = contactFeatureEnds.get(index); if (!sf.isContactFeature()) { System.err.println("Error! non-contact feature type " + sf.getType() + " in contact features list"); index++; continue; } int begin = sf.getBegin(); if (begin >= from && begin <= to) { /* * this feature's first contact position lies in the search range * so we don't include it in results a second time */ index++; continue; } if (sf.getEnd() > to) { /* * this feature (and all following) has end point after the target range */ break; } /* * feature has end >= from and end <= to * i.e. contact end point lies within overlap search range */ result.add(sf); index++; } } /** * Adds contact features whose start position lies in the from-to range to the * result list * * @param from * @param to * @param result */ protected void findContactStartOverlaps(long from, long to, List result) { int index = BinarySearcher.findFirst(contactFeatureStarts, f -> f.getBegin() >= from); while (index < contactFeatureStarts.size()) { SequenceFeature sf = contactFeatureStarts.get(index); if (!sf.isContactFeature()) { System.err.println("Error! non-contact feature " + sf.toString() + " in contact features list"); index++; continue; } if (sf.getBegin() > to) { /* * this feature's start (and all following) follows the target range */ break; } /* * feature has begin >= from and begin <= to * i.e. contact start point lies within overlap search range */ result.add(sf); index++; } } /** * Answers a list of all positional features stored, in no guaranteed order * * @return */ public List getPositionalFeatures() { List result = new ArrayList<>(); /* * add any contact features - from the list by start position */ if (contactFeatureStarts != null) { result.addAll(contactFeatureStarts); } /* * add any nested features */ if (features != null) { result.addAll(features); } return result; } /** * Answers a list of all contact features. If there are none, returns an * immutable empty list. * * @return */ public List getContactFeatures() { if (contactFeatureStarts == null) { return Collections.emptyList(); } return new ArrayList<>(contactFeatureStarts); } /** * Answers a list of all non-positional features. If there are none, returns * an immutable empty list. * * @return */ public List getNonPositionalFeatures() { if (nonPositionalFeatures == null) { return Collections.emptyList(); } return new ArrayList<>(nonPositionalFeatures); } /** * Deletes the given feature from the store, returning true if it was found * (and deleted), else false. This method makes no assumption that the feature * is in the 'expected' place in the store, in case it has been modified since * it was added. * * @param sf */ public synchronized boolean delete(SequenceFeature sf) { boolean removed = false; /* * try contact positions (and if found, delete * from both lists of contact positions) */ if (!removed && contactFeatureStarts != null) { removed = contactFeatureStarts.remove(sf); if (removed) { contactFeatureEnds.remove(sf); } } boolean removedNonPositional = false; /* * if not found, try non-positional features */ if (!removed && nonPositionalFeatures != null) { removedNonPositional = nonPositionalFeatures.remove(sf); removed = removedNonPositional; } /* * if not found, try nested features */ if (!removed && features != null) { removed = features.remove(sf); featuresList.remove(sf); } if (removed) { rescanAfterDelete(); } return removed; } /** * Rescan all features to recompute any cached values after an entry has been * deleted. This is expected to be an infrequent event, so performance here is * not critical. */ protected synchronized void rescanAfterDelete() { positionalFeatureGroups.clear(); nonPositionalFeatureGroups.clear(); totalExtent = 0; positionalMinScore = Float.NaN; positionalMaxScore = Float.NaN; nonPositionalMinScore = Float.NaN; nonPositionalMaxScore = Float.NaN; /* * scan non-positional features for groups and scores */ for (SequenceFeature sf : getNonPositionalFeatures()) { nonPositionalFeatureGroups.add(sf.getFeatureGroup()); float score = sf.getScore(); nonPositionalMinScore = min(nonPositionalMinScore, score); nonPositionalMaxScore = max(nonPositionalMaxScore, score); } /* * scan positional features for groups, scores and extents */ for (SequenceFeature sf : getPositionalFeatures()) { positionalFeatureGroups.add(sf.getFeatureGroup()); float score = sf.getScore(); positionalMinScore = min(positionalMinScore, score); positionalMaxScore = max(positionalMaxScore, score); totalExtent += getFeatureLength(sf); } } /** * A helper method to return the minimum of two floats, where a non-NaN value * is treated as 'less than' a NaN value (unlike Math.min which does the * opposite) * * @param f1 * @param f2 */ protected static float min(float f1, float f2) { if (Float.isNaN(f1)) { return Float.isNaN(f2) ? f1 : f2; } else { return Float.isNaN(f2) ? f1 : Math.min(f1, f2); } } /** * A helper method to return the maximum of two floats, where a non-NaN value * is treated as 'greater than' a NaN value (unlike Math.max which does the * opposite) * * @param f1 * @param f2 */ protected static float max(float f1, float f2) { if (Float.isNaN(f1)) { return Float.isNaN(f2) ? f1 : f2; } else { return Float.isNaN(f2) ? f1 : Math.max(f1, f2); } } /** * Answers true if this store has no features, else false * * @return */ public boolean isEmpty() { boolean hasFeatures = (contactFeatureStarts != null && !contactFeatureStarts.isEmpty()) || (nonPositionalFeatures != null && !nonPositionalFeatures.isEmpty()) || (features != null && features.size() > 0); return !hasFeatures; } /** * Answers the set of distinct feature groups stored, possibly including null, * as an unmodifiable view of the set. The parameter determines whether the * groups for positional or for non-positional features are returned. * * @param positionalFeatures * @return */ public Set getFeatureGroups(boolean positionalFeatures) { if (positionalFeatures) { return Collections.unmodifiableSet(positionalFeatureGroups); } else { return nonPositionalFeatureGroups == null ? Collections. emptySet() : Collections.unmodifiableSet(nonPositionalFeatureGroups); } } /** * Answers the number of positional (or non-positional) features stored. * Contact features count as 1. * * @param positional * @return */ public int getFeatureCount(boolean positional) { if (!positional) { return nonPositionalFeatures == null ? 0 : nonPositionalFeatures.size(); } int size = 0; if (contactFeatureStarts != null) { // note a contact feature (start/end) counts as one size += contactFeatureStarts.size(); } if (features != null) { size += features.size(); } return size; } /** * Answers the total length of positional features (or zero if there are * none). Contact features contribute a value of 1 to the total. * * @return */ public int getTotalFeatureLength() { return totalExtent; } /** * Answers the minimum score held for positional or non-positional features. * This may be Float.NaN if there are no features, are none has a non-NaN * score. * * @param positional * @return */ public float getMinimumScore(boolean positional) { return positional ? positionalMinScore : nonPositionalMinScore; } /** * Answers the maximum score held for positional or non-positional features. * This may be Float.NaN if there are no features, are none has a non-NaN * score. * * @param positional * @return */ public float getMaximumScore(boolean positional) { return positional ? positionalMaxScore : nonPositionalMaxScore; } /** * Answers a list of all either positional or non-positional features whose * feature group matches the given group (which may be null) * * @param positional * @param group * @return */ public List getFeaturesForGroup(boolean positional, String group) { List result = new ArrayList<>(); /* * if we know features don't include the target group, no need * to inspect them for matches */ if (positional && !positionalFeatureGroups.contains(group) || !positional && !nonPositionalFeatureGroups.contains(group)) { return result; } List sfs = positional ? getPositionalFeatures() : getNonPositionalFeatures(); for (SequenceFeature sf : sfs) { String featureGroup = sf.getFeatureGroup(); if (group == null && featureGroup == null || group != null && group.equals(featureGroup)) { result.add(sf); } } return result; } /** * Adds the shift amount to the start and end of all positional features whose * start position is at or after fromPosition. Returns true if at least one * feature was shifted, else false. * * @param fromPosition * @param shiftBy * @return */ public synchronized boolean shiftFeatures(int fromPosition, int shiftBy) { /* * Because begin and end are final fields (to ensure the data store's * integrity), we have to delete each feature and re-add it as amended. * (Although a simple shift of all values would preserve data integrity!) */ boolean modified = false; for (SequenceFeature sf : getPositionalFeatures()) { if (sf.getBegin() >= fromPosition) { modified = true; int newBegin = sf.getBegin() + shiftBy; int newEnd = sf.getEnd() + shiftBy; /* * sanity check: don't shift left of the first residue */ if (newEnd > 0) { newBegin = Math.max(1, newBegin); SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd, sf.getFeatureGroup(), sf.getScore()); addFeature(sf2); } delete(sf); } } return modified; } /** * Find all features containing this position. * * @param pos * @return list of SequenceFeatures * @author Bob Hanson 2019.07.30 */ public List findOverlappingFeatures(int pos, List result) { if (result == null) { result = new ArrayList<>(); } if (contactFeatureStarts != null) { findContacts(contactFeatureStarts, pos, result, true); findContacts(contactFeatureEnds, pos, result, false); } if (featuresList != null) { findOverlaps(featuresList, pos, result); } return result; } /** * Binary search for contact start or end at a given (Overview) position. * * @param l * @param pos * @param result * @param isStart * * @author Bob Hanson 2019.07.30 */ private static void findContacts(List l, int pos, List result, boolean isStart) { int low = 0; int high = l.size() - 1; while (low <= high) { int mid = (low + high) >>> 1; SequenceFeature f = l.get(mid); switch (Long.signum((isStart ? f.begin : f.end) - pos)) { case -1: low = mid + 1; continue; case 1: high = mid - 1; continue; case 0: int m = mid; result.add(f); // could be "5" in 12345556788 ? while (++mid <= high && (f = l.get(mid)) != null && (isStart ? f.begin : f.end) == pos) { result.add(f); } while (--m >= low && (f = l.get(m)) != null && (isStart ? f.begin : f.end) == pos) { result.add(f); } return; } } } BitSet bs = new BitSet(); /** * Double binary sort with bitset correlation * * * @param features * @param pos * @param result */ private void findOverlaps(List features, int pos, List result) { int n = featuresList.size(); if (n == 1) { checkOne(featuresList.get(0), pos, result); return; } if (orderedFeatureStarts == null) { rebuildArrays(n); } SequenceFeature sf = findClosestFeature(orderedFeatureStarts, pos); while (sf != null) { if (sf.end >= pos) { result.add(sf); } sf = sf.containedBy; } } private void linkFeatures(SequenceFeature[] intervals) { if (intervals.length < 2) { return; } int maxEnd = intervals[0].end; for (int i = 1, n = intervals.length; i < n; i++) { SequenceFeature ithis = intervals[i]; if (ithis.begin <= maxEnd) { ithis.containedBy = getContainedBy(intervals[i - 1], ithis); } if (ithis.end > maxEnd) { maxEnd = ithis.end; } } } private SequenceFeature getContainedBy(SequenceFeature sf, SequenceFeature sf0) { int begin = sf0.begin; while (sf != null) { if (begin <= sf.end) { System.out.println("\nFS found " + sf0.index + ":" + sf0 + "\nFS in " + sf.index + ":" + sf); return sf; } sf = sf.containedBy; } return null; } private SequenceFeature findClosestFeature(SequenceFeature[] l, int pos) { int low = 0; int high = l.length - 1; while (low <= high) { int mid = (low + high) >>> 1; SequenceFeature f = l[mid]; switch (Long.signum(f.begin - pos)) { case -1: low = mid + 1; continue; case 1: high = mid - 1; continue; case 0: while (++mid <= high && l[mid].begin == pos) { ; } mid--; return l[mid]; } } // -1 here? return (high < 0 || low >= l.length ? null : l[high]); } private void checkOne(SequenceFeature sf, int pos, List result) { if (sf.begin <= pos && sf.end >= pos) { result.add(sf); } return; } /* * contact features ordered by first contact position */ private SequenceFeature[] orderedFeatureStarts; private void rebuildArrays(int n) { if (startComp == null) { startComp = new StartComparator(); } orderedFeatureStarts = new SequenceFeature[n]; for (int i = n; --i >= 0;) { SequenceFeature sf = featuresList.get(i); sf.index = i; orderedFeatureStarts[i] = sf; } Arrays.sort(orderedFeatureStarts, startComp); linkFeatures(orderedFeatureStarts); } class StartComparator implements Comparator { int pos; @Override public int compare(SequenceFeature o1, SequenceFeature o2) { int p1 = o1.begin; int p2 = o2.begin; return (p1 < p2 ? -1 : p1 > p2 ? 1 : 0); } } static StartComparator startComp; // class EndComparator implements Comparator // { // // int pos; // // @Override // public int compare(SequenceFeature o1, SequenceFeature o2) // { // int p1 = o1.end; // int p2 = o2.end; // int val = (p1 < p2 ? 1 : p1 > p2 ? -1 : 0); // return val; // } // // } // // static EndComparator endComp; }