X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FFeatureStore.java;h=02ce1c50a5f28eb59603bab4d7b7f7d335c4cfaa;hb=5969ead0fd401f4435a3435ede9e4e87cf9c1d0b;hp=cb0c36e91e6ebd4e64a878f87e444e03a9fae980;hpb=6a2fd9a13d79f316acdd4fcff066b8cfaaefa939;p=jalview.git diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index cb0c36e..02ce1c5 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -1,5 +1,26 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel.features; +import jalview.datamodel.ContiguousI; import jalview.datamodel.SequenceFeature; import java.util.ArrayList; @@ -32,6 +53,13 @@ public class FeatureStore */ abstract boolean compare(SequenceFeature entry); + /** + * serves a search condition for finding the first feature whose start + * position follows a given target location + * + * @param target + * @return + */ static SearchCriterion byStart(final long target) { return new SearchCriterion() { @@ -44,6 +72,13 @@ public class FeatureStore }; } + /** + * serves a search condition for finding the first feature whose end + * position is at or follows a given target location + * + * @param target + * @return + */ static SearchCriterion byEnd(final long target) { return new SearchCriterion() @@ -57,6 +92,13 @@ public class FeatureStore }; } + /** + * serves a search condition for finding the first feature which follows the + * given range as determined by a supplied comparator + * + * @param target + * @return + */ static SearchCriterion byFeature(final ContiguousI to, final Comparator rc) { @@ -72,10 +114,6 @@ public class FeatureStore } } - Comparator startOrdering = new RangeComparator(true); - - Comparator endOrdering = new RangeComparator(false); - /* * Non-positional features have no (zero) start/end position. * Kept as a separate list in case this criterion changes in future. @@ -127,6 +165,14 @@ public class FeatureStore */ int totalExtent; + float positionalMinScore; + + float positionalMaxScore; + + float nonPositionalMinScore; + + float nonPositionalMaxScore; + /** * Constructor */ @@ -134,6 +180,11 @@ public class FeatureStore { nonNestedFeatures = new ArrayList(); positionalFeatureGroups = new HashSet(); + nonPositionalFeatureGroups = new HashSet(); + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; // we only construct nonPositionalFeatures, contactFeatures // or the NCList if we need to @@ -149,6 +200,11 @@ public class FeatureStore */ public boolean addFeature(SequenceFeature feature) { + if (contains(feature)) + { + return false; + } + /* * keep a record of feature groups */ @@ -169,40 +225,104 @@ public class FeatureStore } else { - if (!nonNestedFeatures.contains(feature)) + added = addNonNestedFeature(feature); + if (!added) { - added = addNonNestedFeature(feature); - if (!added) + /* + * detected a nested feature - put it in the NCList structure + */ + added = addNestedFeature(feature); + } + } + + if (added) + { + /* + * record the total extent of positional features, to make + * getTotalFeatureLength possible; we count the length of a + * contact feature as 1 + */ + totalExtent += getFeatureLength(feature); + + /* + * record the minimum and maximum score for positional + * and non-positional features + */ + float score = feature.getScore(); + if (!Float.isNaN(score)) + { + if (feature.isNonPositional()) { - /* - * detected a nested feature - put it in the NCList structure - */ - added = addNestedFeature(feature); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } + else + { + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); } } } - /* - * record the total extent of positional features, to make - * getAverageFeatureLength possible; we count the length of a - * contact feature as 1 - */ - if (added && !feature.isNonPositional()) + return added; + } + + /** + * Answers true if this store contains the given feature (testing by + * SequenceFeature.equals), else false + * + * @param feature + * @return + */ + public boolean contains(SequenceFeature feature) + { + if (feature.isNonPositional()) { - int featureLength = feature.isContactFeature() ? 1 : 1 - + feature.getEnd() - feature.getBegin(); - totalExtent += featureLength; + return nonPositionalFeatures == null ? false : nonPositionalFeatures + .contains(feature); } - return added; + if (feature.isContactFeature()) + { + return contactFeatureStarts == null ? false : listContains( + contactFeatureStarts, feature); + } + + if (listContains(nonNestedFeatures, feature)) + { + return true; + } + + return nestedFeatures == null ? false : nestedFeatures + .contains(feature); + } + + /** + * Answers the 'length' of the feature, counting 0 for non-positional features + * and 1 for contact features + * + * @param feature + * @return + */ + protected static int getFeatureLength(SequenceFeature feature) + { + if (feature.isNonPositional()) + { + return 0; + } + if (feature.isContactFeature()) + { + return 1; + } + return 1 + feature.getEnd() - feature.getBegin(); } /** * Adds the feature to the list of non-positional features (with lazy - * instantiation of the list if it is null), and returns true. If the - * non-positional features already include the new feature (by equality test), - * then it is not added, and this method returns false. The feature group is - * added to the set of distinct feature groups for non-positional features. + * instantiation of the list if it is null), and returns true. The feature + * group is added to the set of distinct feature groups for non-positional + * features. This method allows duplicate features, so test before calling to + * prevent this. * * @param feature */ @@ -211,11 +331,6 @@ public class FeatureStore if (nonPositionalFeatures == null) { nonPositionalFeatures = new ArrayList(); - nonPositionalFeatureGroups = new HashSet(); - } - if (nonPositionalFeatures.contains(feature)) - { - return false; } nonPositionalFeatures.add(feature); @@ -235,7 +350,7 @@ public class FeatureStore { if (nestedFeatures == null) { - nestedFeatures = new NCList(feature); + nestedFeatures = new NCList<>(feature); return true; } return nestedFeatures.add(feature, false); @@ -259,7 +374,7 @@ public class FeatureStore * find the first stored feature which doesn't precede the new one */ int insertPosition = binarySearch(nonNestedFeatures, - SearchCriterion.byFeature(feature, startOrdering)); + SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); /* * fail if we detect feature enclosure - of the new feature by @@ -316,8 +431,8 @@ public class FeatureStore /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain - * ordered, and returns true. If the contact feature lists already contain the - * given feature (by test for equality), does not add it and returns false. + * ordered, and returns true. This method allows duplicate features to be + * added, so test before calling to avoid this. * * @param feature * @return @@ -333,18 +448,62 @@ public class FeatureStore contactFeatureEnds = new ArrayList(); } - // TODO binary search for insertion points! - if (contactFeatureStarts.contains(feature)) + /* + * binary search the sorted list to find the insertion point + */ + int insertPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byFeature(feature, + RangeComparator.BY_START_POSITION)); + contactFeatureStarts.add(insertPosition, feature); + // and resort to mak siccar...just in case insertion point not quite right + Collections.sort(contactFeatureStarts, RangeComparator.BY_START_POSITION); + + insertPosition = binarySearch(contactFeatureStarts, + SearchCriterion.byFeature(feature, + RangeComparator.BY_END_POSITION)); + contactFeatureEnds.add(feature); + Collections.sort(contactFeatureEnds, RangeComparator.BY_END_POSITION); + + return true; + } + + /** + * Answers true if the list contains the feature, else false. This method is + * optimised for the condition that the list is sorted on feature start + * position ascending, and will give unreliable results if this does not hold. + * + * @param features + * @param feature + * @return + */ + protected static boolean listContains(List features, + SequenceFeature feature) + { + if (features == null || feature == null) { return false; } - contactFeatureStarts.add(feature); - Collections.sort(contactFeatureStarts, startOrdering); - contactFeatureEnds.add(feature); - Collections.sort(contactFeatureEnds, endOrdering); - - return true; + /* + * locate the first entry in the list which does not precede the feature + */ + int pos = binarySearch(features, + SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION)); + int len = features.size(); + while (pos < len) + { + SequenceFeature sf = features.get(pos); + if (sf.getBegin() > feature.getBegin()) + { + return false; // no match found + } + if (sf.equals(feature)) + { + return true; + } + pos++; + } + return false; } /** @@ -360,7 +519,7 @@ public class FeatureStore */ public List findOverlappingFeatures(long start, long end) { - List result = new ArrayList(); + List result = new ArrayList<>(); findNonNestedFeatures(start, end, result); @@ -457,28 +616,15 @@ public class FeatureStore protected void findNonNestedFeatures(long from, long to, List result) { + /* + * find the first feature whose end position is + * after the target range start + */ int startIndex = binarySearch(nonNestedFeatures, SearchCriterion.byEnd(from)); - findNonNestedFeatures(startIndex, from, to, result); - } - - /** - * Scans the list of non-nested features, starting from startIndex, to find - * those that overlap the from-to range, and adds them to the result list. - * Returns the index of the first feature whose start position is after the - * target range (or the length of the whole list if none such feature exists). - * - * @param startIndex - * @param from - * @param to - * @param result - * @return - */ - protected int findNonNestedFeatures(final int startIndex, long from, - long to, List result) - { - int i = startIndex; + final int startIndex1 = startIndex; + int i = startIndex1; while (i < nonNestedFeatures.size()) { SequenceFeature sf = nonNestedFeatures.get(i); @@ -486,15 +632,12 @@ public class FeatureStore { break; } - int start = sf.getBegin(); - int end = sf.getEnd(); - if (start <= to && end >= from) + if (sf.getBegin() <= to && sf.getEnd() >= from) { result.add(sf); } i++; } - return i; } /** @@ -538,7 +681,7 @@ public class FeatureStore /* * add non-nested features (may be all features for many cases) */ - List result = new ArrayList(); + List result = new ArrayList<>(); result.addAll(nonNestedFeatures); /* @@ -572,7 +715,7 @@ public class FeatureStore { return Collections.emptyList(); } - return new ArrayList(contactFeatureStarts); + return new ArrayList<>(contactFeatureStarts); } /** @@ -587,7 +730,7 @@ public class FeatureStore { return Collections.emptyList(); } - return new ArrayList(nonPositionalFeatures); + return new ArrayList<>(nonPositionalFeatures); } /** @@ -639,67 +782,89 @@ public class FeatureStore if (removed) { - rebuildFeatureGroups(sf.getFeatureGroup(), removedNonPositional); - // TODO and recalculate totalExtent (feature may have changed length!) + rescanAfterDelete(); } return removed; } /** - * Check whether the given feature group is still represented, in either - * positional or non-positional features, and if not, remove it from the set - * of feature groups + * Rescan all features to recompute any cached values after an entry has been + * deleted. This is expected to be an infrequent event, so performance here is + * not critical. + */ + protected synchronized void rescanAfterDelete() + { + positionalFeatureGroups.clear(); + nonPositionalFeatureGroups.clear(); + totalExtent = 0; + positionalMinScore = Float.NaN; + positionalMaxScore = Float.NaN; + nonPositionalMinScore = Float.NaN; + nonPositionalMaxScore = Float.NaN; + + /* + * scan non-positional features for groups and scores + */ + for (SequenceFeature sf : getNonPositionalFeatures()) + { + nonPositionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + nonPositionalMinScore = min(nonPositionalMinScore, score); + nonPositionalMaxScore = max(nonPositionalMaxScore, score); + } + + /* + * scan positional features for groups, scores and extents + */ + for (SequenceFeature sf : getPositionalFeatures()) + { + positionalFeatureGroups.add(sf.getFeatureGroup()); + float score = sf.getScore(); + positionalMinScore = min(positionalMinScore, score); + positionalMaxScore = max(positionalMaxScore, score); + totalExtent += getFeatureLength(sf); + } + } + + /** + * A helper method to return the minimum of two floats, where a non-NaN value + * is treated as 'less than' a NaN value (unlike Math.min which does the + * opposite) * - * @param featureGroup - * @param nonPositional + * @param f1 + * @param f2 */ - protected void rebuildFeatureGroups(String featureGroup, - boolean nonPositional) + protected static float min(float f1, float f2) { - if (nonPositional && nonPositionalFeatures != null) + if (Float.isNaN(f1)) { - boolean found = false; - for (SequenceFeature sf : nonPositionalFeatures) - { - String group = sf.getFeatureGroup(); - if (featureGroup == group - || (featureGroup != null && featureGroup.equals(group))) - { - found = true; - break; - } - } - if (!found) - { - nonPositionalFeatureGroups.remove(featureGroup); - } + return Float.isNaN(f2) ? f1 : f2; } - else if (!findFeatureGroup(featureGroup)) + else { - positionalFeatureGroups.remove(featureGroup); + return Float.isNaN(f2) ? f1 : Math.min(f1, f2); } } /** - * Scans all positional features to check whether the given feature group is - * found, and returns true if found, else false + * A helper method to return the maximum of two floats, where a non-NaN value + * is treated as 'greater than' a NaN value (unlike Math.max which does the + * opposite) * - * @param featureGroup - * @return + * @param f1 + * @param f2 */ - protected boolean findFeatureGroup(String featureGroup) + protected static float max(float f1, float f2) { - for (SequenceFeature sf : getPositionalFeatures()) + if (Float.isNaN(f1)) { - String group = sf.getFeatureGroup(); - if (group == featureGroup - || (group != null && group.equals(featureGroup))) - { - return true; - } + return Float.isNaN(f2) ? f1 : f2; + } + else + { + return Float.isNaN(f2) ? f1 : Math.max(f1, f2); } - return false; } /** @@ -750,7 +915,7 @@ public class FeatureStore * @param sc * @return */ - protected int binarySearch(List features, + protected static int binarySearch(List features, SearchCriterion sc) { int start = 0; @@ -817,4 +982,103 @@ public class FeatureStore { return totalExtent; } + + /** + * Answers the minimum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. + * + * @param positional + * @return + */ + public float getMinimumScore(boolean positional) + { + return positional ? positionalMinScore : nonPositionalMinScore; + } + + /** + * Answers the maximum score held for positional or non-positional features. + * This may be Float.NaN if there are no features, are none has a non-NaN + * score. + * + * @param positional + * @return + */ + public float getMaximumScore(boolean positional) + { + return positional ? positionalMaxScore : nonPositionalMaxScore; + } + + /** + * Answers a list of all either positional or non-positional features whose + * feature group matches the given group (which may be null) + * + * @param positional + * @param group + * @return + */ + public List getFeaturesForGroup(boolean positional, + String group) + { + List result = new ArrayList<>(); + + /* + * if we know features don't include the target group, no need + * to inspect them for matches + */ + if (positional && !positionalFeatureGroups.contains(group) + || !positional && !nonPositionalFeatureGroups.contains(group)) + { + return result; + } + + List sfs = positional ? getPositionalFeatures() + : getNonPositionalFeatures(); + for (SequenceFeature sf : sfs) + { + String featureGroup = sf.getFeatureGroup(); + if (group == null && featureGroup == null || group != null + && group.equals(featureGroup)) + { + result.add(sf); + } + } + return result; + } + + /** + * Adds the shift value to the start and end of all positional features. + * Returns true if at least one feature was updated, else false. + * + * @param shift + * @return + */ + public synchronized boolean shiftFeatures(int shift) + { + /* + * Because begin and end are final fields (to ensure the data store's + * integrity), we have to delete each feature and re-add it as amended. + * (Although a simple shift of all values would preserve data integrity!) + */ + boolean modified = false; + for (SequenceFeature sf : getPositionalFeatures()) + { + modified = true; + int newBegin = sf.getBegin() + shift; + int newEnd = sf.getEnd() + shift; + + /* + * sanity check: don't shift left of the first residue + */ + if (newEnd > 0) + { + newBegin = Math.max(1, newBegin); + SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup(), sf.getScore()); + addFeature(sf2); + } + delete(sf); + } + return modified; + } }