2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel.features;
23 import jalview.datamodel.SequenceFeature;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.HashSet;
29 import java.util.List;
32 import intervalstore.impl.BinarySearcher;
34 public abstract class FeatureStore implements FeatureStoreI
38 * Non-positional features have no (zero) start/end position.
39 * Kept as a separate list in case this criterion changes in future.
41 List<SequenceFeature> nonPositionalFeatures;
44 * contact features ordered by first contact position
46 List<SequenceFeature> contactFeatureStarts;
49 * contact features ordered by second contact position
51 List<SequenceFeature> contactFeatureEnds;
54 * IntervalStore holds remaining features and provides efficient
55 * query for features overlapping any given interval
57 Collection<SequenceFeature> features;
60 public Collection<SequenceFeature> getFeatures()
66 * Feature groups represented in stored positional features
67 * (possibly including null)
69 Set<String> positionalFeatureGroups;
72 * Feature groups represented in stored non-positional features
73 * (possibly including null)
75 Set<String> nonPositionalFeatureGroups;
78 * the total length of all positional features; contact features count 1 to
79 * the total and 1 to size(), consistent with an average 'feature length' of 1
83 float positionalMinScore;
85 float positionalMaxScore;
87 float nonPositionalMinScore;
89 float nonPositionalMaxScore;
96 positionalFeatureGroups = new HashSet<>();
97 nonPositionalFeatureGroups = new HashSet<>();
98 positionalMinScore = Float.NaN;
99 positionalMaxScore = Float.NaN;
100 nonPositionalMinScore = Float.NaN;
101 nonPositionalMaxScore = Float.NaN;
103 // we only construct nonPositionalFeatures, contactFeatures if we need to
107 * Adds one sequence feature to the store, and returns true, unless the
108 * feature is already contained in the store, in which case this method
109 * returns false. Containment is determined by SequenceFeature.equals()
116 public boolean addFeature(SequenceFeature feature)
118 if (contains(feature))
124 * keep a record of feature groups
126 if (!feature.isNonPositional())
128 positionalFeatureGroups.add(feature.getFeatureGroup());
131 if (feature.isContactFeature())
133 addContactFeature(feature);
135 else if (feature.isNonPositional())
137 addNonPositionalFeature(feature);
141 addNestedFeature(feature);
145 * record the total extent of positional features, to make
146 * getTotalFeatureLength possible; we count the length of a
147 * contact feature as 1
149 totalExtent += getFeatureLength(feature);
152 * record the minimum and maximum score for positional
153 * and non-positional features
155 float score = feature.getScore();
156 if (!Float.isNaN(score))
158 if (feature.isNonPositional())
160 nonPositionalMinScore = min(nonPositionalMinScore, score);
161 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
165 positionalMinScore = min(positionalMinScore, score);
166 positionalMaxScore = max(positionalMaxScore, score);
174 * Answers true if this store contains the given feature (testing by
175 * SequenceFeature.equals), else false
181 public boolean contains(SequenceFeature feature)
183 if (feature.isNonPositional())
185 return nonPositionalFeatures == null ? false
186 : nonPositionalFeatures.contains(feature);
189 if (feature.isContactFeature())
191 return contactFeatureStarts != null
192 && listContains(contactFeatureStarts, feature);
195 return features == null ? false : features.contains(feature);
199 * Answers the 'length' of the feature, counting 0 for non-positional features
200 * and 1 for contact features
205 protected static int getFeatureLength(SequenceFeature feature)
207 if (feature.isNonPositional())
211 if (feature.isContactFeature())
215 return 1 + feature.getEnd() - feature.getBegin();
219 * Adds the feature to the list of non-positional features (with lazy
220 * instantiation of the list if it is null), and returns true. The feature
221 * group is added to the set of distinct feature groups for non-positional
222 * features. This method allows duplicate features, so test before calling to
227 protected boolean addNonPositionalFeature(SequenceFeature feature)
229 if (nonPositionalFeatures == null)
231 nonPositionalFeatures = new ArrayList<>();
234 nonPositionalFeatures.add(feature);
236 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
242 * Adds one feature to the IntervalStore that can manage nested features
243 * (creating the IntervalStore if necessary)
245 protected synchronized void addNestedFeature(SequenceFeature feature)
247 features.add(feature);
250 * Add a contact feature to the lists that hold them ordered by start (first
251 * contact) and by end (second contact) position, ensuring the lists remain
252 * ordered, and returns true. This method allows duplicate features to be
253 * added, so test before calling to avoid this.
258 protected synchronized boolean addContactFeature(SequenceFeature feature)
260 if (contactFeatureStarts == null)
262 contactFeatureStarts = new ArrayList<>();
263 contactFeatureEnds = new ArrayList<>();
267 * insert into list sorted by start (first contact position):
268 * binary search the sorted list to find the insertion point
270 int insertPosition = BinarySearcher.findFirst(contactFeatureStarts,
271 f -> f.getBegin() >= feature.getBegin());
272 contactFeatureStarts.add(insertPosition, feature);
275 * insert into list sorted by end (second contact position):
276 * binary search the sorted list to find the insertion point
278 insertPosition = BinarySearcher.findFirst(contactFeatureEnds,
279 f -> f.getEnd() >= feature.getEnd());
280 contactFeatureEnds.add(insertPosition, feature);
286 * Answers true if the list contains the feature, else false. This method is
287 * optimised for the condition that the list is sorted on feature start
288 * position ascending, and will give unreliable results if this does not hold.
294 protected static boolean listContains(List<SequenceFeature> features,
295 SequenceFeature feature)
297 if (features == null || feature == null)
303 * locate the first entry in the list which does not precede the feature
305 // int pos = binarySearch(features,
306 // SearchCriterion.byFeature(feature, RangeComparator.BY_START_POSITION));
307 int pos = BinarySearcher.findFirst(features,
308 val -> val.getBegin() >= feature.getBegin());
309 int len = features.size();
312 SequenceFeature sf = features.get(pos);
313 if (sf.getBegin() > feature.getBegin())
315 return false; // no match found
317 if (sf.equals(feature))
326 abstract protected void findContactFeatures(long from, long to,
327 List<SequenceFeature> result);
330 * Answers a list of all positional features stored, in no guaranteed order
336 public List<SequenceFeature> getPositionalFeatures(
337 List<SequenceFeature> result)
341 * add any contact features - from the list by start position
343 if (contactFeatureStarts != null)
345 result.addAll(contactFeatureStarts);
349 * add any nested features
351 if (features != null)
353 result.addAll(features);
360 * Answers a list of all contact features. If there are none, returns an
361 * immutable empty list.
367 public List<SequenceFeature> getContactFeatures(
368 List<SequenceFeature> result)
370 if (contactFeatureStarts != null)
372 result.addAll(contactFeatureStarts);
378 * Answers a list of all non-positional features. If there are none, returns
379 * an immutable empty list.
385 public List<SequenceFeature> getNonPositionalFeatures(
386 List<SequenceFeature> result)
388 if (nonPositionalFeatures != null)
390 result.addAll(nonPositionalFeatures);
396 * Deletes the given feature from the store, returning true if it was found
397 * (and deleted), else false. This method makes no assumption that the feature
398 * is in the 'expected' place in the store, in case it has been modified since
405 public synchronized boolean delete(SequenceFeature sf)
407 boolean removed = false;
410 * try contact positions (and if found, delete
411 * from both lists of contact positions)
413 if (!removed && contactFeatureStarts != null)
415 removed = contactFeatureStarts.remove(sf);
418 contactFeatureEnds.remove(sf);
422 boolean removedNonPositional = false;
425 * if not found, try non-positional features
427 if (!removed && nonPositionalFeatures != null)
429 removedNonPositional = nonPositionalFeatures.remove(sf);
430 removed = removedNonPositional;
434 * if not found, try nested features
436 if (!removed && features != null)
438 removed = features.remove(sf);
450 * Rescan all features to recompute any cached values after an entry has been
451 * deleted. This is expected to be an infrequent event, so performance here is
454 protected synchronized void rescanAfterDelete()
456 positionalFeatureGroups.clear();
457 nonPositionalFeatureGroups.clear();
459 positionalMinScore = Float.NaN;
460 positionalMaxScore = Float.NaN;
461 nonPositionalMinScore = Float.NaN;
462 nonPositionalMaxScore = Float.NaN;
464 * scan non-positional features for groups and scores
466 if (nonPositionalFeatures != null)
468 for (SequenceFeature sf : nonPositionalFeatures)
470 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
471 float score = sf.getScore();
472 nonPositionalMinScore = min(nonPositionalMinScore, score);
473 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
478 * scan positional features for groups, scores and extents
481 rescanPositional(contactFeatureStarts);
482 rescanPositional(features);
485 private void rescanPositional(Collection<SequenceFeature> sfs)
491 for (SequenceFeature sf : sfs)
493 positionalFeatureGroups.add(sf.getFeatureGroup());
494 float score = sf.getScore();
495 positionalMinScore = min(positionalMinScore, score);
496 positionalMaxScore = max(positionalMaxScore, score);
497 totalExtent += getFeatureLength(sf);
502 * A helper method to return the minimum of two floats, where a non-NaN value
503 * is treated as 'less than' a NaN value (unlike Math.min which does the
509 protected static float min(float f1, float f2)
513 return Float.isNaN(f2) ? f1 : f2;
517 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
522 * A helper method to return the maximum of two floats, where a non-NaN value
523 * is treated as 'greater than' a NaN value (unlike Math.max which does the
529 protected static float max(float f1, float f2)
533 return Float.isNaN(f2) ? f1 : f2;
537 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
543 * Answers true if this store has no features, else false
549 public boolean isEmpty()
551 boolean hasFeatures = (contactFeatureStarts != null
552 && !contactFeatureStarts.isEmpty())
553 || (nonPositionalFeatures != null
554 && !nonPositionalFeatures.isEmpty())
555 || features.size() > 0;
561 * Answers the set of distinct feature groups stored, possibly including null,
562 * as an unmodifiable view of the set. The parameter determines whether the
563 * groups for positional or for non-positional features are returned.
565 * @param positionalFeatures
570 public Set<String> getFeatureGroups(boolean positionalFeatures)
572 if (positionalFeatures)
574 return Collections.unmodifiableSet(positionalFeatureGroups);
578 return nonPositionalFeatureGroups == null
579 ? Collections.<String> emptySet()
580 : Collections.unmodifiableSet(nonPositionalFeatureGroups);
585 * Answers a list of all either positional or non-positional features whose
586 * feature group matches the given group (which may be null)
594 public List<SequenceFeature> getFeaturesForGroup(boolean positional,
597 List<SequenceFeature> result = new ArrayList<>();
600 * if we know features don't include the target group, no need
601 * to inspect them for matches
603 if (positional && !positionalFeatureGroups.contains(group)
604 || !positional && !nonPositionalFeatureGroups.contains(group))
611 addFeaturesForGroup(group, contactFeatureStarts, result);
612 addFeaturesForGroup(group, features, result);
616 addFeaturesForGroup(group, nonPositionalFeatures, result);
621 private void addFeaturesForGroup(String group,
622 Collection<SequenceFeature> sfs, List<SequenceFeature> result)
628 for (SequenceFeature sf : sfs)
630 String featureGroup = sf.getFeatureGroup();
631 if (group == null && featureGroup == null
632 || group != null && group.equals(featureGroup))
640 * Answers the number of positional (or non-positional) features stored.
641 * Contact features count as 1.
648 public int getFeatureCount(boolean positional)
652 return nonPositionalFeatures == null ? 0
653 : nonPositionalFeatures.size();
656 return (contactFeatureStarts == null ? 0 : contactFeatureStarts.size())
663 * Answers the total length of positional features (or zero if there are
664 * none). Contact features contribute a value of 1 to the total.
670 public int getTotalFeatureLength()
676 * Answers the minimum score held for positional or non-positional features.
677 * This may be Float.NaN if there are no features, are none has a non-NaN
685 public float getMinimumScore(boolean positional)
687 return positional ? positionalMinScore : nonPositionalMinScore;
691 * Answers the maximum score held for positional or non-positional features.
692 * This may be Float.NaN if there are no features, are none has a non-NaN
700 public float getMaximumScore(boolean positional)
702 return positional ? positionalMaxScore : nonPositionalMaxScore;
707 * Adds the shift amount to the start and end of all positional features whose
708 * start position is at or after fromPosition. Returns true if at least one
709 * feature was shifted, else false.
711 * @param fromPosition
717 public synchronized boolean shiftFeatures(int fromPosition, int shiftBy)
720 * Because begin and end are final fields (to ensure the data store's
721 * integrity), we have to delete each feature and re-add it as amended.
722 * (Although a simple shift of all values would preserve data integrity!)
724 boolean modified = false;
725 for (SequenceFeature sf : getPositionalFeatures())
727 if (sf.getBegin() >= fromPosition)
730 int newBegin = sf.getBegin() + shiftBy;
731 int newEnd = sf.getEnd() + shiftBy;
734 * sanity check: don't shift left of the first residue
738 newBegin = Math.max(1, newBegin);
739 SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd,
740 sf.getFeatureGroup(), sf.getScore());
751 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
753 return findOverlappingFeatures(start, end, null);
757 public List<SequenceFeature> getPositionalFeatures()
759 return getPositionalFeatures(new ArrayList<>());
763 public List<SequenceFeature> getContactFeatures()
765 return getContactFeatures(new ArrayList<>());
769 public List<SequenceFeature> getNonPositionalFeatures()
771 return getNonPositionalFeatures(new ArrayList<>());