2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel.features;
23 import jalview.datamodel.SequenceFeature;
24 import jalview.util.Platform;
26 import java.util.ArrayList;
27 import java.util.Collection;
28 import java.util.Collections;
29 import java.util.HashSet;
30 import java.util.List;
33 import intervalstore.api.IntervalStoreI;
34 import intervalstore.impl.BinarySearcher;
35 import intervalstore.impl.BinarySearcher.Compare;
37 public class FeatureStore
40 * track last start for quick insertion of ordered features
42 protected int lastStart = -1;
44 protected int lastContactStart = -1;
47 * Non-positional features have no (zero) start/end position.
48 * Kept as a separate list in case this criterion changes in future.
50 List<SequenceFeature> nonPositionalFeatures;
53 * contact features ordered by first contact position
55 List<SequenceFeature> contactFeatureStarts;
58 * contact features ordered by second contact position
60 List<SequenceFeature> contactFeatureEnds;
63 * IntervalStore holds remaining features and provides efficient
64 * query for features overlapping any given interval
66 IntervalStoreI<SequenceFeature> features;
69 * Feature groups represented in stored positional features
70 * (possibly including null)
72 Set<String> positionalFeatureGroups;
75 * Feature groups represented in stored non-positional features
76 * (possibly including null)
78 Set<String> nonPositionalFeatureGroups;
81 * the total length of all positional features; contact features count 1 to
82 * the total and 1 to size(), consistent with an average 'feature length' of 1
86 float positionalMinScore;
88 float positionalMaxScore;
90 float nonPositionalMinScore;
92 float nonPositionalMaxScore;
94 public final static int INTERVAL_STORE_DEFAULT = -1;
97 * original NCList-based IntervalStore
99 public final static int INTERVAL_STORE_NCLIST_OBJECT = 0;
102 * linked-list IntervalStore
104 public final static int INTERVAL_STORE_LINKED_LIST_PRESORT = 1;
107 * linked-list IntervalStore
109 public final static int INTERVAL_STORE_LINKED_LIST_NO_PRESORT = 2;
112 * NCList as array buffer IntervalStore
114 public final static int INTERVAL_STORE_NCLIST_BUFFER_PRESORT = 3;
117 * NCList as array buffer IntervalStore
119 public final static int INTERVAL_STORE_NCLIST_BUFFER_NO_PRESORT = 4;
121 static final int intervalStoreJavaOption = INTERVAL_STORE_NCLIST_OBJECT;
123 static final int intervalStoreJSOption = INTERVAL_STORE_NCLIST_BUFFER_PRESORT;
126 * Answers the 'length' of the feature, counting 0 for non-positional features
127 * and 1 for contact features
132 protected static int getFeatureLength(SequenceFeature feature)
134 if (feature.isNonPositional())
138 if (feature.isContactFeature())
142 return 1 + feature.getEnd() - feature.getBegin();
146 * Answers true if the list contains the feature, else false. This method is
147 * optimised for the condition that the list is sorted on feature start
148 * position ascending, and will give unreliable results if this does not hold.
154 public boolean listContains(List<SequenceFeature> list,
155 SequenceFeature feature)
157 if (list == null || feature == null)
163 * locate the first entry in the list which does not precede the feature
165 int begin = feature.begin;
166 int pos = BinarySearcher.findFirst(list, true, Compare.GE, begin);
167 int len = list.size();
170 SequenceFeature sf = list.get(pos);
171 if (sf.begin > begin)
173 return false; // no match found
175 if (sf.equals(feature))
185 * A helper method to return the maximum of two floats, where a non-NaN value
186 * is treated as 'greater than' a NaN value (unlike Math.max which does the
192 protected static float max(float f1, float f2)
196 return Float.isNaN(f2) ? f1 : f2;
200 return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
205 * A helper method to return the minimum of two floats, where a non-NaN value
206 * is treated as 'less than' a NaN value (unlike Math.min which does the
212 protected static float min(float f1, float f2)
216 return Float.isNaN(f2) ? f1 : f2;
220 return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
225 * standard constructor
227 public FeatureStore()
229 this(INTERVAL_STORE_DEFAULT);
233 * constructor for testing only
235 public FeatureStore(int intervalStoreType)
239 // ? new intervalstore.nonc.IntervalStore<>(true)
240 // : new intervalstore.impl.IntervalStore<>();
241 getIntervalStore(intervalStoreType);
242 positionalFeatureGroups = new HashSet<>();
243 nonPositionalFeatureGroups = new HashSet<>();
244 positionalMinScore = Float.NaN;
245 positionalMaxScore = Float.NaN;
246 nonPositionalMinScore = Float.NaN;
247 nonPositionalMaxScore = Float.NaN;
249 // we only construct nonPositionalFeatures, contactFeatures if we need to
252 private IntervalStoreI<SequenceFeature> getIntervalStore(int type)
254 switch (type != INTERVAL_STORE_DEFAULT ? type : //
256 ? intervalStoreJSOption
257 : intervalStoreJavaOption)
260 case INTERVAL_STORE_NCLIST_OBJECT:
261 return new intervalstore.impl.IntervalStore<>();
262 case INTERVAL_STORE_NCLIST_BUFFER_PRESORT:
263 return new intervalstore.nonc.IntervalStore<>(true);
264 case INTERVAL_STORE_NCLIST_BUFFER_NO_PRESORT:
265 return new intervalstore.nonc.IntervalStore<>(false);
266 case INTERVAL_STORE_LINKED_LIST_PRESORT:
267 return new intervalstore.nonc.IntervalStore0<>(true);
268 case INTERVAL_STORE_LINKED_LIST_NO_PRESORT:
269 return new intervalstore.nonc.IntervalStore0<>(false);
274 * Add a contact feature to the lists that hold them ordered by start (first
275 * contact) and by end (second contact) position, ensuring the lists remain
276 * ordered, and returns true. This method allows duplicate features to be
277 * added, so test before calling to avoid this.
282 protected synchronized boolean addContactFeature(SequenceFeature feature)
284 if (contactFeatureStarts == null)
286 contactFeatureStarts = new ArrayList<>();
287 contactFeatureEnds = new ArrayList<>();
291 * insert into list sorted by start (first contact position):
292 * binary search the sorted list to find the insertion point
294 int insertAt = BinarySearcher.findFirst(contactFeatureStarts, true,
295 Compare.GE, feature.begin);
296 contactFeatureStarts.add(insertAt, feature);
298 * insert into list sorted by end (second contact position):
299 * binary search the sorted list to find the insertion point
301 contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end),
308 * Adds one sequence feature to the store, and returns true, unless the
309 * feature is already contained in the store, in which case this method
310 * returns false. Containment is determined by SequenceFeature.equals()
315 public boolean addFeature(SequenceFeature feature)
317 if (feature.isContactFeature())
319 if (containsContactFeature(feature))
323 positionalFeatureGroups.add(feature.getFeatureGroup());
324 if (feature.begin > lastContactStart)
326 lastContactStart = feature.begin;
328 addContactFeature(feature);
330 else if (feature.isNonPositional())
332 if (containsNonPositionalFeature(feature))
337 addNonPositionalFeature(feature);
341 if (!features.add(feature, false))
345 positionalFeatureGroups.add(feature.getFeatureGroup());
346 if (feature.begin > lastStart)
348 lastStart = feature.begin;
353 * record the total extent of positional features, to make
354 * getTotalFeatureLength possible; we count the length of a
355 * contact feature as 1
357 totalExtent += getFeatureLength(feature);
360 * record the minimum and maximum score for positional
361 * and non-positional features
363 float score = feature.getScore();
364 if (!Float.isNaN(score))
366 if (feature.isNonPositional())
368 nonPositionalMinScore = min(nonPositionalMinScore, score);
369 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
373 positionalMinScore = min(positionalMinScore, score);
374 positionalMaxScore = max(positionalMaxScore, score);
381 private void addFeaturesForGroup(String group,
382 Collection<SequenceFeature> sfs, List<SequenceFeature> result)
388 for (SequenceFeature sf : sfs)
390 String featureGroup = sf.getFeatureGroup();
391 if (group == null && featureGroup == null
392 || group != null && group.equals(featureGroup))
400 * Adds the feature to the list of non-positional features (with lazy
401 * instantiation of the list if it is null), and returns true. The feature
402 * group is added to the set of distinct feature groups for non-positional
403 * features. This method allows duplicate features, so test before calling to
408 protected boolean addNonPositionalFeature(SequenceFeature feature)
410 if (nonPositionalFeatures == null)
412 nonPositionalFeatures = new ArrayList<>();
415 nonPositionalFeatures.add(feature);
417 nonPositionalFeatureGroups.add(feature.getFeatureGroup());
423 * Answers true if this store contains the given feature (testing by
424 * SequenceFeature.equals), else false
429 public boolean contains(SequenceFeature feature)
431 if (feature.isNonPositional())
433 return containsNonPositionalFeature(feature);
436 if (feature.isContactFeature())
438 return containsContactFeature(feature);
441 return containsPositionalFeature(feature);
445 private boolean containsPositionalFeature(SequenceFeature feature)
447 return features == null || feature.begin > lastStart ? false
448 : features.contains(feature);
452 * Answers true if this store already contains a contact feature equal to the
453 * given feature (by {@code SequenceFeature.equals()} test), else false
458 private boolean containsContactFeature(SequenceFeature feature)
460 return contactFeatureStarts != null && feature.begin <= lastContactStart
461 && listContains(contactFeatureStarts, feature);
465 * Answers true if this store already contains a non-positional feature equal
466 * to the given feature (by {@code SequenceFeature.equals()} test), else false
471 private boolean containsNonPositionalFeature(SequenceFeature feature)
473 return nonPositionalFeatures == null ? false
474 : nonPositionalFeatures.contains(feature);
478 * Deletes the given feature from the store, returning true if it was found
479 * (and deleted), else false. This method makes no assumption that the feature
480 * is in the 'expected' place in the store, in case it has been modified since
485 public synchronized boolean delete(SequenceFeature sf)
487 boolean removed = false;
490 * try contact positions (and if found, delete
491 * from both lists of contact positions)
493 if (!removed && contactFeatureStarts != null)
495 removed = contactFeatureStarts.remove(sf);
498 contactFeatureEnds.remove(sf);
503 * if not found, try non-positional features
505 if (!removed && nonPositionalFeatures != null)
507 removed = nonPositionalFeatures.remove(sf);
511 * if not found, try nested features
513 if (!removed && features != null)
515 removed = features.remove(sf);
526 public List<SequenceFeature> findOverlappingFeatures(long start, long end)
528 return findOverlappingFeatures(start, end, null);
531 public List<SequenceFeature> getContactFeatures()
533 return getContactFeatures(new ArrayList<>());
537 * Answers a list of all contact features. If there are none, returns an
538 * immutable empty list.
542 public List<SequenceFeature> getContactFeatures(
543 List<SequenceFeature> result)
545 if (contactFeatureStarts != null)
547 result.addAll(contactFeatureStarts);
553 * Answers the number of positional (or non-positional) features stored.
554 * Contact features count as 1.
559 public int getFeatureCount(boolean positional)
563 return nonPositionalFeatures == null ? 0
564 : nonPositionalFeatures.size();
567 return (contactFeatureStarts == null ? 0 : contactFeatureStarts.size())
572 * Answers the set of distinct feature groups stored, possibly including null,
573 * as an unmodifiable view of the set. The parameter determines whether the
574 * groups for positional or for non-positional features are returned.
576 * @param positionalFeatures
579 public Set<String> getFeatureGroups(boolean positionalFeatures)
581 if (positionalFeatures)
583 return Collections.unmodifiableSet(positionalFeatureGroups);
587 return nonPositionalFeatureGroups == null
588 ? Collections.<String> emptySet()
589 : Collections.unmodifiableSet(nonPositionalFeatureGroups);
593 public Collection<SequenceFeature> getFeatures()
599 * Answers a list of all either positional or non-positional features whose
600 * feature group matches the given group (which may be null)
606 public List<SequenceFeature> getFeaturesForGroup(boolean positional,
609 List<SequenceFeature> result = new ArrayList<>();
612 * if we know features don't include the target group, no need
613 * to inspect them for matches
615 if (positional && !positionalFeatureGroups.contains(group)
616 || !positional && !nonPositionalFeatureGroups.contains(group))
623 addFeaturesForGroup(group, contactFeatureStarts, result);
624 addFeaturesForGroup(group, features, result);
628 addFeaturesForGroup(group, nonPositionalFeatures, result);
634 * Answers the maximum score held for positional or non-positional features.
635 * This may be Float.NaN if there are no features, are none has a non-NaN
641 public float getMaximumScore(boolean positional)
643 return positional ? positionalMaxScore : nonPositionalMaxScore;
647 * Answers the minimum score held for positional or non-positional features.
648 * This may be Float.NaN if there are no features, are none has a non-NaN
654 public float getMinimumScore(boolean positional)
656 return positional ? positionalMinScore : nonPositionalMinScore;
659 public List<SequenceFeature> getNonPositionalFeatures()
661 return getNonPositionalFeatures(new ArrayList<>());
665 * Answers a list of all non-positional features. If there are none, returns
666 * an immutable empty list.
670 public List<SequenceFeature> getNonPositionalFeatures(
671 List<SequenceFeature> result)
673 if (nonPositionalFeatures != null)
675 result.addAll(nonPositionalFeatures);
680 public List<SequenceFeature> getPositionalFeatures()
682 return getPositionalFeatures(new ArrayList<>());
686 * Answers a list of all positional features stored, in no guaranteed order
690 public List<SequenceFeature> getPositionalFeatures(
691 List<SequenceFeature> result)
695 * add any contact features - from the list by start position
697 if (contactFeatureStarts != null)
699 result.addAll(contactFeatureStarts);
703 * add any nested features
705 if (features != null)
707 result.addAll(features);
714 * Answers the total length of positional features (or zero if there are
715 * none). Contact features contribute a value of 1 to the total.
719 public int getTotalFeatureLength()
725 * Answers true if this store has no features, else false
729 public boolean isEmpty()
731 boolean hasFeatures = (contactFeatureStarts != null
732 && !contactFeatureStarts.isEmpty())
733 || (nonPositionalFeatures != null
734 && !nonPositionalFeatures.isEmpty())
735 || features.size() > 0;
741 * Rescan all features to recompute any cached values after an entry has been
742 * deleted. This is expected to be an infrequent event, so performance here is
745 protected synchronized void rescanAfterDelete()
747 positionalFeatureGroups.clear();
748 nonPositionalFeatureGroups.clear();
750 positionalMinScore = Float.NaN;
751 positionalMaxScore = Float.NaN;
752 nonPositionalMinScore = Float.NaN;
753 nonPositionalMaxScore = Float.NaN;
755 * scan non-positional features for groups and scores
757 if (nonPositionalFeatures != null)
759 List<SequenceFeature> list = nonPositionalFeatures;
760 for (int i = 0, n = list.size(); i < n; i++)
762 SequenceFeature sf = list.get(i);
763 nonPositionalFeatureGroups.add(sf.getFeatureGroup());
764 float score = sf.getScore();
765 nonPositionalMinScore = min(nonPositionalMinScore, score);
766 nonPositionalMaxScore = max(nonPositionalMaxScore, score);
771 * scan positional features for groups, scores and extents
774 rescanPositional(contactFeatureStarts);
775 rescanPositional(features);
778 private void rescanPositional(Collection<SequenceFeature> sfs)
784 for (SequenceFeature sf : sfs)
786 positionalFeatureGroups.add(sf.getFeatureGroup());
787 float score = sf.getScore();
788 positionalMinScore = min(positionalMinScore, score);
789 positionalMaxScore = max(positionalMaxScore, score);
790 totalExtent += getFeatureLength(sf);
795 * Adds the shift amount to the start and end of all positional features whose
796 * start position is at or after fromPosition. Returns true if at least one
797 * feature was shifted, else false.
799 * @param fromPosition
803 public synchronized boolean shiftFeatures(int fromPosition, int shiftBy)
806 * Because begin and end are final fields (to ensure the data store's
807 * integrity), we have to delete each feature and re-add it as amended.
808 * (Although a simple shift of all values would preserve data integrity!)
810 boolean modified = false;
811 List<SequenceFeature> list = getPositionalFeatures();
812 for (int i = 0, n = list.size(); i < n; i++)
814 SequenceFeature sf = list.get(i);
815 if (sf.getBegin() >= fromPosition)
818 int newBegin = sf.getBegin() + shiftBy;
819 int newEnd = sf.getEnd() + shiftBy;
822 * sanity check: don't shift left of the first residue
826 newBegin = Math.max(1, newBegin);
827 SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd,
828 sf.getFeatureGroup(), sf.getScore());
838 * Answers the position (0, 1...) in the list of the first entry whose end
839 * position is not less than {@ pos}. If no such entry is found, answers the
840 * length of the list.
846 protected int findFirstEnd(List<SequenceFeature> list, long pos)
848 return BinarySearcher.findFirst(list, false, Compare.GE, (int) pos);
852 * Adds contact features to the result list where either the second or the
853 * first contact position lies within the target range
859 protected void findContactFeatures(long from, long to,
860 List<SequenceFeature> result)
862 if (contactFeatureStarts != null)
864 findContactStartOverlaps(from, to, result);
865 findContactEndOverlaps(from, to, result);
870 * Adds to the result list any contact features whose end (second contact
871 * point), but not start (first contact point), lies in the query from-to
878 private void findContactEndOverlaps(long from, long to,
879 List<SequenceFeature> result)
882 * find the first contact feature (if any)
883 * whose end point is not before the target range
885 int index = findFirstEnd(contactFeatureEnds, from);
887 int n = contactFeatureEnds.size();
890 SequenceFeature sf = contactFeatureEnds.get(index);
891 if (!sf.isContactFeature())
893 System.err.println("Error! non-contact feature type " + sf.getType()
894 + " in contact features list");
899 int begin = sf.getBegin();
900 if (begin >= from && begin <= to)
903 * this feature's first contact position lies in the search range
904 * so we don't include it in results a second time
910 if (sf.getEnd() > to)
913 * this feature (and all following) has end point after the target range
919 * feature has end >= from and end <= to
920 * i.e. contact end point lies within overlap search range
928 * Adds contact features whose start position lies in the from-to range to the
935 private void findContactStartOverlaps(long from, long to,
936 List<SequenceFeature> result)
938 int index = BinarySearcher.findFirst(contactFeatureStarts, true,
939 Compare.GE, (int) from);
941 while (index < contactFeatureStarts.size())
943 SequenceFeature sf = contactFeatureStarts.get(index);
944 if (!sf.isContactFeature())
946 System.err.println("Error! non-contact feature " + sf.toString()
947 + " in contact features list");
951 if (sf.getBegin() > to)
954 * this feature's start (and all following) follows the target range
960 * feature has begin >= from and begin <= to
961 * i.e. contact start point lies within overlap search range
969 * Returns a (possibly empty) list of features whose extent overlaps the given
970 * range. The returned list is not ordered. Contact features are included if
971 * either of the contact points lies within the range. If the {@code result}
972 * parameter is not null, new entries are added to this list and the (possibly
973 * extended) list returned.
976 * start position of overlap range (inclusive)
978 * end position of overlap range (inclusive)
982 public List<SequenceFeature> findOverlappingFeatures(long start, long end,
983 List<SequenceFeature> result)
987 result = new ArrayList<>();
990 findContactFeatures(start, end, result);
991 features.findOverlaps(start, end, result);