/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel.features;
import jalview.datamodel.SequenceFeature;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import intervalstore.api.IntervalStoreI;
import intervalstore.impl.BinarySearcher;
import intervalstore.impl.BinarySearcher.Compare;
public class FeatureStore
{
public enum IntervalStoreType
{
/**
* original NCList-based IntervalStore
*/
INTERVAL_STORE_NCLIST,
/**
* linked-list IntervalStore
*/
INTERVAL_STORE_LINKED_LIST,
/**
* NCList as array buffer IntervalStore
*/
INTERVAL_STORE_NCARRAY
}
/*
* track largest start for quick insertion of ordered features
*/
protected int maxStart = -1;
protected int maxContactStart = -1;
/*
* Non-positional features have no (zero) start/end position.
* Kept as a separate list in case this criterion changes in future.
*/
List nonPositionalFeatures;
/*
* contact features ordered by first contact position
*/
List contactFeatureStarts;
/*
* contact features ordered by second contact position
*/
List contactFeatureEnds;
/*
* IntervalStore holds remaining features and provides efficient
* query for features overlapping any given interval
*/
IntervalStoreI features;
/*
* Feature groups represented in stored positional features
* (possibly including null)
*/
Set positionalFeatureGroups;
/*
* Feature groups represented in stored non-positional features
* (possibly including null)
*/
Set nonPositionalFeatureGroups;
/*
* the total length of all positional features; contact features count 1 to
* the total and 1 to size(), consistent with an average 'feature length' of 1
*/
int totalExtent;
float positionalMinScore;
float positionalMaxScore;
float nonPositionalMinScore;
float nonPositionalMaxScore;
/**
* Answers the 'length' of the feature, counting 0 for non-positional features
* and 1 for contact features
*
* @param feature
* @return
*/
protected static int getFeatureLength(SequenceFeature feature)
{
if (feature.isNonPositional())
{
return 0;
}
if (feature.isContactFeature())
{
return 1;
}
return 1 + feature.getEnd() - feature.getBegin();
}
/**
* Answers true if the list contains the feature, else false. This method is
* optimised for the condition that the list is sorted on feature start
* position ascending, and will give unreliable results if this does not hold.
*
* @param list
* @param feature
* @return
*/
public static boolean listContains(List list,
SequenceFeature feature)
{
if (list == null || feature == null)
{
return false;
}
/*
* locate the first entry in the list which does not precede the feature
*/
int begin = feature.begin;
int pos = BinarySearcher.findFirst(list, true, Compare.GE, begin);
int len = list.size();
while (pos < len)
{
SequenceFeature sf = list.get(pos);
if (sf.begin > begin)
{
return false; // no match found
}
if (sf.equals(feature))
{
return true;
}
pos++;
}
return false;
}
/**
* A helper method to return the maximum of two floats, where a non-NaN value
* is treated as 'greater than' a NaN value (unlike Math.max which does the
* opposite)
*
* @param f1
* @param f2
*/
protected static float max(float f1, float f2)
{
if (Float.isNaN(f1))
{
return Float.isNaN(f2) ? f1 : f2;
}
else
{
return Float.isNaN(f2) ? f1 : Math.max(f1, f2);
}
}
/**
* A helper method to return the minimum of two floats, where a non-NaN value
* is treated as 'less than' a NaN value (unlike Math.min which does the
* opposite)
*
* @param f1
* @param f2
*/
protected static float min(float f1, float f2)
{
if (Float.isNaN(f1))
{
return Float.isNaN(f2) ? f1 : f2;
}
else
{
return Float.isNaN(f2) ? f1 : Math.min(f1, f2);
}
}
/**
* Constructor that defaults to using NCList IntervalStore
*/
public FeatureStore()
{
this(IntervalStoreType.INTERVAL_STORE_NCLIST);
}
/**
* Constructor that allows an alternative IntervalStore implementation to be
* chosen
*/
public FeatureStore(IntervalStoreType intervalStoreType)
{
features = getIntervalStore(intervalStoreType);
positionalFeatureGroups = new HashSet<>();
nonPositionalFeatureGroups = new HashSet<>();
positionalMinScore = Float.NaN;
positionalMaxScore = Float.NaN;
nonPositionalMinScore = Float.NaN;
nonPositionalMaxScore = Float.NaN;
// only construct nonPositionalFeatures or contactFeatures if needed
}
private IntervalStoreI getIntervalStore(
IntervalStoreType type)
{
switch (type)
{
default:
case INTERVAL_STORE_NCLIST:
return new intervalstore.impl.IntervalStore<>();
case INTERVAL_STORE_NCARRAY:
return new intervalstore.nonc.IntervalStore<>();
case INTERVAL_STORE_LINKED_LIST:
return new intervalstore.nonc.IntervalStore0<>();
}
}
/**
* Add a contact feature to the lists that hold them ordered by start (first
* contact) and by end (second contact) position, ensuring the lists remain
* ordered, and returns true. This method allows duplicate features to be
* added, so test before calling to avoid this.
*
* @param feature
* @return
*/
protected synchronized boolean addContactFeature(SequenceFeature feature)
{
if (contactFeatureStarts == null)
{
contactFeatureStarts = new ArrayList<>();
contactFeatureEnds = new ArrayList<>();
}
/*
* insert into list sorted by start (first contact position):
* binary search the sorted list to find the insertion point
*/
int insertAt = BinarySearcher.findFirst(contactFeatureStarts, true,
Compare.GE, feature.begin);
contactFeatureStarts.add(insertAt, feature);
/*
* insert into list sorted by end (second contact position):
* binary search the sorted list to find the insertion point
*/
contactFeatureEnds.add(findFirstEnd(contactFeatureEnds, feature.end),
feature);
return true;
}
/**
* Adds one sequence feature to the store, and returns true, unless the
* feature is already contained in the store, in which case this method
* returns false. Containment is determined by SequenceFeature.equals()
* comparison.
*
* @param feature
*/
public boolean addFeature(SequenceFeature feature)
{
if (feature.isContactFeature())
{
if (containsContactFeature(feature))
{
return false;
}
positionalFeatureGroups.add(feature.getFeatureGroup());
if (feature.begin > maxContactStart)
{
maxContactStart = feature.begin;
}
addContactFeature(feature);
}
else if (feature.isNonPositional())
{
if (containsNonPositionalFeature(feature))
{
return false;
}
addNonPositionalFeature(feature);
}
else
{
if (!features.add(feature, false))
{
return false;
}
positionalFeatureGroups.add(feature.getFeatureGroup());
if (feature.begin > maxStart)
{
maxStart = feature.begin;
}
}
/*
* record the total extent of positional features, to make
* getTotalFeatureLength possible; we count the length of a
* contact feature as 1
*/
totalExtent += getFeatureLength(feature);
/*
* record the minimum and maximum score for positional
* and non-positional features
*/
float score = feature.getScore();
if (!Float.isNaN(score))
{
if (feature.isNonPositional())
{
nonPositionalMinScore = min(nonPositionalMinScore, score);
nonPositionalMaxScore = max(nonPositionalMaxScore, score);
}
else
{
positionalMinScore = min(positionalMinScore, score);
positionalMaxScore = max(positionalMaxScore, score);
}
}
return true;
}
private void addFeaturesForGroup(String group,
Collection sfs, List result)
{
if (sfs == null)
{
return;
}
for (SequenceFeature sf : sfs)
{
String featureGroup = sf.getFeatureGroup();
if (group == null && featureGroup == null
|| group != null && group.equals(featureGroup))
{
result.add(sf);
}
}
}
/**
* Adds the feature to the list of non-positional features (with lazy
* instantiation of the list if it is null), and returns true. The feature
* group is added to the set of distinct feature groups for non-positional
* features. This method allows duplicate features, so test before calling to
* prevent this.
*
* @param feature
*/
protected boolean addNonPositionalFeature(SequenceFeature feature)
{
if (nonPositionalFeatures == null)
{
nonPositionalFeatures = new ArrayList<>();
}
nonPositionalFeatures.add(feature);
nonPositionalFeatureGroups.add(feature.getFeatureGroup());
return true;
}
/**
* Answers true if this store contains the given feature (testing by
* SequenceFeature.equals), else false
*
* @param feature
* @return
*/
public boolean contains(SequenceFeature feature)
{
if (feature.isNonPositional())
{
return containsNonPositionalFeature(feature);
}
if (feature.isContactFeature())
{
return containsContactFeature(feature);
}
return containsPositionalFeature(feature);
}
private boolean containsPositionalFeature(SequenceFeature feature)
{
return features == null || feature.begin > maxStart ? false
: features.contains(feature);
}
/**
* Answers true if this store already contains a contact feature equal to the
* given feature (by {@code SequenceFeature.equals()} test), else false
*
* @param feature
* @return
*/
private boolean containsContactFeature(SequenceFeature feature)
{
return contactFeatureStarts != null && feature.begin <= maxContactStart
&& listContains(contactFeatureStarts, feature);
}
/**
* Answers true if this store already contains a non-positional feature equal
* to the given feature (by {@code SequenceFeature.equals()} test), else false
*
* @param feature
* @return
*/
private boolean containsNonPositionalFeature(SequenceFeature feature)
{
return nonPositionalFeatures == null ? false
: nonPositionalFeatures.contains(feature);
}
/**
* Deletes the given feature from the store, returning true if it was found
* (and deleted), else false. This method makes no assumption that the feature
* is in the 'expected' place in the store, in case it has been modified since
* it was added.
*
* @param sf
*/
public synchronized boolean delete(SequenceFeature sf)
{
boolean removed = false;
/*
* try contact positions (and if found, delete
* from both lists of contact positions)
*/
if (!removed && contactFeatureStarts != null)
{
removed = contactFeatureStarts.remove(sf);
if (removed)
{
contactFeatureEnds.remove(sf);
}
}
/*
* if not found, try non-positional features
*/
if (!removed && nonPositionalFeatures != null)
{
removed = nonPositionalFeatures.remove(sf);
}
/*
* if not found, try nested features
*/
if (!removed && features != null)
{
removed = features.remove(sf);
}
if (removed)
{
rescanAfterDelete();
}
return removed;
}
public List findOverlappingFeatures(long start, long end)
{
return findOverlappingFeatures(start, end, null);
}
public List getContactFeatures()
{
return getContactFeatures(new ArrayList<>());
}
/**
* Answers a list of all contact features. If there are none, returns an
* immutable empty list.
*
* @return
*/
public List getContactFeatures(
List result)
{
if (contactFeatureStarts != null)
{
result.addAll(contactFeatureStarts);
}
return result;
}
/**
* Answers the number of positional (or non-positional) features stored.
* Contact features count as 1.
*
* @param positional
* @return
*/
public int getFeatureCount(boolean positional)
{
if (!positional)
{
return nonPositionalFeatures == null ? 0
: nonPositionalFeatures.size();
}
return (contactFeatureStarts == null ? 0 : contactFeatureStarts.size())
+ features.size();
}
/**
* Answers the set of distinct feature groups stored, possibly including null,
* as an unmodifiable view of the set. The parameter determines whether the
* groups for positional or for non-positional features are returned.
*
* @param positionalFeatures
* @return
*/
public Set getFeatureGroups(boolean positionalFeatures)
{
if (positionalFeatures)
{
return Collections.unmodifiableSet(positionalFeatureGroups);
}
else
{
return nonPositionalFeatureGroups == null
? Collections. emptySet()
: Collections.unmodifiableSet(nonPositionalFeatureGroups);
}
}
/**
* Answers a list of all either positional or non-positional features whose
* feature group matches the given group (which may be null)
*
* @param positional
* @param group
* @return
*/
public List getFeaturesForGroup(boolean positional,
String group)
{
List result = new ArrayList<>();
/*
* if we know features don't include the target group, no need
* to inspect them for matches
*/
if (positional && !positionalFeatureGroups.contains(group)
|| !positional && !nonPositionalFeatureGroups.contains(group))
{
return result;
}
if (positional)
{
addFeaturesForGroup(group, contactFeatureStarts, result);
addFeaturesForGroup(group, features, result);
}
else
{
addFeaturesForGroup(group, nonPositionalFeatures, result);
}
return result;
}
/**
* Answers the maximum score held for positional or non-positional features.
* This may be Float.NaN if there are no features, are none has a non-NaN
* score.
*
* @param positional
* @return
*/
public float getMaximumScore(boolean positional)
{
return positional ? positionalMaxScore : nonPositionalMaxScore;
}
/**
* Answers the minimum score held for positional or non-positional features.
* This may be Float.NaN if there are no features, are none has a non-NaN
* score.
*
* @param positional
* @return
*/
public float getMinimumScore(boolean positional)
{
return positional ? positionalMinScore : nonPositionalMinScore;
}
public List getNonPositionalFeatures()
{
return getNonPositionalFeatures(new ArrayList<>());
}
/**
* Answers a list of all non-positional features. If there are none, returns
* an immutable empty list.
*
* @return
*/
public List getNonPositionalFeatures(
List result)
{
if (nonPositionalFeatures != null)
{
result.addAll(nonPositionalFeatures);
}
return result;
}
public List getPositionalFeatures()
{
return getPositionalFeatures(new ArrayList<>());
}
/**
* Answers a list of all positional features stored, in no guaranteed order
*
* @return
*/
public List getPositionalFeatures(
List result)
{
/*
* add any contact features - from the list by start position
*/
if (contactFeatureStarts != null)
{
result.addAll(contactFeatureStarts);
}
/*
* add any nested features
*/
if (features != null)
{
result.addAll(features);
}
return result;
}
/**
* Answers the total length of positional features (or zero if there are
* none). Contact features contribute a value of 1 to the total.
*
* @return
*/
public int getTotalFeatureLength()
{
return totalExtent;
}
/**
* Answers true if this store has no features, else false
*
* @return
*/
public boolean isEmpty()
{
boolean hasFeatures = (contactFeatureStarts != null
&& !contactFeatureStarts.isEmpty())
|| (nonPositionalFeatures != null
&& !nonPositionalFeatures.isEmpty())
|| features.size() > 0;
return !hasFeatures;
}
/**
* Rescan all features to recompute any cached values after an entry has been
* deleted. This is expected to be an infrequent event, so performance here is
* not critical.
*/
protected synchronized void rescanAfterDelete()
{
positionalFeatureGroups.clear();
nonPositionalFeatureGroups.clear();
totalExtent = 0;
positionalMinScore = Float.NaN;
positionalMaxScore = Float.NaN;
nonPositionalMinScore = Float.NaN;
nonPositionalMaxScore = Float.NaN;
/*
* scan non-positional features for groups and scores
*/
if (nonPositionalFeatures != null)
{
for (int i = 0, n = nonPositionalFeatures.size(); i < n; i++)
{
SequenceFeature sf = nonPositionalFeatures.get(i);
nonPositionalFeatureGroups.add(sf.getFeatureGroup());
float score = sf.getScore();
nonPositionalMinScore = min(nonPositionalMinScore, score);
nonPositionalMaxScore = max(nonPositionalMaxScore, score);
}
}
rescanPositional(contactFeatureStarts);
rescanPositional(features);
}
/**
* Scans the given features and updates cached feature groups, minimum and
* maximum feature score, and total feature extent (length) for positional
* features
*
* @param sfs
*/
private void rescanPositional(Collection sfs)
{
if (sfs == null)
{
return;
}
for (SequenceFeature sf : sfs)
{
positionalFeatureGroups.add(sf.getFeatureGroup());
float score = sf.getScore();
positionalMinScore = min(positionalMinScore, score);
positionalMaxScore = max(positionalMaxScore, score);
totalExtent += getFeatureLength(sf);
}
}
/**
* Adds the shift amount to the start and end of all positional features whose
* start position is at or after fromPosition. Returns true if at least one
* feature was shifted, else false.
*
* @param fromPosition
* @param shiftBy
* @return
*/
public synchronized boolean shiftFeatures(int fromPosition, int shiftBy)
{
/*
* Because begin and end are final fields (to ensure the data store's
* integrity), we have to delete each feature and re-add it as amended.
* (Although a simple shift of all values would preserve data integrity!)
*/
boolean modified = false;
List list = getPositionalFeatures();
for (int i = 0, n = list.size(); i < n; i++)
{
SequenceFeature sf = list.get(i);
if (sf.getBegin() >= fromPosition)
{
modified = true;
int newBegin = sf.getBegin() + shiftBy;
int newEnd = sf.getEnd() + shiftBy;
/*
* sanity check: don't shift left of the first residue
*/
if (newEnd > 0)
{
newBegin = Math.max(1, newBegin);
SequenceFeature sf2 = new SequenceFeature(sf, newBegin, newEnd,
sf.getFeatureGroup(), sf.getScore());
addFeature(sf2);
}
delete(sf);
}
}
return modified;
}
/**
* Answers the position (0, 1...) in the list of the first entry whose end
* position is not less than {@ pos}. If no such entry is found, answers the
* length of the list.
*
* @param list
* @param pos
* @return
*/
protected int findFirstEnd(List list, long pos)
{
return BinarySearcher.findFirst(list, false, Compare.GE, (int) pos);
}
/**
* Adds contact features to the result list where either the second or the
* first contact position lies within the target range
*
* @param from
* @param to
* @param result
*/
protected void findContactFeatures(long from, long to,
List result)
{
if (contactFeatureStarts != null)
{
findContactStartOverlaps(from, to, result);
findContactEndOverlaps(from, to, result);
}
}
/**
* Adds to the result list any contact features whose end (second contact
* point), but not start (first contact point), lies in the query from-to
* range
*
* @param from
* @param to
* @param result
*/
private void findContactEndOverlaps(long from, long to,
List result)
{
/*
* find the first contact feature (if any)
* whose end point is not before the target range
*/
int index = findFirstEnd(contactFeatureEnds, from);
int n = contactFeatureEnds.size();
while (index < n)
{
SequenceFeature sf = contactFeatureEnds.get(index);
if (!sf.isContactFeature())
{
System.err.println("Error! non-contact feature type " + sf.getType()
+ " in contact features list");
index++;
continue;
}
int begin = sf.getBegin();
if (begin >= from && begin <= to)
{
/*
* this feature's first contact position lies in the search range
* so we don't include it in results a second time
*/
index++;
continue;
}
if (sf.getEnd() > to)
{
/*
* this feature (and all following) has end point after the target range
*/
break;
}
/*
* feature has end >= from and end <= to
* i.e. contact end point lies within overlap search range
*/
result.add(sf);
index++;
}
}
/**
* Adds contact features whose start position lies in the from-to range to the
* result list
*
* @param from
* @param to
* @param result
*/
private void findContactStartOverlaps(long from, long to,
List result)
{
int index = BinarySearcher.findFirst(contactFeatureStarts, true,
Compare.GE, (int) from);
while (index < contactFeatureStarts.size())
{
SequenceFeature sf = contactFeatureStarts.get(index);
if (!sf.isContactFeature())
{
System.err.println("Error! non-contact feature " + sf.toString()
+ " in contact features list");
index++;
continue;
}
if (sf.getBegin() > to)
{
/*
* this feature's start (and all following) follows the target range
*/
break;
}
/*
* feature has begin >= from and begin <= to
* i.e. contact start point lies within overlap search range
*/
result.add(sf);
index++;
}
}
/**
* Returns a (possibly empty) list of features whose extent overlaps the given
* range. The returned list is not ordered. Contact features are included if
* either of the contact points lies within the range. If the {@code result}
* parameter is not null, new entries are added to this list and the (possibly
* extended) list returned.
*
* @param start
* start position of overlap range (inclusive)
* @param end
* end position of overlap range (inclusive)
* @param result
* @return
*/
public List findOverlappingFeatures(long start, long end,
List result)
{
if (result == null)
{
result = new ArrayList<>();
}
findContactFeatures(start, end, result);
features.findOverlaps(start, end, result);
return result;
}
}