From 51728d3951398f9c12d7017c776953f17322cc68 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 20 Mar 2017 18:40:43 +0000 Subject: [PATCH] JAL-2446 FeatureStore with NCList - work in progress --- src/jalview/datamodel/Sequence.java | 15 + src/jalview/datamodel/SequenceFeature.java | 7 +- src/jalview/datamodel/SequenceI.java | 11 + src/jalview/datamodel/features/ContiguousI.java | 8 + .../datamodel/features/FeatureLocationI.java | 10 + src/jalview/datamodel/features/FeatureStore.java | 445 ++++++++++++++++++++ src/jalview/datamodel/features/NCList.java | 324 ++++++++++++++ src/jalview/datamodel/features/NCNode.java | 124 ++++++ .../datamodel/features/RangeComparator.java | 69 +++ .../datamodel/features/SequenceFeatures.java | 60 +++ src/jalview/datamodel/features/SubList.java | 27 ++ src/jalview/gui/OverviewPanel.java | 3 + src/jalview/gui/SeqCanvas.java | 3 +- .../renderer/seqfeatures/FeatureRenderer.java | 10 +- .../datamodel/features/FeatureStoreTest.java | 232 ++++++++++ test/jalview/datamodel/features/NCListTest.java | 158 +++++++ .../datamodel/features/RangeComparatorTest.java | 84 ++++ 17 files changed, 1586 insertions(+), 4 deletions(-) create mode 100644 src/jalview/datamodel/features/ContiguousI.java create mode 100644 src/jalview/datamodel/features/FeatureLocationI.java create mode 100644 src/jalview/datamodel/features/FeatureStore.java create mode 100644 src/jalview/datamodel/features/NCList.java create mode 100644 src/jalview/datamodel/features/NCNode.java create mode 100644 src/jalview/datamodel/features/RangeComparator.java create mode 100644 src/jalview/datamodel/features/SequenceFeatures.java create mode 100644 src/jalview/datamodel/features/SubList.java create mode 100644 test/jalview/datamodel/features/FeatureStoreTest.java create mode 100644 test/jalview/datamodel/features/NCListTest.java create mode 100644 test/jalview/datamodel/features/RangeComparatorTest.java diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index c8b94ce..fd60c03 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -22,6 +22,7 @@ package jalview.datamodel; import jalview.analysis.AlignSeq; import jalview.api.DBRefEntryI; +import jalview.datamodel.features.SequenceFeatures; import jalview.util.Comparison; import jalview.util.DBRefUtils; import jalview.util.MapList; @@ -81,6 +82,8 @@ public class Sequence extends ASequence implements SequenceI /** array of sequence features - may not be null for a valid sequence object */ public SequenceFeature[] sequenceFeatures; + private SequenceFeatures sequenceFeatureStore; + /** * Creates a new Sequence object. * @@ -120,6 +123,7 @@ public class Sequence extends ASequence implements SequenceI this.sequence = sequence2; this.start = start2; this.end = end2; + sequenceFeatureStore = new SequenceFeatures(); parseId(); checkValidRange(); } @@ -339,6 +343,8 @@ public class Sequence extends ASequence implements SequenceI temp[sequenceFeatures.length] = sf; sequenceFeatures = temp; + + sequenceFeatureStore.add(sf); } @Override @@ -1475,4 +1481,13 @@ public class Sequence extends ASequence implements SequenceI } } + @Override + public List findFeatures(String type, int from, int to) + { + if (datasetSequence != null) + { + return datasetSequence.findFeatures(type, from, to); + } + return sequenceFeatureStore.findFeatures(type, from, to); + } } diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 15f54b9..c330df9 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -20,6 +20,8 @@ */ package jalview.datamodel; +import jalview.datamodel.features.FeatureLocationI; + import java.util.HashMap; import java.util.Map; import java.util.Vector; @@ -30,7 +32,7 @@ import java.util.Vector; * @author $author$ * @version $Revision$ */ -public class SequenceFeature +public class SequenceFeature implements FeatureLocationI { private static final String STATUS = "status"; @@ -268,6 +270,7 @@ public class SequenceFeature * * @return DOCUMENT ME! */ + @Override public int getBegin() { return begin; @@ -283,6 +286,7 @@ public class SequenceFeature * * @return DOCUMENT ME! */ + @Override public int getEnd() { return end; @@ -538,6 +542,7 @@ public class SequenceFeature * positions, rather than ends of a range. Such features may be visualised or * reported differently to features on a range. */ + @Override public boolean isContactFeature() { // TODO abstract one day to a FeatureType class diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index aec68ab..75394b1 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -468,4 +468,15 @@ public interface SequenceI extends ASequenceI * list */ public List getPrimaryDBRefs(); + + /** + * Returns a (possibly empty) list of sequence features of the given type that + * overlap the range from-to (inclusive) + * + * @param type + * @param from + * @param to + * @return + */ + List findFeatures(String type, int from, int to); } diff --git a/src/jalview/datamodel/features/ContiguousI.java b/src/jalview/datamodel/features/ContiguousI.java new file mode 100644 index 0000000..d0b3259 --- /dev/null +++ b/src/jalview/datamodel/features/ContiguousI.java @@ -0,0 +1,8 @@ +package jalview.datamodel.features; + +public interface ContiguousI +{ + int getBegin(); // todo want long for genomic positions? + + int getEnd(); +} diff --git a/src/jalview/datamodel/features/FeatureLocationI.java b/src/jalview/datamodel/features/FeatureLocationI.java new file mode 100644 index 0000000..d6f0389 --- /dev/null +++ b/src/jalview/datamodel/features/FeatureLocationI.java @@ -0,0 +1,10 @@ +package jalview.datamodel.features; + +/** + * An extension of ContiguousI that allows start/end values to be interpreted + * instead as two contact positions + */ +public interface FeatureLocationI extends ContiguousI +{ + boolean isContactFeature(); +} diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java new file mode 100644 index 0000000..bd94c8a --- /dev/null +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -0,0 +1,445 @@ +package jalview.datamodel.features; + +import jalview.datamodel.SequenceFeature; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * A data store for a set of sequence features that supports efficient lookup of + * features overlapping a given range. + * + * @author gmcarstairs + * + */ +public class FeatureStore +{ + Comparator startOrdering = new RangeComparator(true); + + Comparator endOrdering = new RangeComparator(false); + + /* + * An ordered list of features, with the promise that no feature in the list + * properly contains any other. This constraint allows bounded linear search + * of the list for features overlapping a region. + * Contact features are not included in this list. + */ + List nonNestedFeatures; + + /* + * contact features ordered by first contact position + */ + List contactFeatureStarts; + + /* + * contact features ordered by second contact position + */ + List contactFeatureEnds; + + /* + * Nested Containment List is used to hold any features that are nested + * within (properly contained by) any other feature. This is a recursive tree + * which supports depth-first scan for features overlapping a range. + * It is used here as a 'catch-all' fallback for features that cannot be put + * into a simple ordered list without invalidating the search methods. + */ + NCList nestedFeatures; + + /** + * Constructor + */ + public FeatureStore() + { + nonNestedFeatures = new ArrayList(); + // we only construct contactFeatures and the NCList if we need to + } + + /** + * Add one entry to the data store + * + * @param feature + */ + public void addFeature(SequenceFeature feature) + { + if (feature.isContactFeature()) + { + addContactFeature(feature); + } + else + { + boolean added = addNonNestedFeature(feature); + if (!added) + { + /* + * detected a nested feature - put it in the NCList structure + */ + addNestedFeature(feature); + } + } + } + + /** + * Adds one feature to the NCList that can manage nested features (creating + * the NCList if necessary) + */ + protected synchronized void addNestedFeature(SequenceFeature feature) + { + if (nestedFeatures == null) + { + nestedFeatures = new NCList(feature); + } + else + { + nestedFeatures.add(feature); + } + } + + /** + * Add a feature to the list of non-nested features, maintaining the ordering + * of the list. A check is made for whether the feature is nested in (properly + * contained by) an existing feature. If there is no nesting, the feature is + * added to the list and the method returns true. If nesting is found, the + * feature is not added and the method returns false. + *

+ * Contact features are added at the position of their first contact point + * + * @param feature + * @return + */ + protected boolean addNonNestedFeature(SequenceFeature feature) + { + synchronized (nonNestedFeatures) + { + int insertPosition = binarySearchForAdd(nonNestedFeatures, feature); + + /* + * fail if we detect feature enclosure - of the new feature by + * the one preceding it, or of the next feature by the new one + */ + if (insertPosition > 0) + { + if (encloses(nonNestedFeatures.get(insertPosition - 1), feature)) + { + return false; + } + } + if (insertPosition < nonNestedFeatures.size()) + { + if (encloses(feature, nonNestedFeatures.get(insertPosition))) + { + return false; + } + } + + /* + * checks passed - add or append the feature + */ + if (insertPosition == nonNestedFeatures.size()) + { + nonNestedFeatures.add(feature); + } + else + { + nonNestedFeatures.add(insertPosition, feature); + } + return true; + } + } + + /** + * Answers true if range1 properly encloses range2, else false + * + * @param range1 + * @param range2 + * @return + */ + protected boolean encloses(ContiguousI range1, ContiguousI range2) + { + int begin1 = range1.getBegin(); + int begin2 = range2.getBegin(); + int end1 = range1.getEnd(); + int end2 = range2.getEnd(); + if (begin1 == begin2 && end1 > end2) + { + return true; + } + if (begin1 < begin2 && end1 >= end2) + { + return true; + } + return false; + } + + /** + * Answers the index of the first element in the given list which follows or + * matches the given feature in the sort order. If no such element, answers + * the length of the list. + * + * @param list + * @param feature + * + * @return + */ + protected int binarySearchForAdd(List list, SequenceFeature feature) + { + // TODO binary search! + int i = 0; + while (i < list.size()) + { + if (startOrdering.compare(nonNestedFeatures.get(i), feature) >= 0) + { + break; + } + i++; + } + return i; + } + + /** + * Add a contact feature to the lists that hold them ordered by start (first + * contact) and by end (second contact) position, ensuring the lists remain + * ordered + * + * @param feature + */ + protected synchronized void addContactFeature(SequenceFeature feature) + { + // TODO binary search for insertion points! + if (contactFeatureStarts == null) + { + contactFeatureStarts = new ArrayList(); + } + if (contactFeatureEnds == null) + { + contactFeatureEnds = new ArrayList(); + } + contactFeatureStarts.add(feature); + Collections.sort(contactFeatureStarts, startOrdering); + contactFeatureEnds.add(feature); + Collections.sort(contactFeatureEnds, endOrdering); + } + + /** + * Returns a (possibly empty) list of entries whose range overlaps the given + * range. The returned list is not ordered. Contact features are included if + * either of the contact points lies within the range. + * + * @param start + * start position of overlap range (inclusive) + * @param end + * end position of overlap range (inclusive) + * @return + */ + public List findOverlappingFeatures(long start, long end) + { + List result = new ArrayList(); + + findNonNestedFeatures(start, end, result); + + findContactFeatures(start, end, result); + + if (nestedFeatures != null) + { + result.addAll(nestedFeatures.findOverlaps(start, end)); + } + + return result; + } + + /** + * Adds contact features to the result list where either the second or the + * first contact position lies within the target range. + * + * @param from + * @param to + * @param result + */ + protected void findContactFeatures(long from, long to, + List result) + { + if (contactFeatureStarts != null) + { + findContactStartFeatures(from, to, result); + } + if (contactFeatureEnds != null) + { + findContactEndFeatures(from, to, result); + } + } + + /** + * @param from + * @param to + * @param result + */ + protected void findContactEndFeatures(long from, long to, + List result) + { + // TODO binary search for startPosition + for (int startPosition = 0; startPosition < contactFeatureEnds.size(); startPosition++) + { + SequenceFeature sf = contactFeatureEnds.get(startPosition); + if (!sf.isContactFeature()) + { + System.err.println("Error! non-contact feature type " + + sf.getType() + " in contact features list"); + continue; + } + int begin = sf.getBegin(); + if (begin >= from && begin <= to) + { + /* + * this feature's first contact position lies in the search range + * so we don't include it in results a second time + */ + continue; + } + int end = sf.getEnd(); + if (end >= from && end <= to) + { + result.add(sf); + } + } + } + + /** + * Returns the index of the first contact feature found whose end (second + * contact position) is not before the given start position. If no such + * feature is found, returns the length of the contact features list. + * + * @param start + * @return + */ + protected int contactsBinarySearch(long start) + { + // TODO binary search!! + int i = 0; + while (i < contactFeatureEnds.size()) + { + if (contactFeatureEnds.get(i).getEnd() >= start) + { + break; + } + i++; + } + + return i; + } + + /** + * Adds features to the result list that are at a single position which lies + * within the target range. Non-positional features (start=end=0) and contact + * features are excluded. + * + * @param from + * @param to + * @param result + */ + protected void findNonNestedFeatures(long from, long to, + List result) + { + int startIndex = binarySearch(nonNestedFeatures, from); + findNonNestedFeatures(startIndex, from, to, result); + } + + /** + * Scans the list of non-nested features, starting from startIndex, to find + * those that overlap the from-to range, and adds them to the result list. + * Returns the index of the first feature whose start position is after the + * target range (or the length of the whole list if none such feature exists). + * + * @param startIndex + * @param from + * @param to + * @param result + * @return + */ + protected int findNonNestedFeatures(final int startIndex, long from, + long to, + List result) + { + int i = startIndex; + while (i < nonNestedFeatures.size()) + { + SequenceFeature sf = nonNestedFeatures.get(i); + if (sf.getBegin() > to) + { + break; + } + int start = sf.getBegin(); + int end = sf.getEnd(); + if (sf.isContactFeature()) + { + end = start; + } + if (start <= to && end >= from) + { + result.add(sf); + } + i++; + } + return i; + } + + /** + * Performs a binary search of the (sorted) list to find the index of the + * first entry whose end position is not less than the target position (i.e. + * skip all features that properly precede the given position) + * + * @param features + * @param target + * @return + */ + protected int binarySearch(List features, long target) + { + int width = features.size() / 2; + int lastpos = width; + while (width > 0) + { + int end = features.get(lastpos).getEnd(); + width = width / 2; + if (end > target) + { + lastpos -= width; + } + else + { + lastpos += width; + } + } + // todo correct binary search + return lastpos > 1 ? lastpos - 2 : 0; + // return lastpos; + } + + /** + * Adds contact features whose start position lies in the from-to range to the + * result list + * + * @param from + * @param to + * @param result + */ + protected void findContactStartFeatures(long from, long to, + List result) + { + // TODO binary search for startPosition + for (int startPosition = 0; startPosition < contactFeatureStarts.size(); startPosition++) + { + SequenceFeature sf = contactFeatureStarts.get(startPosition); + if (!sf.isContactFeature()) + { + System.err.println("Error! non-contact feature type " + + sf.getType() + " in contact features list"); + continue; + } + int begin = sf.getBegin(); + if (begin >= from && begin <= to) + { + result.add(sf); + } + } + } +} diff --git a/src/jalview/datamodel/features/NCList.java b/src/jalview/datamodel/features/NCList.java new file mode 100644 index 0000000..7046670 --- /dev/null +++ b/src/jalview/datamodel/features/NCList.java @@ -0,0 +1,324 @@ +package jalview.datamodel.features; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * An adapted implementation of NCList as described in the paper + * + *

+ * todo
+ * 
+ */ +public class NCList +{ + + /* + * a list, in start position order, of sublists of ranges ordered so + * that each contains (or is the same as) the one that follows it + */ + private List> subranges; + + /* + * a comparator to sort intervals by start position ascending, with + * longer (enclosing) intervals preceding those they enclose + */ + Comparator intervalSorter = new RangeComparator(true); + + /** + * Constructor given a list of things that are each located on a contiguous + * interval. Note that the constructor may reorder the list. + *

+ * We assume here that for each range, start <= end. Behaviour for reverse + * ordered ranges is undefined. + * + * @param ranges + */ + public NCList(List ranges) + { + build(ranges); + } + + /** + * @param ranges + */ + protected void build(List ranges) + { + /* + * sort by start ascending so that contained intervals + * follow their containing interval + */ + Collections.sort(ranges, intervalSorter); + + List sublists = findSubranges(ranges); + + subranges = new ArrayList>(); + + /* + * convert each subrange to an NCNode consisting of a range and + * (possibly) its contained NCList + */ + for (SubList sublist : sublists) + { + subranges.add(new NCNode(ranges.subList(sublist.startIndex, + sublist.endIndex + 1))); + } + sublists.clear(); + } + + public NCList(T entry) + { + List ranges = new ArrayList(); + ranges.add(entry); + build(ranges); + } + + /** + * Traverses the sorted ranges to identify sublists, within which each + * interval contains the one that follows it + * + * @param ranges + * @return + */ + protected List findSubranges(List ranges) + { + List sublists = new ArrayList(); + + int listStartIndex = 0; + long lastEndPos = Long.MAX_VALUE; + + for (int i = 0; i < ranges.size(); i++) + { + ContiguousI nextInterval = ranges.get(i); + long nextStart = nextInterval.getBegin(); + long nextEnd = nextInterval.getEnd(); + if (nextStart > lastEndPos || nextEnd > lastEndPos) + { + /* + * this interval is not contained in the preceding one + * close off the last sublist + */ + sublists.add(new SubList(listStartIndex, i - 1)); + listStartIndex = i; + } + lastEndPos = nextEnd; + } + + sublists.add(new SubList(listStartIndex, ranges.size() - 1)); + return sublists; + } + + /** + * Adds one entry to the stored set + * + * @param entry + */ + public synchronized void add(T entry) + { + long start = entry.getBegin(); + long end = entry.getEnd(); + + /* + * cases: + * - precedes all subranges: add as NCNode on front of list + * - follows all subranges: add as NCNode on end of list + * - enclosed by a subrange - add recursively to subrange + * - encloses one or more subranges - push them inside it + * - none of the above - add as a new node and resort nodes list (?) + */ + + /* + * find the first subrange whose end does not precede entry's start + */ + int candidateIndex = findFirstOverlap(start); + if (candidateIndex == -1) + { + /* + * all subranges precede this one - add it on the end + */ + subranges.add(new NCNode(entry)); + return; + } + + /* + * search for maximal span of subranges i-k that the new entry + * encloses; or a subrange that encloses the new entry + */ + int i = candidateIndex; + boolean enclosing = false; + boolean overlapping = false; + + for (int j = candidateIndex; j < subranges.size(); j++) + { + NCNode subrange = subranges.get(j); + + if (end < subrange.getStart() && !overlapping) + { + /* + * new entry lies between subranges j-1 j + */ + subranges.add(j, new NCNode(entry)); + return; + } + + if (subrange.getStart() <= start && subrange.getEnd() >= end) + { + /* + * push new entry inside this subrange as it encloses it + */ + subrange.add(entry); + return; + } + + if (start <= subrange.getStart()) + { + if (end >= subrange.getEnd()) + { + /* + * new entry encloses this subrange (and possibly preceding ones); + * continue to find the maximal list it encloses + */ + enclosing = true; + continue; + } + else + { + /* + * entry spans from before this subrange to inside it + */ + if (enclosing) + { + /* + * entry encloses one or more preceding subranges + */ + addEnclosingRange(entry, i, j - 1); + return; + } + else + { + /* + * entry spans two subranges but doesn't enclose any + * so just add it + */ + subranges.add(j, new NCNode(entry)); + return; + } + } + } + } + } + + /** + * Update the tree so that the range of the new entry encloses subranges i to + * j (inclusive). That is, replace subranges i-j with a new subrange that + * contains them. + * + * @param entry + * @param i + * @param j + */ + protected synchronized void addEnclosingRange(T entry, int i, int j) + { + // TODO how to rebuild the subranges as an NCList? + + } + + /** + * Returns a (possibly empty) list of items whose extent overlaps the given + * range + * + * @param from + * start of overlap range (inclusive) + * @param to + * end of overlap range (inclusive) + * @return + */ + public List findOverlaps(long from, long to) + { + List result = new ArrayList(); + + findOverlaps(from, to, result); + + return result; + } + + /** + * Recursively searches the NCList adding any items that overlap the from-to + * range to the result list + * + * @param from + * @param to + * @param result + */ + protected void findOverlaps(long from, long to, + List result) + { + /* + * find the first sublist that might overlap, i.e. + * the first whose end position is >= from + */ + int candidateIndex = findFirstOverlap(from); + + if (candidateIndex == -1) + { + return; + } + + for (int i = candidateIndex; i < subranges.size(); i++) + { + NCNode candidate = subranges.get(i); + if (candidate.getStart() > to) + { + /* + * we are past the end of our target range + */ + break; + } + candidate.addOverlaps(from, to, result); + } + + } + + /** + * Search subranges for the first one whose end position is not before the + * target range's start position, i.e. the first one that may overlap the + * target range. Returns the index in the list of the first such range found, + * or -1 if none found. + * + * @param from + * @return + */ + protected int findFirstOverlap(long from) + { + // TODO binary search + // for now quick cheat linear search + int i = 0; + if (subranges != null) + { + for (NCNode subrange : subranges) + { + if (subrange.getEnd() >= from) + { + return i; + } + i++; + } + } + return -1; + } + + /** + * Formats the tree as a bracketed list e.g. + * + *

+   * [1-100 [10-30 [10-20]], 15-30 [20-20]]
+   * 
+ */ + @Override + public String toString() + { + return subranges.toString(); + } +} diff --git a/src/jalview/datamodel/features/NCNode.java b/src/jalview/datamodel/features/NCNode.java new file mode 100644 index 0000000..d4c7b0c --- /dev/null +++ b/src/jalview/datamodel/features/NCNode.java @@ -0,0 +1,124 @@ +package jalview.datamodel.features; + +import java.util.ArrayList; +import java.util.List; + +/** + * Each node of the NCList tree consists of a range, and (optionally) the NCList + * of ranges it encloses + * + * @param + */ +class NCNode +{ + /* + * deep size (number of ranges included) + */ + private int size; + + private V region; + + private NCList subregions; + + /** + * Constructor + * + * @param ranges + */ + NCNode(List ranges) + { + build(ranges); + } + + /** + * Constructo + * + * @param range + */ + NCNode(V range) + { + List ranges = new ArrayList(); + ranges.add(range); + build(ranges); + } + + /** + * @param ranges + */ + protected void build(List ranges) + { + size = ranges.size(); + + if (!ranges.isEmpty()) + { + region = ranges.get(0); + } + if (ranges.size() > 1) + { + subregions = new NCList(ranges.subList(1, ranges.size())); + } + } + + int getStart() + { + return region.getBegin(); + } + + int getEnd() + { + return region.getEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(10 * size); + sb.append(region.getBegin()).append("-").append(region.getEnd()); + if (subregions != null) + { + sb.append(" ").append(subregions.toString()); + } + return sb.toString(); + } + + /** + * Add any ranges that overlap the from-to range to the result list + * + * @param from + * @param to + * @param result + */ + void addOverlaps(long from, long to, List result) + { + if (region.getBegin() <= to && region.getEnd() >= from) + { + result.add(region); + } + if (subregions != null) + { + subregions.findOverlaps(from, to, result); + } + } + + /** + * Add one range to this subrange + * + * @param entry + */ + public synchronized void add(V entry) + { + if (entry.getBegin() < region.getBegin() || entry.getEnd() > region.getEnd()) { + throw new IllegalArgumentException(String.format( + "adding improper subrange %d-%d to range %d-%d", + entry.getBegin(), entry.getEnd(), region.getBegin(), + region.getEnd())); + } + if (subregions == null) + { + subregions = new NCList(entry); + } + else + { + subregions.add(entry); + } + } +} diff --git a/src/jalview/datamodel/features/RangeComparator.java b/src/jalview/datamodel/features/RangeComparator.java new file mode 100644 index 0000000..57e9b5f --- /dev/null +++ b/src/jalview/datamodel/features/RangeComparator.java @@ -0,0 +1,69 @@ +package jalview.datamodel.features; + +import java.util.Comparator; + +/** + * A comparator that orders ranges by either start position or end position + * ascending. If the position matches, + * + * @author gmcarstairs + * + */ +public class RangeComparator implements Comparator +{ + boolean byStart; + + @Override + public int compare(ContiguousI o1, ContiguousI o2) + { + int len1 = o1.getEnd() - o1.getBegin(); + int len2 = o2.getEnd() - o2.getBegin(); + + if (byStart) + { + return compare(o1.getBegin(), o2.getBegin(), len1, len2); + } + else + { + return compare(o1.getEnd(), o2.getEnd(), len1, len2); + } + } + + /** + * Compares two ranges for ordering + * + * @param pos1 + * first range positional ordering criterion + * @param pos2 + * second range positional ordering criterion + * @param len1 + * first range length ordering criterion + * @param len2 + * second range length ordering criterion + * @return + */ + public int compare(long pos1, long pos2, int len1, int len2) + { + int order = Long.compare(pos1, pos2); + if (order == 0) + { + /* + * if tied on position order, longer length sorts to left + * i.e. the negation of normal ordering by length + */ + order = -Integer.compare(len1, len2); + } + return order; + } + + /** + * Constructor + * + * @param byStartPosition + * if true, order based on start position, if false by end position + */ + public RangeComparator(boolean byStartPosition) + { + byStart = byStartPosition; + } +} diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java new file mode 100644 index 0000000..d177566 --- /dev/null +++ b/src/jalview/datamodel/features/SequenceFeatures.java @@ -0,0 +1,60 @@ +package jalview.datamodel.features; + +import jalview.datamodel.SequenceFeature; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class SequenceFeatures +{ + + /* + * map from feature type to structured store of features for that type + */ + private Map featureStore; + + /** + * Constructor + */ + public SequenceFeatures() + { + featureStore = new HashMap(); + } + + /** + * Add one sequence feature to the store + * + * @param sf + */ + public void add(SequenceFeature sf) + { + String type = sf.getType(); + if (featureStore.get(type) == null) + { + featureStore.put(type, new FeatureStore()); + } + featureStore.get(type).addFeature(sf); + } + + /** + * Returns a (possibly empty) list of features of the given type which overlap + * the (inclusive) sequence position range + * + * @param type + * @param from + * @param to + * @return + */ + public List findFeatures(String type, int from, + int to) + { + FeatureStore features = featureStore.get(type); + if (features == null) + { + return Collections.emptyList(); + } + return features.findOverlappingFeatures(from, to); + } +} diff --git a/src/jalview/datamodel/features/SubList.java b/src/jalview/datamodel/features/SubList.java new file mode 100644 index 0000000..ace0240 --- /dev/null +++ b/src/jalview/datamodel/features/SubList.java @@ -0,0 +1,27 @@ +package jalview.datamodel.features; + +/** + * SubList holds the start and end index of a run of intervals where each is + * contained by (or the same as) the one preceding it. + *

+ * SubList is used during the construction of an NCList + */ +class SubList +{ + /* + * the index of the sublist's first interval in the 'intervals' list + */ + int startIndex; + + /* + * the index of the sublist's last interval in the 'intervals' list + */ + int endIndex; + + SubList(int from, int to) + { + startIndex = from; + endIndex = to; + } + +} \ No newline at end of file diff --git a/src/jalview/gui/OverviewPanel.java b/src/jalview/gui/OverviewPanel.java index 1c48690..36a4575 100755 --- a/src/jalview/gui/OverviewPanel.java +++ b/src/jalview/gui/OverviewPanel.java @@ -349,6 +349,7 @@ public class OverviewPanel extends JPanel implements Runnable continue; } + // long start = System.currentTimeMillis(); for (col = 0; col < width; col++) { if (resizeAgain) @@ -388,6 +389,8 @@ public class OverviewPanel extends JPanel implements Runnable miniMe.setRGB(col, row, color); } + // System.out.println(String.format("Row %d with width %d took %dms", + // row, width, System.currentTimeMillis() - start)); } if (av.getAlignmentConservationAnnotation() != null) diff --git a/src/jalview/gui/SeqCanvas.java b/src/jalview/gui/SeqCanvas.java index d015292..9bd4a04 100755 --- a/src/jalview/gui/SeqCanvas.java +++ b/src/jalview/gui/SeqCanvas.java @@ -732,7 +732,8 @@ public class SeqCanvas extends JComponent if (av.isShowSequenceFeatures()) { - fr.drawSequence(g, nextSeq, startRes, endRes, offset + fr.drawSequence(g, nextSeq, startRes, + endRes, offset + ((i - startSeq) * charHeight)); } diff --git a/src/jalview/renderer/seqfeatures/FeatureRenderer.java b/src/jalview/renderer/seqfeatures/FeatureRenderer.java index 9e0089f..32a0cbb 100644 --- a/src/jalview/renderer/seqfeatures/FeatureRenderer.java +++ b/src/jalview/renderer/seqfeatures/FeatureRenderer.java @@ -31,6 +31,7 @@ import java.awt.FontMetrics; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.image.BufferedImage; +import java.util.List; public class FeatureRenderer extends FeatureRendererModel { @@ -340,9 +341,14 @@ public class FeatureRenderer extends FeatureRendererModel // loop through all features in sequence to find // current feature to render - for (sfindex = 0; sfindex < sfSize; sfindex++) + // for (sfindex = 0; sfindex < sfSize; sfindex++) + // { + // final SequenceFeature sequenceFeature = lastSequenceFeatures[sfindex]; + int from = offscreenRender ? start : spos; + int to = offscreenRender ? start : epos; + List overlaps = seq.findFeatures(type, from, to); + for (SequenceFeature sequenceFeature : overlaps) { - final SequenceFeature sequenceFeature = lastSequenceFeatures[sfindex]; if (!sequenceFeature.type.equals(type)) { continue; diff --git a/test/jalview/datamodel/features/FeatureStoreTest.java b/test/jalview/datamodel/features/FeatureStoreTest.java new file mode 100644 index 0000000..e355aaf --- /dev/null +++ b/test/jalview/datamodel/features/FeatureStoreTest.java @@ -0,0 +1,232 @@ +package jalview.datamodel.features; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import jalview.datamodel.SequenceFeature; + +import java.util.List; + +import org.testng.annotations.Test; + +public class FeatureStoreTest +{ + + @Test(groups = "Functional") + public void testFindFeatures_nonNested() + { + FeatureStore fs = new FeatureStore(); + fs.addFeature(new SequenceFeature("", "", 10, 20, Float.NaN, + null)); + fs.addFeature(new SequenceFeature("", "", 10, 20, Float.NaN, null)); + fs.addFeature(new SequenceFeature("", "", 15, 25, Float.NaN, null)); + fs.addFeature(new SequenceFeature("", "", 20, 35, Float.NaN, null)); + + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(8, 10); + assertEquals(overlaps.size(), 2); + assertEquals(overlaps.get(0).getEnd(), 20); + assertEquals(overlaps.get(1).getEnd(), 20); + + overlaps = fs.findOverlappingFeatures(12, 16); + assertEquals(overlaps.size(), 3); + assertEquals(overlaps.get(0).getEnd(), 20); + assertEquals(overlaps.get(1).getEnd(), 20); + assertEquals(overlaps.get(2).getEnd(), 25); + + overlaps = fs.findOverlappingFeatures(33, 33); + assertEquals(overlaps.size(), 1); + assertEquals(overlaps.get(0).getEnd(), 35); + } + + @Test(groups = "Functional") + public void testFindFeatures_nested() + { + FeatureStore fs = new FeatureStore(); + SequenceFeature sf1 = addFeature(fs, 10, 50); + SequenceFeature sf2 = addFeature(fs, 10, 40); + SequenceFeature sf3 = addFeature(fs, 20, 30); + SequenceFeature sf4 = addFeature(fs, 20, 30); + SequenceFeature sf5 = addFeature(fs, 35, 36); + + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(10, 15); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(45, 60); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf1)); + + overlaps = fs.findOverlappingFeatures(32, 38); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + assertTrue(overlaps.contains(sf5)); + + overlaps = fs.findOverlappingFeatures(15, 25); + assertEquals(overlaps.size(), 4); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + assertTrue(overlaps.contains(sf3)); + assertTrue(overlaps.contains(sf4)); + } + + @Test(groups = "Functional") + public void testFindFeatures_mixed() + { + FeatureStore fs = new FeatureStore(); + SequenceFeature sf1 = addFeature(fs, 10, 50); + SequenceFeature sf2 = addFeature(fs, 1, 15); + SequenceFeature sf3 = addFeature(fs, 20, 30); + SequenceFeature sf4 = addFeature(fs, 40, 100); + SequenceFeature sf5 = addFeature(fs, 60, 100); + SequenceFeature sf6 = addFeature(fs, 70, 70); + + List overlaps = fs.findOverlappingFeatures(200, 200); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(1, 9); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(5, 18); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(30, 40); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf3)); + assertTrue(overlaps.contains(sf4)); + + overlaps = fs.findOverlappingFeatures(80, 90); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf4)); + assertTrue(overlaps.contains(sf5)); + + overlaps = fs.findOverlappingFeatures(68, 70); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf4)); + assertTrue(overlaps.contains(sf5)); + assertTrue(overlaps.contains(sf6)); + } + + /** + * Helper method to add a feature of no particular type + * + * @param fs + * @param from + * @param to + * @return + */ + SequenceFeature addFeature(FeatureStore fs, int from, int to) + { + SequenceFeature sf1 = new SequenceFeature("", "", from, to, Float.NaN, + null); + fs.addFeature(sf1); + return sf1; + } + + @Test(groups = "Functional") + public void testFindFeatures_contactFeatures() + { + FeatureStore fs = new FeatureStore(); + + SequenceFeature sf = new SequenceFeature("disulphide bond", "bond", 10, + 20, Float.NaN, null); + fs.addFeature(sf); + + /* + * neither contact point in range + */ + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + /* + * neither contact point in range + */ + overlaps = fs.findOverlappingFeatures(11, 19); + assertTrue(overlaps.isEmpty()); + + /* + * first contact point in range + */ + overlaps = fs.findOverlappingFeatures(5, 15); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + + /* + * second contact point in range + */ + overlaps = fs.findOverlappingFeatures(15, 25); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + + /* + * both contact points in range + */ + overlaps = fs.findOverlappingFeatures(5, 25); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + } + + /** + * Tests for the method that returns false for an attempt to add a feature + * that would enclose, or be enclosed by, another feature + */ + @Test(groups = "Functional") + public void testAddNonNestedFeature() + { + FeatureStore fs = new FeatureStore(); + + String type = "Domain"; + SequenceFeature sf1 = new SequenceFeature(type, type, 10, 20, + Float.NaN, null); + assertTrue(fs.addNonNestedFeature(sf1)); + + // co-located feature is ok + SequenceFeature sf2 = new SequenceFeature(type, type, 10, 20, + Float.NaN, null); + assertTrue(fs.addNonNestedFeature(sf2)); + + // overlap left is ok + SequenceFeature sf3 = new SequenceFeature(type, type, 5, 15, Float.NaN, + null); + assertTrue(fs.addNonNestedFeature(sf3)); + + // overlap right is ok + SequenceFeature sf4 = new SequenceFeature(type, type, 15, 25, + Float.NaN, null); + assertTrue(fs.addNonNestedFeature(sf4)); + + // add enclosing feature is not ok + SequenceFeature sf5 = new SequenceFeature(type, type, 10, 21, + Float.NaN, null); + assertFalse(fs.addNonNestedFeature(sf5)); + SequenceFeature sf6 = new SequenceFeature(type, type, 4, 15, Float.NaN, + null); + assertFalse(fs.addNonNestedFeature(sf6)); + SequenceFeature sf7 = new SequenceFeature(type, type, 1, 50, Float.NaN, + null); + assertFalse(fs.addNonNestedFeature(sf7)); + + // add enclosed feature is not ok + SequenceFeature sf8 = new SequenceFeature(type, type, 10, 19, + Float.NaN, null); + assertFalse(fs.addNonNestedFeature(sf8)); + SequenceFeature sf9 = new SequenceFeature(type, type, 16, 25, + Float.NaN, null); + assertFalse(fs.addNonNestedFeature(sf9)); + SequenceFeature sf10 = new SequenceFeature(type, type, 7, 7, Float.NaN, + null); + assertFalse(fs.addNonNestedFeature(sf10)); + } +} diff --git a/test/jalview/datamodel/features/NCListTest.java b/test/jalview/datamodel/features/NCListTest.java new file mode 100644 index 0000000..38227c1 --- /dev/null +++ b/test/jalview/datamodel/features/NCListTest.java @@ -0,0 +1,158 @@ +package jalview.datamodel.features; + +import static org.testng.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.testng.annotations.Test; + +public class NCListTest +{ + class Range implements ContiguousI + { + int start; + + int end; + + @Override + public int getBegin() + { + return start; + } + + @Override + public int getEnd() + { + return end; + } + + Range(int i, int j) + { + start = i; + end = j; + } + + @Override + public String toString() { + return String.valueOf(start) + "-" + String.valueOf(end); + } + } + + /** + * A basic sanity test of the constructor + */ + @Test(groups = "Functional") + public void testConstructor() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 20)); + ranges.add(new Range(10, 20)); + ranges.add(new Range(15, 30)); + ranges.add(new Range(10, 30)); + ranges.add(new Range(11, 19)); + ranges.add(new Range(10, 20)); + ranges.add(new Range(1, 100)); + + NCList ncl = new NCList(ranges); + String expected = "[1-100 [10-30 [10-20 [10-20 [11-19]]]], 15-30 [20-20]]"; + assertEquals(ncl.toString(), expected); + + Collections.reverse(ranges); + ncl = new NCList(ranges); + assertEquals(ncl.toString(), expected); + } + + @Test(groups = "Functional") + public void testFindOverlaps() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 50)); + ranges.add(new Range(30, 70)); + ranges.add(new Range(1, 100)); + ranges.add(new Range(70, 120)); + + NCList ncl = new NCList(ranges); + + List overlaps = ncl.findOverlaps(121, 122); + assertEquals(overlaps.size(), 0); + + overlaps = ncl.findOverlaps(21, 22); + assertEquals(overlaps.size(), 2); + assertEquals(((ContiguousI) overlaps.get(0)).getBegin(), 1); + assertEquals(((ContiguousI) overlaps.get(0)).getEnd(), 100); + assertEquals(((ContiguousI) overlaps.get(1)).getBegin(), 20); + assertEquals(((ContiguousI) overlaps.get(1)).getEnd(), 50); + + overlaps = ncl.findOverlaps(110, 110); + assertEquals(overlaps.size(), 1); + assertEquals(((ContiguousI) overlaps.get(0)).getBegin(), 70); + assertEquals(((ContiguousI) overlaps.get(0)).getEnd(), 120); + } + + @Test(groups = "Functional") + public void testAdd_onTheEnd() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 50)); + NCList ncl = new NCList(ranges); + assertEquals(ncl.toString(), "[20-50]"); + + ncl.add(new Range(60, 70)); + assertEquals(ncl.toString(), "[20-50, 60-70]"); + } + + @Test(groups = "Functional") + public void testAdd_inside() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 50)); + NCList ncl = new NCList(ranges); + assertEquals(ncl.toString(), "[20-50]"); + + ncl.add(new Range(30, 40)); + assertEquals(ncl.toString(), "[20-50 [30-40]]"); + } + + @Test(groups = "Functional") + public void testAdd_onTheFront() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 50)); + NCList ncl = new NCList(ranges); + assertEquals(ncl.toString(), "[20-50]"); + + ncl.add(new Range(5, 15)); + assertEquals(ncl.toString(), "[5-15, 20-50]"); + } + + @Test(groups = "Functional") + public void testAdd_enclosing() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 50)); + ranges.add(new Range(30, 60)); + NCList ncl = new NCList(ranges); + assertEquals(ncl.toString(), "[20-50, 30-60]"); + + ncl.add(new Range(10, 70)); + assertEquals(ncl.toString(), "[10-70 [20-50, 30-60]"); + } + + @Test(groups = "Functional") + public void testAdd_spanning() + { + List ranges = new ArrayList(); + ranges.add(new Range(20, 40)); + ranges.add(new Range(60, 70)); + NCList ncl = new NCList(ranges); + assertEquals(ncl.toString(), "[20-40, 60-70]"); + + ncl.add(new Range(30, 50)); + assertEquals(ncl.toString(), "[20-40, 30-50, 60-70]"); + + ncl.add(new Range(40, 65)); + assertEquals(ncl.toString(), "[20-40, 30-50, 40-65, 60-70]"); + } +} diff --git a/test/jalview/datamodel/features/RangeComparatorTest.java b/test/jalview/datamodel/features/RangeComparatorTest.java new file mode 100644 index 0000000..6f22add --- /dev/null +++ b/test/jalview/datamodel/features/RangeComparatorTest.java @@ -0,0 +1,84 @@ +package jalview.datamodel.features; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + +public class RangeComparatorTest +{ + class Range implements ContiguousI + { + int begin; + + int end; + + @Override + public int getBegin() + { + return begin; + } + + @Override + public int getEnd() + { + return end; + } + + Range(int i, int j) + { + begin = i; + end = j; + } + } + + @Test(groups = "Functional") + public void testCompare() + { + RangeComparator comp = new RangeComparator(true); + + // same position, same length + assertEquals(comp.compare(10, 10, 20, 20), 0); + // same position, len1 > len2 + assertEquals(comp.compare(10, 10, 20, 19), -1); + // same position, len1 < len2 + assertEquals(comp.compare(10, 10, 20, 21), 1); + // pos1 > pos2 + assertEquals(comp.compare(11, 10, 20, 20), 1); + // pos1 < pos2 + assertEquals(comp.compare(10, 11, 20, 10), -1); + } + + @Test(groups = "Functional") + public void testCompare_byStart() + { + RangeComparator comp = new RangeComparator(true); + + // same start position, same length + assertEquals(comp.compare(new Range(10, 20), new Range(10, 20)), 0); + // same start position, len1 > len2 + assertEquals(comp.compare(new Range(10, 20), new Range(10, 19)), -1); + // same start position, len1 < len2 + assertEquals(comp.compare(new Range(10, 18), new Range(10, 20)), 1); + // pos1 > pos2 + assertEquals(comp.compare(new Range(11, 20), new Range(10, 20)), 1); + // pos1 < pos2 + assertEquals(comp.compare(new Range(10, 20), new Range(11, 20)), -1); + } + + @Test(groups = "Functional") + public void testCompare_byEnd() + { + RangeComparator comp = new RangeComparator(false); + + // same end position, same length + assertEquals(comp.compare(new Range(10, 20), new Range(10, 20)), 0); + // same end position, len1 > len2 + assertEquals(comp.compare(new Range(10, 20), new Range(11, 20)), -1); + // same end position, len1 < len2 + assertEquals(comp.compare(new Range(11, 20), new Range(10, 20)), 1); + // end1 > end2 + assertEquals(comp.compare(new Range(10, 21), new Range(10, 20)), 1); + // end1 < end2 + assertEquals(comp.compare(new Range(10, 20), new Range(10, 21)), -1); + } +} -- 1.7.10.2