X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FNCList.java;h=b8160d3c7077516d528477ed10a3ea53edc2470a;hb=c55e1fa5d2782cc46e515c11a91cf502f37386ab;hp=40b062a687c1bea85c1884c8d936b41f06dbaa6e;hpb=0ea61b395605ecf1854406f8e0c9a0be04404cb1;p=jalview.git diff --git a/src/jalview/datamodel/features/NCList.java b/src/jalview/datamodel/features/NCList.java index 40b062a..b8160d3 100644 --- a/src/jalview/datamodel/features/NCList.java +++ b/src/jalview/datamodel/features/NCList.java @@ -1,15 +1,20 @@ package jalview.datamodel.features; +import jalview.datamodel.ContiguousI; +import jalview.datamodel.Range; + import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.List; /** * An adapted implementation of NCList as described in the paper * *
- * todo
+ * Nested Containment List (NCList): a new algorithm for accelerating
+ * interval query of genome alignment and interval databases
+ * - Alexander V. Alekseyenko, Christopher J. Lee
+ * https://doi.org/10.1093/bioinformatics/btl647
  * 
*/ public class NCList @@ -25,12 +30,6 @@ public class NCList */ private List> subranges; - /* - * a comparator to sort intervals by start position ascending, with - * longer (enclosing) intervals preceding those they enclose - */ - Comparator intervalSorter = new RangeComparator(true); - /** * Constructor given a list of things that are each located on a contiguous * interval. Note that the constructor may reorder the list. @@ -47,6 +46,9 @@ public class NCList } /** + * Sort and group ranges into sublists where each sublist represents a region + * and its contained subregions + * * @param ranges */ protected void build(List ranges) @@ -55,18 +57,18 @@ public class NCList * sort by start ascending so that contained intervals * follow their containing interval */ - Collections.sort(ranges, intervalSorter); + Collections.sort(ranges, RangeComparator.BY_START_POSITION); - List sublists = findSubranges(ranges); + List sublists = buildSubranges(ranges); /* * convert each subrange to an NCNode consisting of a range and * (possibly) its contained NCList */ - for (SubList sublist : sublists) + for (Range sublist : sublists) { - subranges.add(new NCNode(ranges.subList(sublist.startIndex, - sublist.endIndex + 1))); + subranges.add(new NCNode(ranges.subList(sublist.start, + sublist.end + 1))); } size = ranges.size(); @@ -75,9 +77,8 @@ public class NCList public NCList(T entry) { this(); - List ranges = new ArrayList(); - ranges.add(entry); - build(ranges); + subranges.add(new NCNode<>(entry)); + size = 1; } public NCList() @@ -92,10 +93,15 @@ public class NCList * @param ranges * @return */ - protected List findSubranges(List ranges) + protected List buildSubranges(List ranges) { - List sublists = new ArrayList(); + List sublists = new ArrayList<>(); + if (ranges.isEmpty()) + { + return sublists; + } + int listStartIndex = 0; long lastEndPos = Long.MAX_VALUE; @@ -110,23 +116,42 @@ public class NCList * this interval is not contained in the preceding one * close off the last sublist */ - sublists.add(new SubList(listStartIndex, i - 1)); + sublists.add(new Range(listStartIndex, i - 1)); listStartIndex = i; } lastEndPos = nextEnd; } - sublists.add(new SubList(listStartIndex, ranges.size() - 1)); + sublists.add(new Range(listStartIndex, ranges.size() - 1)); return sublists; } /** - * Adds one entry to the stored set + * Adds one entry to the stored set (with duplicates allowed) + * + * @param entry + */ + public void add(T entry) + { + add(entry, true); + } + + /** + * Adds one entry to the stored set, and returns true, unless allowDuplicates + * is set to false and it is already contained (by object equality test), in + * which case it is not added and this method returns false. * * @param entry + * @param allowDuplicates + * @return */ - public synchronized void add(T entry) + public synchronized boolean add(T entry, boolean allowDuplicates) { + if (!allowDuplicates && contains(entry)) + { + return false; + } + size++; long start = entry.getBegin(); long end = entry.getEnd(); @@ -149,8 +174,8 @@ public class NCList /* * all subranges precede this one - add it on the end */ - subranges.add(new NCNode(entry)); - return; + subranges.add(new NCNode<>(entry)); + return true; } /* @@ -166,25 +191,25 @@ public class NCList { NCNode subrange = subranges.get(j); - if (end < subrange.getStart() && !overlapping && !enclosing) + if (end < subrange.getBegin() && !overlapping && !enclosing) { /* * new entry lies between subranges j-1 j */ - subranges.add(j, new NCNode(entry)); - return; + subranges.add(j, new NCNode<>(entry)); + return true; } - if (subrange.getStart() <= start && subrange.getEnd() >= end) + if (subrange.getBegin() <= start && subrange.getEnd() >= end) { /* * push new entry inside this subrange as it encloses it */ subrange.add(entry); - return; + return true; } - if (start <= subrange.getStart()) + if (start <= subrange.getBegin()) { if (end >= subrange.getEnd()) { @@ -211,7 +236,7 @@ public class NCList * entry encloses one or more preceding subranges */ addEnclosingRange(entry, firstEnclosed, lastEnclosed); - return; + return true; } else { @@ -219,8 +244,8 @@ public class NCList * entry spans two subranges but doesn't enclose any * so just add it */ - subranges.add(j, new NCNode(entry)); - return; + subranges.add(j, new NCNode<>(entry)); + return true; } } } @@ -232,14 +257,61 @@ public class NCList /* * drops through to here if new range encloses all others + * or overlaps the last one */ if (enclosing) { addEnclosingRange(entry, firstEnclosed, lastEnclosed); } + else + { + subranges.add(new NCNode<>(entry)); + } + + return true; } /** + * Answers true if this NCList contains the given entry (by object equality + * test), else false + * + * @param entry + * @return + */ + public boolean contains(T entry) + { + /* + * find the first sublist that might overlap, i.e. + * the first whose end position is >= from + */ + int candidateIndex = findFirstOverlap(entry.getBegin()); + + if (candidateIndex == -1) + { + return false; + } + + int to = entry.getEnd(); + + for (int i = candidateIndex; i < subranges.size(); i++) + { + NCNode candidate = subranges.get(i); + if (candidate.getBegin() > to) + { + /* + * we are past the end of our target range + */ + break; + } + if (candidate.contains(entry)) + { + return true; + } + } + return false; + } + + /** * Update the tree so that the range of the new entry encloses subranges i to * j (inclusive). That is, replace subranges i-j (inclusive) with a new * subrange that contains them. @@ -251,9 +323,9 @@ public class NCList protected synchronized void addEnclosingRange(T entry, final int i, final int j) { - NCList newNCList = new NCList(); - newNCList.subranges.addAll(subranges.subList(i, j + 1)); - NCNode newNode = new NCNode(entry, newNCList); + NCList newNCList = new NCList<>(); + newNCList.addNodes(subranges.subList(i, j + 1)); + NCNode newNode = new NCNode<>(entry, newNCList); for (int k = j; k >= i; k--) { subranges.remove(k); @@ -261,6 +333,15 @@ public class NCList subranges.add(i, newNode); } + protected void addNodes(List> nodes) + { + for (NCNode node : nodes) + { + subranges.add(node); + size += node.size(); + } + } + /** * Returns a (possibly empty) list of items whose extent overlaps the given * range @@ -273,7 +354,7 @@ public class NCList */ public List findOverlaps(long from, long to) { - List result = new ArrayList(); + List result = new ArrayList<>(); findOverlaps(from, to, result); @@ -288,8 +369,7 @@ public class NCList * @param to * @param result */ - protected void findOverlaps(long from, long to, - List result) + protected void findOverlaps(long from, long to, List result) { /* * find the first sublist that might overlap, i.e. @@ -305,14 +385,14 @@ public class NCList for (int i = candidateIndex; i < subranges.size(); i++) { NCNode candidate = subranges.get(i); - if (candidate.getStart() > to) + if (candidate.getBegin() > to) { /* * we are past the end of our target range */ break; } - candidate.addOverlaps(from, to, result); + candidate.findOverlaps(from, to, result); } } @@ -328,8 +408,12 @@ public class NCList */ protected int findFirstOverlap(long from) { - // TODO binary search - // for now quick cheat linear search + /* + * The NCList paper describes binary search for this step, + * but this not implemented here as (a) I haven't understood it yet + * and (b) it seems to imply complications for adding to an NCList + */ + int i = 0; if (subranges != null) { @@ -359,7 +443,7 @@ public class NCList } /** - * Returns a string representation of the data where containment is shown bgy + * Returns a string representation of the data where containment is shown by * indentation on new lines * * @return @@ -370,6 +454,7 @@ public class NCList int offset = 0; int indent = 2; prettyPrint(sb, offset, indent); + sb.append(System.lineSeparator()); return sb.toString(); } @@ -420,7 +505,7 @@ public class NCList int lastStart = start; for (NCNode subrange : subranges) { - if (subrange.getStart() < lastStart) + if (subrange.getBegin() < lastStart) { System.err.println("error in NCList: range " + subrange.toString() + " starts before " + lastStart); @@ -432,7 +517,7 @@ public class NCList + " ends after " + end); return false; } - lastStart = subrange.getStart(); + lastStart = subrange.getBegin(); if (!subrange.isValid()) { @@ -449,7 +534,7 @@ public class NCList */ public int getStart() { - return subranges.isEmpty() ? 0 : subranges.get(0).getStart(); + return subranges.isEmpty() ? 0 : subranges.get(0).getBegin(); } /** @@ -457,8 +542,85 @@ public class NCList * * @return */ - public int getSize() + public int size() { return size; } + + /** + * Returns a list of all entries stored + * + * @return + */ + public List getEntries() + { + List result = new ArrayList<>(); + getEntries(result); + return result; + } + + /** + * Adds all contained entries to the given list + * + * @param result + */ + void getEntries(List result) + { + for (NCNode subrange : subranges) + { + subrange.getEntries(result); + } + } + + /** + * Deletes the given entry from the store, returning true if it was found (and + * deleted), else false. This method makes no assumption that the entry is in + * the 'expected' place in the store, in case it has been modified since it + * was added. Only the first 'same object' match is deleted, not 'equal' or + * multiple objects. + * + * @param entry + */ + public synchronized boolean delete(T entry) + { + if (entry == null) + { + return false; + } + for (int i = 0; i < subranges.size(); i++) + { + NCNode subrange = subranges.get(i); + NCList subRegions = subrange.getSubRegions(); + + if (subrange.getRegion() == entry) + { + /* + * if the subrange is rooted on this entry, promote its + * subregions (if any) to replace the subrange here; + * NB have to resort subranges after doing this since e.g. + * [10-30 [12-20 [16-18], 13-19]] + * after deleting 12-20, 16-18 is promoted to sibling of 13-19 + * but should follow it in the list of subranges of 10-30 + */ + subranges.remove(i); + if (subRegions != null) + { + subranges.addAll(subRegions.subranges); + Collections.sort(subranges, RangeComparator.BY_START_POSITION); + } + size--; + return true; + } + else + { + if (subRegions != null && subRegions.delete(entry)) + { + size--; + subrange.deleteSubRegionsIfEmpty(); + return true; + } + } + } + return false; + } }