1 package jalview.datamodel.features;
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
9 * An adapted implementation of NCList as described in the paper
12 * Nested Containment List (NCList): a new algorithm for accelerating
13 * interval query of genome alignment and interval databases
14 * - Alexander V. Alekseyenko, Christopher J. Lee
15 * https://doi.org/10.1093/bioinformatics/btl647
18 public class NCList<T extends ContiguousI>
21 * the number of ranges represented
26 * a list, in start position order, of sublists of ranges ordered so
27 * that each contains (or is the same as) the one that follows it
29 private List<NCNode<T>> subranges;
32 * a comparator to sort intervals by start position ascending, with
33 * longer (enclosing) intervals preceding those they enclose
35 Comparator<ContiguousI> intervalSorter = new RangeComparator(true);
38 * Constructor given a list of things that are each located on a contiguous
39 * interval. Note that the constructor may reorder the list.
41 * We assume here that for each range, start <= end. Behaviour for reverse
42 * ordered ranges is undefined.
46 public NCList(List<T> ranges)
55 protected void build(List<T> ranges)
58 * sort by start ascending so that contained intervals
59 * follow their containing interval
61 Collections.sort(ranges, intervalSorter);
63 List<SubList> sublists = buildSubranges(ranges);
66 * convert each subrange to an NCNode consisting of a range and
67 * (possibly) its contained NCList
69 for (SubList sublist : sublists)
71 subranges.add(new NCNode<T>(ranges.subList(sublist.startIndex,
72 sublist.endIndex + 1)));
78 public NCList(T entry)
81 List<T> ranges = new ArrayList<T>();
88 subranges = new ArrayList<NCNode<T>>();
92 * Traverses the sorted ranges to identify sublists, within which each
93 * interval contains the one that follows it
98 protected List<SubList> buildSubranges(List<T> ranges)
100 List<SubList> sublists = new ArrayList<SubList>();
102 if (ranges.isEmpty())
107 int listStartIndex = 0;
108 long lastEndPos = Long.MAX_VALUE;
110 for (int i = 0; i < ranges.size(); i++)
112 ContiguousI nextInterval = ranges.get(i);
113 long nextStart = nextInterval.getBegin();
114 long nextEnd = nextInterval.getEnd();
115 if (nextStart > lastEndPos || nextEnd > lastEndPos)
118 * this interval is not contained in the preceding one
119 * close off the last sublist
121 sublists.add(new SubList(listStartIndex, i - 1));
124 lastEndPos = nextEnd;
127 sublists.add(new SubList(listStartIndex, ranges.size() - 1));
132 * Adds one entry to the stored set (with duplicates allowed)
136 public void add(T entry)
142 * Adds one entry to the stored set, and returns true, unless allowDuplicates
143 * is set to false and it is already contained (by object equality test), in
144 * which case it is not added and this method returns false.
147 * @param allowDuplicates
150 public synchronized boolean add(T entry, boolean allowDuplicates)
152 if (!allowDuplicates && contains(entry))
158 long start = entry.getBegin();
159 long end = entry.getEnd();
163 * - precedes all subranges: add as NCNode on front of list
164 * - follows all subranges: add as NCNode on end of list
165 * - enclosed by a subrange - add recursively to subrange
166 * - encloses one or more subranges - push them inside it
167 * - none of the above - add as a new node and resort nodes list (?)
171 * find the first subrange whose end does not precede entry's start
173 int candidateIndex = findFirstOverlap(start);
174 if (candidateIndex == -1)
177 * all subranges precede this one - add it on the end
179 subranges.add(new NCNode<T>(entry));
184 * search for maximal span of subranges i-k that the new entry
185 * encloses; or a subrange that encloses the new entry
187 boolean enclosing = false;
188 int firstEnclosed = 0;
189 int lastEnclosed = 0;
190 boolean overlapping = false;
192 for (int j = candidateIndex; j < subranges.size(); j++)
194 NCNode<T> subrange = subranges.get(j);
196 if (end < subrange.getBegin() && !overlapping && !enclosing)
199 * new entry lies between subranges j-1 j
201 subranges.add(j, new NCNode<T>(entry));
205 if (subrange.getBegin() <= start && subrange.getEnd() >= end)
208 * push new entry inside this subrange as it encloses it
214 if (start <= subrange.getBegin())
216 if (end >= subrange.getEnd())
219 * new entry encloses this subrange (and possibly preceding ones);
220 * continue to find the maximal list it encloses
233 * entry spans from before this subrange to inside it
238 * entry encloses one or more preceding subranges
240 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
246 * entry spans two subranges but doesn't enclose any
249 subranges.add(j, new NCNode<T>(entry));
261 * drops through to here if new range encloses all others
262 * or overlaps the last one
266 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
270 subranges.add(new NCNode<T>(entry));
277 * Answers true if this NCList contains the given entry (by object equality
283 public boolean contains(T entry)
286 * find the first sublist that might overlap, i.e.
287 * the first whose end position is >= from
289 int candidateIndex = findFirstOverlap(entry.getBegin());
291 if (candidateIndex == -1)
296 int to = entry.getEnd();
298 for (int i = candidateIndex; i < subranges.size(); i++)
300 NCNode<T> candidate = subranges.get(i);
301 if (candidate.getBegin() > to)
304 * we are past the end of our target range
308 if (candidate.contains(entry))
317 * Update the tree so that the range of the new entry encloses subranges i to
318 * j (inclusive). That is, replace subranges i-j (inclusive) with a new
319 * subrange that contains them.
325 protected synchronized void addEnclosingRange(T entry, final int i,
328 NCList<T> newNCList = new NCList<T>();
329 newNCList.subranges.addAll(subranges.subList(i, j + 1));
330 NCNode<T> newNode = new NCNode<T>(entry, newNCList);
331 for (int k = j; k >= i; k--)
335 subranges.add(i, newNode);
339 * Returns a (possibly empty) list of items whose extent overlaps the given
343 * start of overlap range (inclusive)
345 * end of overlap range (inclusive)
348 public List<T> findOverlaps(long from, long to)
350 List<T> result = new ArrayList<T>();
352 findOverlaps(from, to, result);
358 * Recursively searches the NCList adding any items that overlap the from-to
359 * range to the result list
365 protected void findOverlaps(long from, long to, List<T> result)
368 * find the first sublist that might overlap, i.e.
369 * the first whose end position is >= from
371 int candidateIndex = findFirstOverlap(from);
373 if (candidateIndex == -1)
378 for (int i = candidateIndex; i < subranges.size(); i++)
380 NCNode<T> candidate = subranges.get(i);
381 if (candidate.getBegin() > to)
384 * we are past the end of our target range
388 candidate.findOverlaps(from, to, result);
394 * Search subranges for the first one whose end position is not before the
395 * target range's start position, i.e. the first one that may overlap the
396 * target range. Returns the index in the list of the first such range found,
397 * or -1 if none found.
402 protected int findFirstOverlap(long from)
404 // TODO binary search
405 // for now quick cheat linear search
407 if (subranges != null)
409 for (NCNode<T> subrange : subranges)
411 if (subrange.getEnd() >= from)
422 * Formats the tree as a bracketed list e.g.
425 * [1-100 [10-30 [10-20]], 15-30 [20-20]]
429 public String toString()
431 return subranges.toString();
435 * Returns a string representation of the data where containment is shown by
436 * indentation on new lines
440 public String prettyPrint()
442 StringBuilder sb = new StringBuilder(512);
445 prettyPrint(sb, offset, indent);
446 sb.append(System.lineSeparator());
447 return sb.toString();
455 void prettyPrint(StringBuilder sb, int offset, int indent)
457 boolean first = true;
458 for (NCNode<T> subrange : subranges)
462 sb.append(System.lineSeparator());
465 subrange.prettyPrint(sb, offset, indent);
470 * Answers true if the data held satisfy the rules of construction of an
471 * NCList, else false.
475 public boolean isValid()
477 return isValid(Integer.MIN_VALUE, Integer.MAX_VALUE);
481 * Answers true if the data held satisfy the rules of construction of an
482 * NCList bounded within the given start-end range, else false.
484 * Each subrange must lie within start-end (inclusive). Subranges must be
485 * ordered by start position ascending.
492 boolean isValid(final int start, final int end)
494 int lastStart = start;
495 for (NCNode<T> subrange : subranges)
497 if (subrange.getBegin() < lastStart)
499 System.err.println("error in NCList: range " + subrange.toString()
500 + " starts before " + lastStart);
503 if (subrange.getEnd() > end)
505 System.err.println("error in NCList: range " + subrange.toString()
506 + " ends after " + end);
509 lastStart = subrange.getBegin();
511 if (!subrange.isValid())
520 * Answers the lowest start position enclosed by the ranges
524 public int getStart()
526 return subranges.isEmpty() ? 0 : subranges.get(0).getBegin();
530 * Returns the number of ranges held (deep count)
540 * Returns a list of all entries stored
544 public List<T> getEntries()
546 List<T> result = new ArrayList<T>();
552 * Adds all contained entries to the given list
556 void getEntries(List<T> result)
558 for (NCNode<T> subrange : subranges)
560 subrange.getEntries(result);
565 * Deletes the given entry from the store, returning true if it was found (and
566 * deleted), else false. This method makes no assumption that the entry is in
567 * the 'expected' place in the store, in case it has been modified since it
568 * was added. Only the first 'same object' match is deleted, not 'equal' or
573 public synchronized boolean delete(T entry)
579 for (int i = 0; i < subranges.size(); i++)
581 NCNode<T> subrange = subranges.get(i);
582 NCList<T> subRegions = subrange.getSubRegions();
584 if (subrange.getRegion() == entry)
587 * if the subrange is rooted on this entry, promote its
588 * subregions (if any) to replace the subrange here;
589 * NB have to resort subranges after doing this since e.g.
590 * [10-30 [12-20 [16-18], 13-19]]
591 * after deleting 12-20, 16-18 is promoted to sibling of 13-19
592 * but should follow it in the list of subranges of 10-30
595 if (subRegions != null)
597 subranges.addAll(i, subRegions.subranges);
598 Collections.sort(subranges, intervalSorter);
605 if (subRegions != null && subRegions.delete(entry))