1 package jalview.datamodel.features;
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
9 * An adapted implementation of NCList as described in the paper
15 public class NCList<T extends ContiguousI>
18 * the number of ranges represented
23 * a list, in start position order, of sublists of ranges ordered so
24 * that each contains (or is the same as) the one that follows it
26 private List<NCNode<T>> subranges;
29 * a comparator to sort intervals by start position ascending, with
30 * longer (enclosing) intervals preceding those they enclose
32 Comparator<ContiguousI> intervalSorter = new RangeComparator(true);
35 * Constructor given a list of things that are each located on a contiguous
36 * interval. Note that the constructor may reorder the list.
38 * We assume here that for each range, start <= end. Behaviour for reverse
39 * ordered ranges is undefined.
43 public NCList(List<T> ranges)
52 protected void build(List<T> ranges)
55 * sort by start ascending so that contained intervals
56 * follow their containing interval
58 Collections.sort(ranges, intervalSorter);
60 List<SubList> sublists = findSubranges(ranges);
63 * convert each subrange to an NCNode consisting of a range and
64 * (possibly) its contained NCList
66 for (SubList sublist : sublists)
68 subranges.add(new NCNode<T>(ranges.subList(sublist.startIndex,
69 sublist.endIndex + 1)));
75 public NCList(T entry)
78 List<T> ranges = new ArrayList<T>();
85 subranges = new ArrayList<NCNode<T>>();
89 * Traverses the sorted ranges to identify sublists, within which each
90 * interval contains the one that follows it
95 protected List<SubList> findSubranges(List<T> ranges)
97 List<SubList> sublists = new ArrayList<SubList>();
99 int listStartIndex = 0;
100 long lastEndPos = Long.MAX_VALUE;
102 for (int i = 0; i < ranges.size(); i++)
104 ContiguousI nextInterval = ranges.get(i);
105 long nextStart = nextInterval.getBegin();
106 long nextEnd = nextInterval.getEnd();
107 if (nextStart > lastEndPos || nextEnd > lastEndPos)
110 * this interval is not contained in the preceding one
111 * close off the last sublist
113 sublists.add(new SubList(listStartIndex, i - 1));
116 lastEndPos = nextEnd;
119 sublists.add(new SubList(listStartIndex, ranges.size() - 1));
124 * Adds one entry to the stored set
128 public synchronized void add(T entry)
131 long start = entry.getBegin();
132 long end = entry.getEnd();
136 * - precedes all subranges: add as NCNode on front of list
137 * - follows all subranges: add as NCNode on end of list
138 * - enclosed by a subrange - add recursively to subrange
139 * - encloses one or more subranges - push them inside it
140 * - none of the above - add as a new node and resort nodes list (?)
144 * find the first subrange whose end does not precede entry's start
146 int candidateIndex = findFirstOverlap(start);
147 if (candidateIndex == -1)
150 * all subranges precede this one - add it on the end
152 subranges.add(new NCNode<T>(entry));
157 * search for maximal span of subranges i-k that the new entry
158 * encloses; or a subrange that encloses the new entry
160 boolean enclosing = false;
161 int firstEnclosed = 0;
162 int lastEnclosed = 0;
163 boolean overlapping = false;
165 for (int j = candidateIndex; j < subranges.size(); j++)
167 NCNode<T> subrange = subranges.get(j);
169 if (end < subrange.getStart() && !overlapping && !enclosing)
172 * new entry lies between subranges j-1 j
174 subranges.add(j, new NCNode<T>(entry));
178 if (subrange.getStart() <= start && subrange.getEnd() >= end)
181 * push new entry inside this subrange as it encloses it
187 if (start <= subrange.getStart())
189 if (end >= subrange.getEnd())
192 * new entry encloses this subrange (and possibly preceding ones);
193 * continue to find the maximal list it encloses
206 * entry spans from before this subrange to inside it
211 * entry encloses one or more preceding subranges
213 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
219 * entry spans two subranges but doesn't enclose any
222 subranges.add(j, new NCNode<T>(entry));
234 * drops through to here if new range encloses all others
238 addEnclosingRange(entry, firstEnclosed, lastEnclosed);
243 * Update the tree so that the range of the new entry encloses subranges i to
244 * j (inclusive). That is, replace subranges i-j (inclusive) with a new
245 * subrange that contains them.
251 protected synchronized void addEnclosingRange(T entry, final int i,
254 NCList<T> newNCList = new NCList<T>();
255 newNCList.subranges.addAll(subranges.subList(i, j + 1));
256 NCNode<T> newNode = new NCNode<T>(entry, newNCList);
257 for (int k = j; k >= i; k--)
261 subranges.add(i, newNode);
265 * Returns a (possibly empty) list of items whose extent overlaps the given
269 * start of overlap range (inclusive)
271 * end of overlap range (inclusive)
274 public List<T> findOverlaps(long from, long to)
276 List<T> result = new ArrayList<T>();
278 findOverlaps(from, to, result);
284 * Recursively searches the NCList adding any items that overlap the from-to
285 * range to the result list
291 protected void findOverlaps(long from, long to,
295 * find the first sublist that might overlap, i.e.
296 * the first whose end position is >= from
298 int candidateIndex = findFirstOverlap(from);
300 if (candidateIndex == -1)
305 for (int i = candidateIndex; i < subranges.size(); i++)
307 NCNode<T> candidate = subranges.get(i);
308 if (candidate.getStart() > to)
311 * we are past the end of our target range
315 candidate.addOverlaps(from, to, result);
321 * Search subranges for the first one whose end position is not before the
322 * target range's start position, i.e. the first one that may overlap the
323 * target range. Returns the index in the list of the first such range found,
324 * or -1 if none found.
329 protected int findFirstOverlap(long from)
331 // TODO binary search
332 // for now quick cheat linear search
334 if (subranges != null)
336 for (NCNode<T> subrange : subranges)
338 if (subrange.getEnd() >= from)
349 * Formats the tree as a bracketed list e.g.
352 * [1-100 [10-30 [10-20]], 15-30 [20-20]]
356 public String toString()
358 return subranges.toString();
362 * Returns a string representation of the data where containment is shown bgy
363 * indentation on new lines
367 public String prettyPrint()
369 StringBuilder sb = new StringBuilder(512);
372 prettyPrint(sb, offset, indent);
373 return sb.toString();
381 void prettyPrint(StringBuilder sb, int offset, int indent)
383 boolean first = true;
384 for (NCNode<T> subrange : subranges)
388 sb.append(System.lineSeparator());
391 subrange.prettyPrint(sb, offset, indent);
396 * Answers true if the data held satisfy the rules of construction of an
397 * NCList, else false.
401 public boolean isValid()
403 return isValid(Integer.MIN_VALUE, Integer.MAX_VALUE);
407 * Answers true if the data held satisfy the rules of construction of an
408 * NCList bounded within the given start-end range, else false.
410 * Each subrange must lie within start-end (inclusive). Subranges must be
411 * ordered by start position ascending.
418 boolean isValid(final int start, final int end)
420 int lastStart = start;
421 for (NCNode<T> subrange : subranges)
423 if (subrange.getStart() < lastStart)
425 System.err.println("error in NCList: range " + subrange.toString()
426 + " starts before " + lastStart);
429 if (subrange.getEnd() > end)
431 System.err.println("error in NCList: range " + subrange.toString()
432 + " ends after " + end);
435 lastStart = subrange.getStart();
437 if (!subrange.isValid())
446 * Answers the lowest start position enclosed by the ranges
450 public int getStart()
452 return subranges.isEmpty() ? 0 : subranges.get(0).getStart();
456 * Returns the number of ranges held (deep count)