X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2Ffeatures%2FNCList.java;fp=src%2Fjalview%2Fdatamodel%2Ffeatures%2FNCList.java;h=0000000000000000000000000000000000000000;hb=9be95195238a6cacaf586724f01e0ddad9ed9d01;hp=ae58a69768184cc87f02453eac2c02c6980d3dc8;hpb=6084ed0c6da51ac6569320a33b7f4aa0ec523c80;p=jalview.git diff --git a/src/jalview/datamodel/features/NCList.java b/src/jalview/datamodel/features/NCList.java deleted file mode 100644 index ae58a69..0000000 --- a/src/jalview/datamodel/features/NCList.java +++ /dev/null @@ -1,646 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.datamodel.features; - -import jalview.datamodel.ContiguousI; -import jalview.datamodel.Range; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * An adapted implementation of NCList as described in the paper - * - *
- * Nested Containment List (NCList): a new algorithm for accelerating
- * interval query of genome alignment and interval databases
- * - Alexander V. Alekseyenko, Christopher J. Lee
- * https://doi.org/10.1093/bioinformatics/btl647
- * 
- */ -public class NCList -{ - /* - * the number of ranges represented - */ - private int size; - - /* - * a list, in start position order, of sublists of ranges ordered so - * that each contains (or is the same as) the one that follows it - */ - private List> subranges; - - /** - * Constructor given a list of things that are each located on a contiguous - * interval. Note that the constructor may reorder the list. - *

- * We assume here that for each range, start <= end. Behaviour for reverse - * ordered ranges is undefined. - * - * @param ranges - */ - public NCList(List ranges) - { - this(); - build(ranges); - } - - /** - * Sort and group ranges into sublists where each sublist represents a region - * and its contained subregions - * - * @param ranges - */ - protected void build(List ranges) - { - /* - * sort by start ascending so that contained intervals - * follow their containing interval - */ - Collections.sort(ranges, RangeComparator.BY_START_POSITION); - - List sublists = buildSubranges(ranges); - - /* - * convert each subrange to an NCNode consisting of a range and - * (possibly) its contained NCList - */ - for (Range sublist : sublists) - { - subranges.add(new NCNode(ranges.subList(sublist.start, - sublist.end + 1))); - } - - size = ranges.size(); - } - - public NCList(T entry) - { - this(); - subranges.add(new NCNode<>(entry)); - size = 1; - } - - public NCList() - { - subranges = new ArrayList>(); - } - - /** - * Traverses the sorted ranges to identify sublists, within which each - * interval contains the one that follows it - * - * @param ranges - * @return - */ - protected List buildSubranges(List ranges) - { - List sublists = new ArrayList<>(); - - if (ranges.isEmpty()) - { - return sublists; - } - - int listStartIndex = 0; - long lastEndPos = Long.MAX_VALUE; - - for (int i = 0; i < ranges.size(); i++) - { - ContiguousI nextInterval = ranges.get(i); - long nextStart = nextInterval.getBegin(); - long nextEnd = nextInterval.getEnd(); - if (nextStart > lastEndPos || nextEnd > lastEndPos) - { - /* - * this interval is not contained in the preceding one - * close off the last sublist - */ - sublists.add(new Range(listStartIndex, i - 1)); - listStartIndex = i; - } - lastEndPos = nextEnd; - } - - sublists.add(new Range(listStartIndex, ranges.size() - 1)); - return sublists; - } - - /** - * Adds one entry to the stored set (with duplicates allowed) - * - * @param entry - */ - public void add(T entry) - { - add(entry, true); - } - - /** - * Adds one entry to the stored set, and returns true, unless allowDuplicates - * is set to false and it is already contained (by object equality test), in - * which case it is not added and this method returns false. - * - * @param entry - * @param allowDuplicates - * @return - */ - public synchronized boolean add(T entry, boolean allowDuplicates) - { - if (!allowDuplicates && contains(entry)) - { - return false; - } - - size++; - long start = entry.getBegin(); - long end = entry.getEnd(); - - /* - * cases: - * - precedes all subranges: add as NCNode on front of list - * - follows all subranges: add as NCNode on end of list - * - enclosed by a subrange - add recursively to subrange - * - encloses one or more subranges - push them inside it - * - none of the above - add as a new node and resort nodes list (?) - */ - - /* - * find the first subrange whose end does not precede entry's start - */ - int candidateIndex = findFirstOverlap(start); - if (candidateIndex == -1) - { - /* - * all subranges precede this one - add it on the end - */ - subranges.add(new NCNode<>(entry)); - return true; - } - - /* - * search for maximal span of subranges i-k that the new entry - * encloses; or a subrange that encloses the new entry - */ - boolean enclosing = false; - int firstEnclosed = 0; - int lastEnclosed = 0; - boolean overlapping = false; - - for (int j = candidateIndex; j < subranges.size(); j++) - { - NCNode subrange = subranges.get(j); - - if (end < subrange.getBegin() && !overlapping && !enclosing) - { - /* - * new entry lies between subranges j-1 j - */ - subranges.add(j, new NCNode<>(entry)); - return true; - } - - if (subrange.getBegin() <= start && subrange.getEnd() >= end) - { - /* - * push new entry inside this subrange as it encloses it - */ - subrange.add(entry); - return true; - } - - if (start <= subrange.getBegin()) - { - if (end >= subrange.getEnd()) - { - /* - * new entry encloses this subrange (and possibly preceding ones); - * continue to find the maximal list it encloses - */ - if (!enclosing) - { - firstEnclosed = j; - } - lastEnclosed = j; - enclosing = true; - continue; - } - else - { - /* - * entry spans from before this subrange to inside it - */ - if (enclosing) - { - /* - * entry encloses one or more preceding subranges - */ - addEnclosingRange(entry, firstEnclosed, lastEnclosed); - return true; - } - else - { - /* - * entry spans two subranges but doesn't enclose any - * so just add it - */ - subranges.add(j, new NCNode<>(entry)); - return true; - } - } - } - else - { - overlapping = true; - } - } - - /* - * drops through to here if new range encloses all others - * or overlaps the last one - */ - if (enclosing) - { - addEnclosingRange(entry, firstEnclosed, lastEnclosed); - } - else - { - subranges.add(new NCNode<>(entry)); - } - - return true; - } - - /** - * Answers true if this NCList contains the given entry (by object equality - * test), else false - * - * @param entry - * @return - */ - public boolean contains(T entry) - { - /* - * find the first sublist that might overlap, i.e. - * the first whose end position is >= from - */ - int candidateIndex = findFirstOverlap(entry.getBegin()); - - if (candidateIndex == -1) - { - return false; - } - - int to = entry.getEnd(); - - for (int i = candidateIndex; i < subranges.size(); i++) - { - NCNode candidate = subranges.get(i); - if (candidate.getBegin() > to) - { - /* - * we are past the end of our target range - */ - break; - } - if (candidate.contains(entry)) - { - return true; - } - } - return false; - } - - /** - * Update the tree so that the range of the new entry encloses subranges i to - * j (inclusive). That is, replace subranges i-j (inclusive) with a new - * subrange that contains them. - * - * @param entry - * @param i - * @param j - */ - protected synchronized void addEnclosingRange(T entry, final int i, - final int j) - { - NCList newNCList = new NCList<>(); - newNCList.addNodes(subranges.subList(i, j + 1)); - NCNode newNode = new NCNode<>(entry, newNCList); - for (int k = j; k >= i; k--) - { - subranges.remove(k); - } - subranges.add(i, newNode); - } - - protected void addNodes(List> nodes) - { - for (NCNode node : nodes) - { - subranges.add(node); - size += node.size(); - } - } - - /** - * Returns a (possibly empty) list of items whose extent overlaps the given - * range - * - * @param from - * start of overlap range (inclusive) - * @param to - * end of overlap range (inclusive) - * @return - */ - public List findOverlaps(long from, long to) - { - List result = new ArrayList<>(); - - findOverlaps(from, to, result); - - return result; - } - - /** - * Recursively searches the NCList adding any items that overlap the from-to - * range to the result list - * - * @param from - * @param to - * @param result - */ - protected void findOverlaps(long from, long to, List result) - { - /* - * find the first sublist that might overlap, i.e. - * the first whose end position is >= from - */ - int candidateIndex = findFirstOverlap(from); - - if (candidateIndex == -1) - { - return; - } - - for (int i = candidateIndex; i < subranges.size(); i++) - { - NCNode candidate = subranges.get(i); - if (candidate.getBegin() > to) - { - /* - * we are past the end of our target range - */ - break; - } - candidate.findOverlaps(from, to, result); - } - - } - - /** - * Search subranges for the first one whose end position is not before the - * target range's start position, i.e. the first one that may overlap the - * target range. Returns the index in the list of the first such range found, - * or -1 if none found. - * - * @param from - * @return - */ - protected int findFirstOverlap(long from) - { - /* - * The NCList paper describes binary search for this step, - * but this not implemented here as (a) I haven't understood it yet - * and (b) it seems to imply complications for adding to an NCList - */ - - int i = 0; - if (subranges != null) - { - for (NCNode subrange : subranges) - { - if (subrange.getEnd() >= from) - { - return i; - } - i++; - } - } - return -1; - } - - /** - * Formats the tree as a bracketed list e.g. - * - *

-   * [1-100 [10-30 [10-20]], 15-30 [20-20]]
-   * 
- */ - @Override - public String toString() - { - return subranges.toString(); - } - - /** - * Returns a string representation of the data where containment is shown by - * indentation on new lines - * - * @return - */ - public String prettyPrint() - { - StringBuilder sb = new StringBuilder(512); - int offset = 0; - int indent = 2; - prettyPrint(sb, offset, indent); - sb.append(System.lineSeparator()); - return sb.toString(); - } - - /** - * @param sb - * @param offset - * @param indent - */ - void prettyPrint(StringBuilder sb, int offset, int indent) - { - boolean first = true; - for (NCNode subrange : subranges) - { - if (!first) - { - sb.append(System.lineSeparator()); - } - first = false; - subrange.prettyPrint(sb, offset, indent); - } - } - - /** - * Answers true if the data held satisfy the rules of construction of an - * NCList, else false. - * - * @return - */ - public boolean isValid() - { - return isValid(Integer.MIN_VALUE, Integer.MAX_VALUE); - } - - /** - * Answers true if the data held satisfy the rules of construction of an - * NCList bounded within the given start-end range, else false. - *

- * Each subrange must lie within start-end (inclusive). Subranges must be - * ordered by start position ascending. - *

- * - * @param start - * @param end - * @return - */ - boolean isValid(final int start, final int end) - { - int lastStart = start; - for (NCNode subrange : subranges) - { - if (subrange.getBegin() < lastStart) - { - System.err.println("error in NCList: range " + subrange.toString() - + " starts before " + lastStart); - return false; - } - if (subrange.getEnd() > end) - { - System.err.println("error in NCList: range " + subrange.toString() - + " ends after " + end); - return false; - } - lastStart = subrange.getBegin(); - - if (!subrange.isValid()) - { - return false; - } - } - return true; - } - - /** - * Answers the lowest start position enclosed by the ranges - * - * @return - */ - public int getStart() - { - return subranges.isEmpty() ? 0 : subranges.get(0).getBegin(); - } - - /** - * Returns the number of ranges held (deep count) - * - * @return - */ - public int size() - { - return size; - } - - /** - * Returns a list of all entries stored - * - * @return - */ - public List getEntries() - { - List result = new ArrayList<>(); - getEntries(result); - return result; - } - - /** - * Adds all contained entries to the given list - * - * @param result - */ - void getEntries(List result) - { - for (NCNode subrange : subranges) - { - subrange.getEntries(result); - } - } - - /** - * Deletes the given entry from the store, returning true if it was found (and - * deleted), else false. This method makes no assumption that the entry is in - * the 'expected' place in the store, in case it has been modified since it - * was added. Only the first 'same object' match is deleted, not 'equal' or - * multiple objects. - * - * @param entry - */ - public synchronized boolean delete(T entry) - { - if (entry == null) - { - return false; - } - for (int i = 0; i < subranges.size(); i++) - { - NCNode subrange = subranges.get(i); - NCList subRegions = subrange.getSubRegions(); - - if (subrange.getRegion() == entry) - { - /* - * if the subrange is rooted on this entry, promote its - * subregions (if any) to replace the subrange here; - * NB have to resort subranges after doing this since e.g. - * [10-30 [12-20 [16-18], 13-19]] - * after deleting 12-20, 16-18 is promoted to sibling of 13-19 - * but should follow it in the list of subranges of 10-30 - */ - subranges.remove(i); - if (subRegions != null) - { - subranges.addAll(subRegions.subranges); - Collections.sort(subranges, RangeComparator.BY_START_POSITION); - } - size--; - return true; - } - else - { - if (subRegions != null && subRegions.delete(entry)) - { - size--; - subrange.deleteSubRegionsIfEmpty(); - return true; - } - } - } - return false; - } -}