+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.datamodel.features;
+import jalview.datamodel.ContiguousI;
+import jalview.datamodel.Range;
+
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.List;
/**
* An adapted implementation of NCList as described in the paper
*
* <pre>
- * todo
+ * Nested Containment List (NCList): a new algorithm for accelerating
+ * interval query of genome alignment and interval databases
+ * - Alexander V. Alekseyenko, Christopher J. Lee
+ * https://doi.org/10.1093/bioinformatics/btl647
* </pre>
*/
public class NCList<T extends ContiguousI>
*/
private List<NCNode<T>> subranges;
- /*
- * a comparator to sort intervals by start position ascending, with
- * longer (enclosing) intervals preceding those they enclose
- */
- Comparator<ContiguousI> intervalSorter = new RangeComparator(true);
-
/**
* Constructor given a list of things that are each located on a contiguous
* interval. Note that the constructor may reorder the list.
}
/**
+ * Sort and group ranges into sublists where each sublist represents a region
+ * and its contained subregions
+ *
* @param ranges
*/
protected void build(List<T> ranges)
* sort by start ascending so that contained intervals
* follow their containing interval
*/
- Collections.sort(ranges, intervalSorter);
+ Collections.sort(ranges, RangeComparator.BY_START_POSITION);
- List<SubList> sublists = findSubranges(ranges);
+ List<Range> sublists = buildSubranges(ranges);
/*
* convert each subrange to an NCNode consisting of a range and
* (possibly) its contained NCList
*/
- for (SubList sublist : sublists)
+ for (Range sublist : sublists)
{
- subranges.add(new NCNode<T>(ranges.subList(sublist.startIndex,
- sublist.endIndex + 1)));
+ subranges.add(new NCNode<T>(ranges.subList(sublist.start,
+ sublist.end + 1)));
}
size = ranges.size();
public NCList(T entry)
{
this();
- List<T> ranges = new ArrayList<T>();
- ranges.add(entry);
- build(ranges);
+ subranges.add(new NCNode<>(entry));
+ size = 1;
}
public NCList()
* @param ranges
* @return
*/
- protected List<SubList> findSubranges(List<T> ranges)
+ protected List<Range> buildSubranges(List<T> ranges)
{
- List<SubList> sublists = new ArrayList<SubList>();
+ List<Range> sublists = new ArrayList<>();
+ if (ranges.isEmpty())
+ {
+ return sublists;
+ }
+
int listStartIndex = 0;
long lastEndPos = Long.MAX_VALUE;
* this interval is not contained in the preceding one
* close off the last sublist
*/
- sublists.add(new SubList(listStartIndex, i - 1));
+ sublists.add(new Range(listStartIndex, i - 1));
listStartIndex = i;
}
lastEndPos = nextEnd;
}
- sublists.add(new SubList(listStartIndex, ranges.size() - 1));
+ sublists.add(new Range(listStartIndex, ranges.size() - 1));
return sublists;
}
/**
- * Adds one entry to the stored set
+ * Adds one entry to the stored set (with duplicates allowed)
*
* @param entry
*/
- public synchronized void add(T entry)
+ public void add(T entry)
{
+ add(entry, true);
+ }
+
+ /**
+ * Adds one entry to the stored set, and returns true, unless allowDuplicates
+ * is set to false and it is already contained (by object equality test), in
+ * which case it is not added and this method returns false.
+ *
+ * @param entry
+ * @param allowDuplicates
+ * @return
+ */
+ public synchronized boolean add(T entry, boolean allowDuplicates)
+ {
+ if (!allowDuplicates && contains(entry))
+ {
+ return false;
+ }
+
size++;
long start = entry.getBegin();
long end = entry.getEnd();
/*
* all subranges precede this one - add it on the end
*/
- subranges.add(new NCNode<T>(entry));
- return;
+ subranges.add(new NCNode<>(entry));
+ return true;
}
/*
{
NCNode<T> subrange = subranges.get(j);
- if (end < subrange.getStart() && !overlapping && !enclosing)
+ if (end < subrange.getBegin() && !overlapping && !enclosing)
{
/*
* new entry lies between subranges j-1 j
*/
- subranges.add(j, new NCNode<T>(entry));
- return;
+ subranges.add(j, new NCNode<>(entry));
+ return true;
}
- if (subrange.getStart() <= start && subrange.getEnd() >= end)
+ if (subrange.getBegin() <= start && subrange.getEnd() >= end)
{
/*
* push new entry inside this subrange as it encloses it
*/
subrange.add(entry);
- return;
+ return true;
}
- if (start <= subrange.getStart())
+ if (start <= subrange.getBegin())
{
if (end >= subrange.getEnd())
{
* entry encloses one or more preceding subranges
*/
addEnclosingRange(entry, firstEnclosed, lastEnclosed);
- return;
+ return true;
}
else
{
* entry spans two subranges but doesn't enclose any
* so just add it
*/
- subranges.add(j, new NCNode<T>(entry));
- return;
+ subranges.add(j, new NCNode<>(entry));
+ return true;
}
}
}
/*
* drops through to here if new range encloses all others
+ * or overlaps the last one
*/
if (enclosing)
{
addEnclosingRange(entry, firstEnclosed, lastEnclosed);
}
+ else
+ {
+ subranges.add(new NCNode<>(entry));
+ }
+
+ return true;
}
/**
+ * Answers true if this NCList contains the given entry (by object equality
+ * test), else false
+ *
+ * @param entry
+ * @return
+ */
+ public boolean contains(T entry)
+ {
+ /*
+ * find the first sublist that might overlap, i.e.
+ * the first whose end position is >= from
+ */
+ int candidateIndex = findFirstOverlap(entry.getBegin());
+
+ if (candidateIndex == -1)
+ {
+ return false;
+ }
+
+ int to = entry.getEnd();
+
+ for (int i = candidateIndex; i < subranges.size(); i++)
+ {
+ NCNode<T> candidate = subranges.get(i);
+ if (candidate.getBegin() > to)
+ {
+ /*
+ * we are past the end of our target range
+ */
+ break;
+ }
+ if (candidate.contains(entry))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
* Update the tree so that the range of the new entry encloses subranges i to
* j (inclusive). That is, replace subranges i-j (inclusive) with a new
* subrange that contains them.
protected synchronized void addEnclosingRange(T entry, final int i,
final int j)
{
- NCList<T> newNCList = new NCList<T>();
- newNCList.subranges.addAll(subranges.subList(i, j + 1));
- NCNode<T> newNode = new NCNode<T>(entry, newNCList);
+ NCList<T> newNCList = new NCList<>();
+ newNCList.addNodes(subranges.subList(i, j + 1));
+ NCNode<T> newNode = new NCNode<>(entry, newNCList);
for (int k = j; k >= i; k--)
{
subranges.remove(k);
subranges.add(i, newNode);
}
+ protected void addNodes(List<NCNode<T>> nodes)
+ {
+ for (NCNode<T> node : nodes)
+ {
+ subranges.add(node);
+ size += node.size();
+ }
+ }
+
/**
* Returns a (possibly empty) list of items whose extent overlaps the given
* range
*/
public List<T> findOverlaps(long from, long to)
{
- List<T> result = new ArrayList<T>();
+ List<T> result = new ArrayList<>();
findOverlaps(from, to, result);
* @param to
* @param result
*/
- protected void findOverlaps(long from, long to,
- List<T> result)
+ protected void findOverlaps(long from, long to, List<T> result)
{
/*
* find the first sublist that might overlap, i.e.
for (int i = candidateIndex; i < subranges.size(); i++)
{
NCNode<T> candidate = subranges.get(i);
- if (candidate.getStart() > to)
+ if (candidate.getBegin() > to)
{
/*
* we are past the end of our target range
*/
break;
}
- candidate.addOverlaps(from, to, result);
+ candidate.findOverlaps(from, to, result);
}
}
*/
protected int findFirstOverlap(long from)
{
- // TODO binary search
- // for now quick cheat linear search
+ /*
+ * The NCList paper describes binary search for this step,
+ * but this not implemented here as (a) I haven't understood it yet
+ * and (b) it seems to imply complications for adding to an NCList
+ */
+
int i = 0;
if (subranges != null)
{
}
/**
- * Returns a string representation of the data where containment is shown bgy
+ * Returns a string representation of the data where containment is shown by
* indentation on new lines
*
* @return
int offset = 0;
int indent = 2;
prettyPrint(sb, offset, indent);
+ sb.append(System.lineSeparator());
return sb.toString();
}
int lastStart = start;
for (NCNode<T> subrange : subranges)
{
- if (subrange.getStart() < lastStart)
+ if (subrange.getBegin() < lastStart)
{
System.err.println("error in NCList: range " + subrange.toString()
+ " starts before " + lastStart);
+ " ends after " + end);
return false;
}
- lastStart = subrange.getStart();
+ lastStart = subrange.getBegin();
if (!subrange.isValid())
{
*/
public int getStart()
{
- return subranges.isEmpty() ? 0 : subranges.get(0).getStart();
+ return subranges.isEmpty() ? 0 : subranges.get(0).getBegin();
}
/**
*
* @return
*/
- public int getSize()
+ public int size()
{
return size;
}
+
+ /**
+ * Returns a list of all entries stored
+ *
+ * @return
+ */
+ public List<T> getEntries()
+ {
+ List<T> result = new ArrayList<>();
+ getEntries(result);
+ return result;
+ }
+
+ /**
+ * Adds all contained entries to the given list
+ *
+ * @param result
+ */
+ void getEntries(List<T> result)
+ {
+ for (NCNode<T> subrange : subranges)
+ {
+ subrange.getEntries(result);
+ }
+ }
+
+ /**
+ * Deletes the given entry from the store, returning true if it was found (and
+ * deleted), else false. This method makes no assumption that the entry is in
+ * the 'expected' place in the store, in case it has been modified since it
+ * was added. Only the first 'same object' match is deleted, not 'equal' or
+ * multiple objects.
+ *
+ * @param entry
+ */
+ public synchronized boolean delete(T entry)
+ {
+ if (entry == null)
+ {
+ return false;
+ }
+ for (int i = 0; i < subranges.size(); i++)
+ {
+ NCNode<T> subrange = subranges.get(i);
+ NCList<T> subRegions = subrange.getSubRegions();
+
+ if (subrange.getRegion() == entry)
+ {
+ /*
+ * if the subrange is rooted on this entry, promote its
+ * subregions (if any) to replace the subrange here;
+ * NB have to resort subranges after doing this since e.g.
+ * [10-30 [12-20 [16-18], 13-19]]
+ * after deleting 12-20, 16-18 is promoted to sibling of 13-19
+ * but should follow it in the list of subranges of 10-30
+ */
+ subranges.remove(i);
+ if (subRegions != null)
+ {
+ subranges.addAll(subRegions.subranges);
+ Collections.sort(subranges, RangeComparator.BY_START_POSITION);
+ }
+ size--;
+ return true;
+ }
+ else
+ {
+ if (subRegions != null && subRegions.delete(entry))
+ {
+ size--;
+ subrange.deleteSubRegionsIfEmpty();
+ return true;
+ }
+ }
+ }
+ return false;
+ }
}