From b74cdec4b07000bee431a7cca86a948f44d3ffbe Mon Sep 17 00:00:00 2001 From: hansonr Date: Wed, 4 Sep 2019 13:49:36 -0500 Subject: [PATCH] JAL-3397 final update --- src/intervalstore/api/IntervalI.java | 12 +- src/intervalstore/api/IntervalStoreI.java | 24 + src/intervalstore/impl/BinarySearcher.java | 33 +- src/intervalstore/impl/IntervalStore.java | 64 +- src/intervalstore/impl/NCList.java | 94 +- src/intervalstore/impl/NCListBuilder.java | 64 +- src/intervalstore/impl/NCNode.java | 5 +- src/intervalstore/impl/Range.java | 107 ++ src/intervalstore/impl/SimpleFeature.java | 109 ++ src/intervalstore/nonc/IntervalEndSorter.java | 686 +++++++++++++ src/intervalstore/nonc/IntervalStore.java | 856 +++++++++++----- src/intervalstore/nonc/IntervalStore0.java | 1055 ++++++++++++++++++++ src/jalview/analysis/CrossRef.java | 3 +- src/jalview/datamodel/SequenceFeature.java | 46 +- src/jalview/datamodel/features/FeatureStore.java | 165 ++- .../datamodel/features/FeatureStoreImpl.java | 50 +- src/jalview/datamodel/features/FeatureStoreJS.java | 24 +- .../datamodel/features/SequenceFeatures.java | 40 +- src/jalview/urls/IdentifiersUrlProvider.java | 2 +- .../datamodel/features/FeatureStoreJSTest.java | 42 +- ...ureStoreTest.java => FeatureStoreJavaTest.java} | 15 +- ...reNoNCTest.java => FeatureStoreLinkedTest.java} | 5 +- .../features/FeatureStoreNCListBufferTest.java | 899 +++++++++++++++++ 23 files changed, 3872 insertions(+), 528 deletions(-) create mode 100644 src/intervalstore/impl/Range.java create mode 100644 src/intervalstore/impl/SimpleFeature.java create mode 100644 src/intervalstore/nonc/IntervalEndSorter.java create mode 100644 src/intervalstore/nonc/IntervalStore0.java rename test/jalview/datamodel/features/{FeatureStoreTest.java => FeatureStoreJavaTest.java} (99%) rename test/jalview/datamodel/features/{FeatureStoreNoNCTest.java => FeatureStoreLinkedTest.java} (99%) create mode 100644 test/jalview/datamodel/features/FeatureStoreNCListBufferTest.java diff --git 
a/src/intervalstore/api/IntervalI.java b/src/intervalstore/api/IntervalI.java index c170a43..d2594b8 100644 --- a/src/intervalstore/api/IntervalI.java +++ b/src/intervalstore/api/IntervalI.java @@ -40,6 +40,9 @@ public interface IntervalI /** * Compare intervals by start position ascending and end position descending. + * + * BIGENDIAN sorts 10-100 ahead of 10-80 (original IntervalStoreJ method) + * */ static Comparator COMPARATOR_BIGENDIAN = new Comparator() { @@ -52,7 +55,10 @@ public interface IntervalI }; /** - * Compare intervals by start position ascending and end position ascending. + * Compare intervals by start position ascending and end position ascending. + * + * LITTLEENDIAN sorts 10-100 after 10-80 + * */ static Comparator COMPARATOR_LITTLEENDIAN = new Comparator() { @@ -153,7 +159,9 @@ public interface IntervalI * that point we know that we have the correct type. * * @param i - * @return true if equal + * may be null + * @return true if equal; null value must return false, not throw + * NullPointerException */ abstract boolean equalsInterval(IntervalI i); diff --git a/src/intervalstore/api/IntervalStoreI.java b/src/intervalstore/api/IntervalStoreI.java index 3b0f575..43aea2b 100644 --- a/src/intervalstore/api/IntervalStoreI.java +++ b/src/intervalstore/api/IntervalStoreI.java @@ -91,6 +91,30 @@ public interface IntervalStoreI extends Collection */ boolean revalidate(); + /** + * Get the i-th interval, whatever that means to this store. + * + * @param i + * @return + */ IntervalI get(int i); + /** + * Check to see if this store can check for duplicates while adding. + * + * @return + */ + boolean canCheckForDuplicates(); + + /** + * Add with a check for duplicates, if possible. 
+ * + * @param interval + * @param checkForDuplicate + * @return false only if addition was unsuccessful because there was an + * identical interval already in the store or because the store cannot + * check for duplicates + */ + boolean add(T interval, boolean checkForDuplicate); + } \ No newline at end of file diff --git a/src/intervalstore/impl/BinarySearcher.java b/src/intervalstore/impl/BinarySearcher.java index 1086e91..6c598ce 100644 --- a/src/intervalstore/impl/BinarySearcher.java +++ b/src/intervalstore/impl/BinarySearcher.java @@ -32,7 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package intervalstore.impl; import java.util.List; -import java.util.function.Function; +import java.util.function.ToIntFunction; + +import intervalstore.api.IntervalI; /** * Provides a method to perform binary search of an ordered list for the first @@ -42,6 +44,29 @@ import java.util.function.Function; */ public final class BinarySearcher { + + public static ToIntFunction fbegin = new ToIntFunction() + { + + @Override + public int applyAsInt(IntervalI value) + { + return value.getBegin(); + } + + }; + + public static ToIntFunction fend = new ToIntFunction() + { + + @Override + public int applyAsInt(IntervalI value) + { + return value.getEnd(); + } + + }; + private BinarySearcher() { } @@ -63,8 +88,8 @@ public final class BinarySearcher * @return * @see java.util.Collections#binarySearch(List, Object) */ - public static int findFirst(List list, - Function test) + public static int findFirst(List list, int pos, + ToIntFunction test) { int start = 0; int end = list.size() - 1; @@ -74,7 +99,7 @@ public final class BinarySearcher { int mid = (start + end) / 2; T entry = list.get(mid); - boolean itsTrue = test.apply(entry); + boolean itsTrue = test.applyAsInt(entry) >= pos; if (itsTrue) { matched = mid; diff --git a/src/intervalstore/impl/IntervalStore.java b/src/intervalstore/impl/IntervalStore.java index 8634ad4..9faae7f 100644 --- 
a/src/intervalstore/impl/IntervalStore.java +++ b/src/intervalstore/impl/IntervalStore.java @@ -216,7 +216,8 @@ public class IntervalStore * find the first stored interval which doesn't precede the new one */ int insertPosition = BinarySearcher.findFirst(nonNested, - val -> val.getBegin() >= entry.getBegin()); + entry.getBegin(), + BinarySearcher.fbegin); /* * fail if we detect interval enclosure * - of the new interval by the one before or after it @@ -224,7 +225,8 @@ public class IntervalStore */ if (insertPosition > 0) { - if (nonNested.get(insertPosition - 1).properlyContainsInterval(entry)) + if (nonNested.get(insertPosition - 1) + .properlyContainsInterval(entry)) { return false; } @@ -257,7 +259,7 @@ public class IntervalStore if (nested != null) { - result.addAll(nested.findOverlaps(from, to)); + nested.findOverlaps(from, to, result); } return result; @@ -269,7 +271,7 @@ public class IntervalStore String pp = nonNested.toString(); if (nested != null) { - pp += System.lineSeparator() + nested.prettyPrint(); + pp += '\n' + nested.prettyPrint(); } return pp; } @@ -358,13 +360,14 @@ public class IntervalStore * start position is not less than the target range start * (NB inequality test ensures the first match if any is found) */ - int startIndex = BinarySearcher.findFirst(nonNested, - val -> val.getBegin() >= entry.getBegin()); + int from = entry.getBegin(); + int startIndex = BinarySearcher.findFirst(nonNested, from, + BinarySearcher.fbegin); /* * traverse intervals to look for a match */ - int from = entry.getBegin(); + int i = startIndex; int size = nonNested.size(); while (i < size) @@ -429,13 +432,14 @@ public class IntervalStore /* * locate the first entry in the list which does not precede the interval */ - int pos = BinarySearcher.findFirst(intervals, - val -> val.getBegin() >= interval.getBegin()); + int from = interval.getBegin(); + int pos = BinarySearcher.findFirst(intervals, from, + BinarySearcher.fbegin); int len = intervals.size(); while (pos < 
len) { T sf = intervals.get(pos); - if (sf.getBegin() > interval.getBegin()) + if (sf.getBegin() > from) { return false; // no match found } @@ -482,23 +486,19 @@ public class IntervalStore * find the first interval whose end position is * after the target range start */ - int startIndex = BinarySearcher.findFirst(nonNested, - val -> val.getEnd() >= from); - - final int startIndex1 = startIndex; - int i = startIndex1; - while (i < nonNested.size()) + int startIndex = BinarySearcher.findFirst(nonNested, (int) from, + BinarySearcher.fend); + for (int i = startIndex, n = nonNested.size(); i < n; i++) { T sf = nonNested.get(i); if (sf.getBegin() > to) { break; } - if (sf.getBegin() <= to && sf.getEnd() >= from) + if (sf.getEnd() >= from) { result.add(sf); } - i++; } } @@ -508,7 +508,8 @@ public class IntervalStore String s = nonNested.toString(); if (nested != null) { - s = s + System.lineSeparator() + nested.toString(); + s = s + '\n'// + System.lineSeparator() + + nested.toString(); } return s; } @@ -516,28 +517,39 @@ public class IntervalStore @Override public int getWidth() { - // TODO Auto-generated method stub - return 0; + return (nonNested == null ? 0 : nonNested.size()) + + (nested == null ? 
0 : nested.size()); } @Override public List findOverlaps(long start, long end, List result) { - // TODO Auto-generated method stub - return null; + return findOverlaps(start, end); } @Override public boolean revalidate() { - // TODO Auto-generated method stub - return false; + // not applicable + return true; } @Override public IntervalI get(int i) { - // TODO Auto-generated method stub + // not supported (but could be) return null; } + + @Override + public boolean canCheckForDuplicates() + { + return false; + } + + @Override + public boolean add(T interval, boolean checkForDuplicate) + { + return add(interval); + } } diff --git a/src/intervalstore/impl/NCList.java b/src/intervalstore/impl/NCList.java index 0bf6e1b..243192d 100644 --- a/src/intervalstore/impl/NCList.java +++ b/src/intervalstore/impl/NCList.java @@ -39,7 +39,6 @@ import java.util.List; import java.util.NoSuchElementException; import intervalstore.api.IntervalI; -import intervalstore.impl.Range; /** * An adapted implementation of NCList as described in the paper @@ -53,6 +52,9 @@ import intervalstore.impl.Range; */ public class NCList extends AbstractCollection { + + // private static final boolean OPTION_FIND_ANY = false; + /** * A depth-first iterator over the elements stored in the NCList */ @@ -200,7 +202,7 @@ public class NCList extends AbstractCollection * sort by start ascending, length descending, so that * contained intervals follow their containing interval */ - Collections.sort(ranges, new NCListBuilder<>().getComparator()); + Collections.sort(ranges, IntervalI.COMPARATOR_BIGENDIAN); int listStartIndex = 0; @@ -455,8 +457,13 @@ public class NCList extends AbstractCollection * @param to * @param result */ - protected void findOverlaps(long from, long to, List result) + protected List findOverlaps(long from, long to, List result) { + + // if (OPTION_FIND_ANY) + // { + // return findAnyOverlaps(from, to, result); + // } /* * find the first sublist that might overlap, i.e. 
* the first whose end position is >= from @@ -475,8 +482,74 @@ public class NCList extends AbstractCollection } candidate.findOverlaps(from, to, result); } - + return result; } + + // /** + // * Recursively searches the NCList adding any items that overlap the from-to + // * range to the result list + // * + // * @param from + // * @param to + // * @param result + // */ + // protected List findAnyOverlaps(long from, long to, List result) + // { + // + // // BH find ANY overlap + // + // int candidateIndex = findAnyOverlap(subranges, from, to); + // + // if (candidateIndex < 0) + // return result; + // for (int i = candidateIndex, n = subranges.size(); i < n; i++) + // { + // NCNode candidate = subranges.get(i); + // if (candidate.getBegin() > to) + // { + // /* + // * we are past the end of our target range + // */ + // break; + // } + // candidate.findOverlaps(from, to, result); + // } + // + // // BH adds dual-direction check + // + // for (int i = candidateIndex; --i >= 0;) + // { + // NCNode candidate = subranges.get(i); + // if (candidate.getEnd() < from) + // { + // break; + // } + // candidate.findOverlaps(from, to, result); + // } + // return result; + // } + // + // private int findAnyOverlap(List> ranges, long from, long to) + // { + // int start = 0; + // int end = ranges.size() - 1; + // while (start <= end) + // { + // int mid = (start + end) >>> 1; + // NCNode r = ranges.get(mid); + // if (r.getEnd() >= from) + // { + // if (r.getBegin() <= to) + // return mid; + // end = mid - 1; + // } + // else + // { + // start = mid + 1; + // } + // } + // return -1; + // } /** * Search subranges for the first one whose end position is not before the @@ -489,8 +562,8 @@ public class NCList extends AbstractCollection */ protected int findFirstOverlap(final long from) { - return BinarySearcher.findFirst(subranges, - val -> val.getEnd() >= from); + return BinarySearcher.findFirst(subranges, (int) from, + BinarySearcher.fend); } /** @@ -517,7 +590,7 @@ public class 
NCList extends AbstractCollection int offset = 0; int indent = 2; prettyPrint(sb, offset, indent); - sb.append(System.lineSeparator()); + sb.append('\n');// System.lineSeparator()); return sb.toString(); } @@ -533,7 +606,7 @@ public class NCList extends AbstractCollection { if (!first) { - sb.append(System.lineSeparator()); + sb.append('\n');// System.lineSeparator()); } first = false; subrange.prettyPrint(sb, offset, indent); @@ -749,4 +822,9 @@ public class NCList extends AbstractCollection subranges.clear(); size = 0; } + + public int getWidth() + { + return subranges.size(); + } } diff --git a/src/intervalstore/impl/NCListBuilder.java b/src/intervalstore/impl/NCListBuilder.java index 4c87306..d640589 100644 --- a/src/intervalstore/impl/NCListBuilder.java +++ b/src/intervalstore/impl/NCListBuilder.java @@ -37,7 +37,6 @@ import java.util.Comparator; import java.util.List; import intervalstore.api.IntervalI; -import intervalstore.impl.Range; /** * A comparator that orders ranges by either start position ascending. If @@ -49,36 +48,37 @@ import intervalstore.impl.Range; */ public class NCListBuilder { - class NCListComparator implements Comparator - { - /** - * Compares two intervals in a way that will sort a list by start position - * ascending, then by length descending. Answers - *
    - *
  • a negative value if o1.begin < o2.begin
  • - *
  • else a positive value if o1.begin > o2.begin
  • - *
  • else a negative value if o1.end > o2.end
  • - *
  • else a positive value of o1.end < o2.end
  • - *
  • else zero
  • - */ - @Override - public int compare(V o1, V o2) - { - int order = Integer.compare(o1.getBegin(), o2.getBegin()); - if (order == 0) - { - /* - * if tied on start position, longer length sorts to left - * i.e. the negation of normal ordering by length - */ - order = Integer.compare(o2.getEnd(), o1.getEnd()); - } - return order; - } - } + // class NCListComparator implements Comparator + // { + // /** + // * Compares two intervals in a way that will sort a list by start position + // * ascending, then by length descending. Answers + // *
      + // *
    • a negative value if o1.begin < o2.begin
    • + // *
    • else a positive value if o1.begin > o2.begin
    • + // *
    • else a negative value if o1.end > o2.end
    • + // *
    • else a positive value if o1.end < o2.end
    • + // *
    • else zero
    • + // */ + // @Override + // public int compare(V o1, V o2) + // { + // int order = Integer.compare(o1.getBegin(), o2.getBegin()); + // if (order == 0) + // { + // /* + // * if tied on start position, longer length sorts to left + // * i.e. the negation of normal ordering by length + // */ + // order = Integer.compare(o2.getEnd(), o1.getEnd()); + // } + // return order; + // } + // } - private Comparator comparator = new NCListComparator<>(); + private Comparator comparator = IntervalI.COMPARATOR_BIGENDIAN;// new + // NCListComparator<>(); /** * Default constructor @@ -93,7 +93,7 @@ public class NCListBuilder * * @return */ - Comparator getComparator() + Comparator getComparator() { return comparator; } @@ -121,7 +121,7 @@ public class NCListBuilder IntervalI lastParent = ranges.get(0); boolean first = true; - for (int i = 0; i < ranges.size(); i++) + for (int i = 0, n = ranges.size(); i < n; i++) { IntervalI nextInterval = ranges.get(i); if (!first && !lastParent.properlyContainsInterval(nextInterval)) diff --git a/src/intervalstore/impl/NCNode.java b/src/intervalstore/impl/NCNode.java index 16ae0b7..a3702f5 100644 --- a/src/intervalstore/impl/NCNode.java +++ b/src/intervalstore/impl/NCNode.java @@ -168,7 +168,7 @@ class NCNode implements IntervalI sb.append(region.toString()); if (subregions != null) { - sb.append(System.lineSeparator()); + sb.append('\n');// System.lineSeparator()); subregions.prettyPrint(sb, offset + 2, indent); } } @@ -374,7 +374,8 @@ class NCNode implements IntervalI @Override public boolean equalsInterval(IntervalI i) { - return getBegin() == i.getBegin() && getEnd() == i.getEnd(); + return i != null && getBegin() == i.getBegin() + && getEnd() == i.getEnd(); } diff --git a/src/intervalstore/impl/Range.java b/src/intervalstore/impl/Range.java new file mode 100644 index 0000000..c07e793 --- /dev/null +++ b/src/intervalstore/impl/Range.java @@ -0,0 +1,107 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights 
reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ +package intervalstore.impl; + +import intervalstore.api.IntervalI; + +/** + * A simple data bean that models a start-end range (fields are mutable) + */ +public class Range implements IntervalI +{ + + // no need for final here; these can be fully mutable as long as + // store.revalidate() is run afterwards + + public int start; + + public int end; + + + @Override + public int getBegin() + { + return start; + } + + @Override + public int getEnd() + { + return end; + } + + public Range(int i, int j) + { + start = i; + end = j; + } + + @Override + public String toString() + { + return String.valueOf(start) + "-" + String.valueOf(end); + } + + @Override + public int hashCode() + { + return start * 31 + end; + } + + @Override + public boolean equals(Object o) + { + return (o != null && o instanceof Range && equalsInterval((Range) o)); + } + + @Override + public boolean equalsInterval(IntervalI obj) + { + + // override equalsInterval, not equals + return (obj != null && start == ((Range) obj).start + && end == ((Range) obj).end); + + } + + public void setStart(int pos) + { + start = pos; + } + + public void setEnd(int pos) + { + end = pos; + } + + +} diff --git a/src/intervalstore/impl/SimpleFeature.java b/src/intervalstore/impl/SimpleFeature.java new file mode 100644 index 0000000..be1db97 --- /dev/null +++ b/src/intervalstore/impl/SimpleFeature.java @@ -0,0 +1,109 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.impl; + +import intervalstore.api.IntervalI; + +/** + * A simplified feature instance sufficient for unit test purposes + */ +public class SimpleFeature extends Range +{ + + private String description; + + + /** + * Constructor + * + * @param from + * @param to + * @param desc + */ + public SimpleFeature(int from, int to, String desc) + { + super(from, to); + description = desc; + } + + /** + * Copy constructor + * + * @param sf1 + */ + public SimpleFeature(SimpleFeature sf1) + { + this(sf1.start, sf1.end, sf1.description); + } + + public String getDescription() + { + return description; + } + + @Override + public int hashCode() + { + return start + 37 * end + + (description == null ? 0 : description.hashCode()); + } + + @Override + public boolean equals(Object o) + { + return (o != null && o instanceof SimpleFeature + && equalsInterval((SimpleFeature) o)); + } + + /** + * Equals method that requires two instances to have the same description, as + * well as start and end position. 
Returns false if the argument is null + */ + @Override + public boolean equalsInterval(IntervalI o) + { + // must override equalsInterval, not equals + return (o != null && start == ((SimpleFeature) o).start + && end == ((SimpleFeature) o).end) + && (description == null + ? ((SimpleFeature) o).description == null + : description.equals(((SimpleFeature) o).description)); + } + + @Override + public String toString() + { + return start + ":" + end + ":" + description; + } + + +} diff --git a/src/intervalstore/nonc/IntervalEndSorter.java b/src/intervalstore/nonc/IntervalEndSorter.java new file mode 100644 index 0000000..282c880 --- /dev/null +++ b/src/intervalstore/nonc/IntervalEndSorter.java @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package intervalstore.nonc; + +import intervalstore.api.IntervalI; + +/** + * A dual pivot quicksort for int[] where the int is a pointer to something for + * which the value needs to be checked. This class is not used; it was just an + * idea I was trying. But it is sort of cool, so I am keeping it in the package + * for possible future use. + * + * Adapted from Java 7 java.util.DualPivotQuicksort -- int[] only. The only + * difference is that wherever an a[] value is compared, we use val(a[i]) + * instead of a[i] itself. Pretty straightforward. Could be adapted for general + * use. Why didn't they do this in Java? + * + * val(i) is just a hack here, of course. A more general implementation might + * use a Function call. + * + * Just thought it was cool that you can do this. + * + * @author Bob Hanson 2019.09.02 + * + */ + +class IntervalEndSorter +{ + + private IntervalI[] intervals; + + private int val(int i) + { + return intervals[i].getEnd(); + } + + /* + * Tuning parameters. + */ + + /** + * The maximum number of runs in merge sort. + */ + private static final int MAX_RUN_COUNT = 67; + + /** + * The maximum length of run in merge sort. + */ + private static final int MAX_RUN_LENGTH = 33; + + /** + * If the length of an array to be sorted is less than this constant, + * Quicksort is used in preference to merge sort. + */ + private static final int QUICKSORT_THRESHOLD = 286; + + /** + * If the length of an array to be sorted is less than this constant, + * insertion sort is used in preference to Quicksort. + */ + private static final int INSERTION_SORT_THRESHOLD = 47; + + /* + * Sorting methods for seven primitive types. 
+ */ + + /** + * Sorts the specified range of the array using the given workspace array + * slice if possible for merging + * + * @param a + * the array to be sorted + * @param left + * the index of the first element, inclusive, to be sorted + * @param right + * the index of the last element, inclusive, to be sorted + * @param work + * a workspace array (slice) + * @param workBase + * origin of usable space in work array + * @param workLen + * usable size of work array + */ + void sort(int[] a, IntervalI[] intervals, int len) + { + this.intervals = intervals; + + int left = 0, right = len - 1; + // Use Quicksort on small arrays + if (right - left < QUICKSORT_THRESHOLD) + { + sort(a, left, right, true); + return; + } + + /* + * Index run[i] is the start of i-th run + * (ascending or descending sequence). + */ + int[] run = new int[MAX_RUN_COUNT + 1]; + int count = 0; + run[0] = left; + + // Check if the array is nearly sorted + for (int k = left; k < right; run[count] = k) + { + switch (Integer.signum(val(a[k + 1]) - val(a[k]))) + { + case 1: + // ascending + while (++k <= right && val(a[k - 1]) <= val(a[k])) + ; + break; + case -1: + // descending + while (++k <= right && val(a[k - 1]) >= val(a[k])) + ; + for (int lo = run[count] - 1, hi = k; ++lo < --hi;) + { + int t = a[lo]; + a[lo] = a[hi]; + a[hi] = t; + } + break; + default: + // equal + for (int m = MAX_RUN_LENGTH; ++k <= right + && val(a[k - 1]) == val(a[k]);) + { + if (--m == 0) + { + sort(a, left, right, true); + return; + } + } + } + + /* + * The array is not highly structured, + * use Quicksort instead of merge sort. + */ + if (++count == MAX_RUN_COUNT) + { + sort(a, left, right, true); + return; + } + } + + // Check special cases + // Implementation note: variable "right" is increased by 1. 
+ if (run[count] == right++) + { // The last run contains one element + run[++count] = right; + } + else if (count == 1) + { // The array is already sorted + return; + } + + // Determine alternation base for merge + byte odd = 0; + for (int n = 1; (n <<= 1) < count; odd ^= 1) + ; + + // Use or create temporary array b for merging + int[] b; // temp array; alternates with a + int ao, bo; // array offsets from 'left' + int blen = right - left; // space needed for b + int[] work = new int[blen]; + int workBase = 0; + if (odd == 0) + { + System.arraycopy(a, left, work, workBase, blen); + b = a; + bo = 0; + a = work; + ao = workBase - left; + } + else + { + b = work; + ao = 0; + bo = workBase - left; + } + + // Merging + for (int last; count > 1; count = last) + { + for (int k = (last = 0) + 2; k <= count; k += 2) + { + int hi = run[k], mi = run[k - 1]; + for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) + { + if (q >= hi || p < mi && val(a[p + ao]) <= val(a[q + ao])) + { + b[i + bo] = a[p++ + ao]; + } + else + { + b[i + bo] = a[q++ + ao]; + } + } + run[++last] = hi; + } + if ((count & 1) != 0) + { + for (int i = right, lo = run[count - 1]; --i >= lo; b[i + bo] = a[i + + ao]) + ; + run[++last] = right; + } + int[] t = a; + a = b; + b = t; + int o = ao; + ao = bo; + bo = o; + } + } + + /** + * Sorts the specified range of the array by Dual-Pivot Quicksort. + * + * @param a + * the array to be sorted + * @param left + * the index of the first element, inclusive, to be sorted + * @param right + * the index of the last element, inclusive, to be sorted + * @param leftmost + * indicates if this part is the leftmost in the range + */ + private void sort(int[] a, int left, int right, boolean leftmost) + { + int length = right - left + 1; + + // Use insertion sort on tiny arrays + if (length < INSERTION_SORT_THRESHOLD) + { + if (leftmost) + { + /* + * Traditional (without sentinel) insertion sort, + * optimized for server VM, is used in case of + * the leftmost part. 
+ */ + for (int i = left, j = i; i < right; j = ++i) + { + int ai = a[i + 1]; + while (val(ai) < val(a[j])) + { + a[j + 1] = a[j]; + if (j-- == left) + { + break; + } + } + a[j + 1] = ai; + } + } + else + { + /* + * Skip the longest ascending sequence. + */ + do + { + if (left >= right) + { + return; + } + } while (val(a[++left]) >= val(a[left - 1])); + + /* + * Every element from adjoining part plays the role + * of sentinel, therefore this allows us to avoid the + * left range check on each iteration. Moreover, we use + * the more optimized algorithm, so called pair insertion + * sort, which is faster (in the context of Quicksort) + * than traditional implementation of insertion sort. + */ + for (int k = left; ++left <= right; k = ++left) + { + int a1 = a[k], a2 = a[left]; + + if (val(a1) < val(a2)) + { + a2 = a1; + a1 = a[left]; + } + while (val(a1) < val(a[--k])) + { + a[k + 2] = a[k]; + } + a[++k + 1] = a1; + + while (val(a2) < val(a[--k])) + { + a[k + 1] = a[k]; + } + a[k + 1] = a2; + } + int last = a[right]; + + while (val(last) < val(a[--right])) + { + a[right + 1] = a[right]; + } + a[right + 1] = last; + } + return; + } + + // Inexpensive approximation of length / 7 + int seventh = (length >> 3) + (length >> 6) + 1; + + /* + * Sort five evenly spaced elements around (and including) the + * center element in the range. These elements will be used for + * pivot selection as described below. The choice for spacing + * these elements was empirically determined to work well on + * a wide variety of inputs. 
+ */ + int e3 = (left + right) >>> 1; // The midpoint + int e2 = e3 - seventh; + int e1 = e2 - seventh; + int e4 = e3 + seventh; + int e5 = e4 + seventh; + + // Sort these elements using insertion sort + if (val(a[e2]) < val(a[e1])) + { + int t = a[e2]; + a[e2] = a[e1]; + a[e1] = t; + } + + if (val(a[e3]) < val(a[e2])) + { + int t = a[e3]; + a[e3] = a[e2]; + a[e2] = t; + if (val(t) < val(a[e1])) + { + a[e2] = a[e1]; + a[e1] = t; + } + } + if (val(a[e4]) < val(a[e3])) + { + int t = a[e4]; + a[e4] = a[e3]; + a[e3] = t; + int vt = val(t); + if (vt < val(a[e2])) + { + a[e3] = a[e2]; + a[e2] = t; + if (vt < val(a[e1])) + { + a[e2] = a[e1]; + a[e1] = t; + } + } + } + if (val(a[e5]) < val(a[e4])) + { + int t = a[e5]; + a[e5] = a[e4]; + a[e4] = t; + int vt = val(t); + if (vt < val(a[e3])) + { + a[e4] = a[e3]; + a[e3] = t; + if (vt < val(a[e2])) + { + a[e3] = a[e2]; + a[e2] = t; + if (vt < val(a[e1])) + { + a[e2] = a[e1]; + a[e1] = t; + } + } + } + } + + // Pointers + int less = left; // The index of the first element of center part + int great = right; // The index before the first element of right part + + if (val(a[e1]) != val(a[e2]) && val(a[e2]) != val(a[e3]) + && val(a[e3]) != val(a[e4]) && val(a[e4]) != val(a[e5])) + { + /* + * Use the second and fourth of the five sorted elements as pivots. + * These values are inexpensive approximations of the first and + * second terciles of the array. Note that pivot1 <= pivot2. + */ + int pivot1 = val(a[e2]); + int pivot2 = val(a[e4]); + int pivot1k = a[e2]; + int pivot2k = a[e4]; + + /* + * The first and the last elements to be sorted are moved to the + * locations formerly occupied by the pivots. When partitioning + * is complete, the pivots are swapped back into their final + * positions, and excluded from subsequent sorting. + */ + a[e2] = a[left]; + a[e4] = a[right]; + + /* + * Skip elements, which are less or greater than pivot values. 
+ */ + while (val(a[++less]) < pivot1) + ; + while (val(a[--great]) > pivot2) + ; + + /* + * Partitioning: + * + * left part center part right part + * +--------------------------------------------------------------+ + * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | + * +--------------------------------------------------------------+ + * ^ ^ ^ + * | | | + * less k great + * + * Invariants: + * + * all in (left, less) < pivot1 + * pivot1 <= all in [less, k) <= pivot2 + * all in (great, right) > pivot2 + * + * Pointer k is the first index of ?-part. + */ + outer: for (int k = less - 1; ++k <= great;) + { + int ak = a[k]; + if (val(ak) < pivot1) + { // Move a[k] to left part + a[k] = a[less]; + /* + * Here and below we use "a[i] = b; i++;" instead + * of "a[i++] = b;" due to performance issue. + */ + a[less] = ak; + ++less; + } + else if (val(ak) > pivot2) + { // Move a[k] to right part + while (val(a[great]) > pivot2) + { + if (great-- == k) + { + break outer; + } + } + if (val(a[great]) < pivot1) + { // a[great] <= pivot2 + a[k] = a[less]; + a[less] = a[great]; + ++less; + } + else + { // pivot1 <= a[great] <= pivot2 + a[k] = a[great]; + } + /* + * Here and below we use "a[i] = b; i--;" instead + * of "a[i--] = b;" due to performance issue. + */ + a[great] = ak; + --great; + } + } + + // Swap pivots into their final positions + a[left] = a[less - 1]; + a[less - 1] = pivot1k; + a[right] = a[great + 1]; + a[great + 1] = pivot2k; + + // Sort left and right parts recursively, excluding known pivots + sort(a, left, less - 2, leftmost); + sort(a, great + 2, right, false); + + /* + * If center part is too large (comprises > 4/7 of the array), + * swap internal pivot values to ends. + */ + if (less < e1 && e5 < great) + { + /* + * Skip elements, which are equal to pivot values. 
+ */ + while (val(a[less]) == pivot1) + { + ++less; + } + + while (val(a[great]) == pivot2) + { + --great; + } + + /* + * Partitioning: + * + * left part center part right part + * +----------------------------------------------------------+ + * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | + * +----------------------------------------------------------+ + * ^ ^ ^ + * | | | + * less k great + * + * Invariants: + * + * all in (*, less) == pivot1 + * pivot1 < all in [less, k) < pivot2 + * all in (great, *) == pivot2 + * + * Pointer k is the first index of ?-part. + */ + outer: for (int k = less - 1; ++k <= great;) + { + int ak = a[k]; + if (val(ak) == pivot1) + { // Move a[k] to left part + a[k] = a[less]; + a[less] = ak; + ++less; + } + else if (val(ak) == pivot2) + { // Move a[k] to right part + while (val(a[great]) == pivot2) + { + if (great-- == k) + { + break outer; + } + } + if (val(a[great]) == pivot1) + { // a[great] < pivot2 + a[k] = a[less]; + /* + * Even though a[great] equals to pivot1, the + * assignment a[less] = pivot1 may be incorrect, + * if a[great] and pivot1 are floating-point zeros + * of different signs. Therefore in float and + * double sorting methods we have to use more + * accurate assignment a[less] = a[great]. + */ + a[less] = pivot1k; + ++less; + } + else + { // pivot1 < a[great] < pivot2 + a[k] = a[great]; + } + a[great] = ak; + --great; + } + } + } + + // Sort center part recursively + sort(a, less, great, false); + + } + else + { // Partitioning with one pivot + /* + * Use the third of the five sorted elements as pivot. + * This value is inexpensive approximation of the median. + */ + int pivot = val(a[e3]); + + /* + * Partitioning degenerates to the traditional 3-way + * (or "Dutch National Flag") schema: + * + * left part center part right part + * +-------------------------------------------------+ + * | < pivot | == pivot | ? 
| > pivot | + * +-------------------------------------------------+ + * ^ ^ ^ + * | | | + * less k great + * + * Invariants: + * + * all in (left, less) < pivot + * all in [less, k) == pivot + * all in (great, right) > pivot + * + * Pointer k is the first index of ?-part. + */ + for (int k = less; k <= great; ++k) + { + if (val(a[k]) == pivot) + { + continue; + } + int ak = a[k]; + if (val(ak) < pivot) + { // Move a[k] to left part + a[k] = a[less]; + a[less] = ak; + ++less; + } + else + { // a[k] > pivot - Move a[k] to right part + while (val(a[great]) > pivot) + { + --great; + } + if (val(a[great]) < pivot) + { // a[great] <= pivot + a[k] = a[less]; + a[less] = a[great]; + ++less; + } + else + { // a[great] == pivot + /* + * Even though a[great] equals to pivot, the + * assignment a[k] = pivot may be incorrect, + * if a[great] and pivot are floating-point + * zeros of different signs. Therefore in float + * and double sorting methods we have to use + * more accurate assignment a[k] = a[great]. + */ + // So, guess what? + // + // Actually, we do need a[great] for IntervalStore, + // because here, too, the numbers are not necessarily the same item + // + // a[k] = pivot; + a[k] = a[great]; + } + a[great] = ak; + --great; + } + } + + /* + * Sort left and right parts recursively. + * All elements from center part are equal + * and, therefore, already sorted. + */ + sort(a, left, less - 1, leftmost); + sort(a, great + 1, right, false); + } + } + +} diff --git a/src/intervalstore/nonc/IntervalStore.java b/src/intervalstore/nonc/IntervalStore.java index 55d29e8..bc9ca83 100644 --- a/src/intervalstore/nonc/IntervalStore.java +++ b/src/intervalstore/nonc/IntervalStore.java @@ -46,92 +46,70 @@ import intervalstore.api.IntervalStoreI; /** * - * A second idea, doing a double binary sort for the full interval. Seemed like - * a good idea, but is 50% slower.
+ * A fourth idea, implementing NCList as a pointer system identical in operation + * to IntervalStoreJ's implementation using ArrayLists but here using just two + * int[] arrays and a single IntervalI[] array that is in the proper order for + * holding all nested and unnested arrays. + * + * Use of unnesting is optional and can be experimented with by changing the + * createUnnested flag to false. + * + * Preliminary testing suggests that this implementation is about 10% faster for + * store interval size 50, store sequence factor 10, query width -1000 (fixed + * 1000-unit-wide window), and query count 100000. + * + * Original note (Mungo Carstairs, IntervalStoreJ) * * A Collection class to store interval-associated data, with options for "lazy" * sorting so as to speed incremental construction of the data prior to issuing * a findOverlap call. * - * * Accepts duplicate entries but not null values. * * * - * @author Bob Hanson 2019.08.06 + * @author Bob Hanson 2019.09.01 * * @param - * any type providing getBegin(), getEnd() - * getContainedBy(), and setContainedBy() + * any type providing getBegin() and + * getEnd(), primarily */ public class IntervalStore extends AbstractCollection implements IntervalStoreI { /** - * Search for the last interval that starts before or at the specified from/to - * range and the first interval that starts after it. In the situation that - * there are multiple intervals starting at from, this method returns the - * first of those. + * Search for the first interval that ends at or after the specified + * position. In the situation that there are multiple such intervals, + * this method returns the first of those.
* - * @param a + * @param nests + * the nest-ordered array from createArrays() * @param from - * @param to - * @param ret - * @return + * the position at the start of the interval of interest + * @param start + * the starting point for the subarray search + * @param end + * the ending point for the subarray search + * @return index into the nests array or one greater than end if not found */ - public int binaryLastIntervalSearch(long from, - long to, int[] ret) + public static int binarySearchFirstEndNotBefore(IntervalI[] nests, long from, + int start, int end) { - int start = 0, start2 = 0; - int matched = 0; - int end = intervalCount - 1, end2 = intervalCount; - int mid, begin; - IntervalI e; + int matched = end + 1; + int mid; while (start <= end) { mid = (start + end) >>> 1; - e = intervals[mid]; - begin = e.getBegin(); - switch (Long.signum(begin - from)) + if (nests[mid].getEnd() >= from) { - case -1: matched = mid; - start = mid + 1; - break; - case 0: - case 1: - end = mid - 1; - if (begin > to) - { - end2 = mid; - } - else - { - start2 = mid; - } - break; - } - } - ret[0] = end2; - start = Math.max(start2, end); - end = end2 - 1; - - while (start <= end) - { - mid = (start + end) >>> 1; - e = intervals[mid]; - begin = e.getBegin(); - if (begin > to) - { - ret[0] = mid; end = mid - 1; } else { start = mid + 1; } - } return matched; } @@ -150,22 +128,20 @@ public class IntervalStore private boolean isSorted; + private boolean createUnnested = true; + private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, maxEnd = Integer.MAX_VALUE; - // private Comparator icompare = new IntervalComparator(); - private boolean isTainted; private int capacity = 8; - private IntervalI[] intervals = new IntervalI[capacity]; + protected IntervalI[] intervals = new IntervalI[capacity]; private int[] offsets; - private int[] ret = new int[1]; - - private int intervalCount; + protected int intervalCount; private int added; @@ -174,8 +150,28 @@ public class 
IntervalStore private BitSet bsDeleted; /** - * Constructor + * the key array that lists the intervals in sub-interval order so that the + * binary search can be isolated to a single subinterval just by indicating + * start and end within one array */ + private IntervalI[] nests; + + /** + * pointers to the starting positions in nests[] for a subinterval; the first + * element is the "unnested" pointer when unnesting (2) or the root level nest + * pointer when not unnesting (1); the second element is root level nest when + * unnesting or the start of nest data when not unnesting; after that, nests + * are in contiguous sets of binary-searchable blocks + * + */ + private int[] nestStarts; + + /** + * the count of intervals within a nest + * + */ + private int[] nestCounts; + public IntervalStore() { this(true); @@ -204,7 +200,10 @@ public class IntervalStore */ public IntervalStore(List intervals, boolean presort) { - this(intervals, presort, null, false); + // setting default to BIG_ENDIAN, meaning + // the order will be [10,100] before [10,80] + // this order doesn't really matter much. + this(intervals, presort, null, true); } /** @@ -218,29 +217,45 @@ public class IntervalStore * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that * sorts by description as well, for example. * @param bigendian - * true if the comparator sorts [10-30] before [10-20] + * true if the comparator sorts [10-100] before [10-80]; defaults to + * true */ public IntervalStore(List intervals, boolean presort, Comparator comparator, boolean bigendian) { + icompare = (comparator != null ? comparator + : bigendian ? 
IntervalI.COMPARATOR_BIGENDIAN + : IntervalI.COMPARATOR_LITTLEENDIAN); + this.bigendian = bigendian; + if (intervals != null) { + // So, five hours later, we learn that all my timing has been thrown off + // because I used Array.sort, which if you look in the Java JDK is exactly + // what Collections.sort is, but for whatever reason, all my times were + // high by about 100-200 ms 100% reproducibly. Just one call to Array.sort + // prior to the nanotimer start messed it all up. Some sort of memory or + // garbage issue; I do not know. But using Collections.sort here fixes the + // problem. + + Collections.sort(intervals, icompare); intervals.toArray( this.intervals = new IntervalI[capacity = intervalCount = intervals .size()]); } DO_PRESORT = presort; - icompare = (comparator != null ? comparator - : bigendian ? IntervalI.COMPARATOR_BIGENDIAN - : IntervalI.COMPARATOR_LITTLEENDIAN); - this.bigendian = bigendian; - if (DO_PRESORT && intervalCount > 1) { - sort(); + updateMinMaxStart(); + isSorted = true; + isTainted = true; + ensureFinalized(); + } + else + { + isSorted = DO_PRESORT; + isTainted = true; } - isSorted = DO_PRESORT; - isTainted = true; } /** @@ -257,9 +272,16 @@ public class IntervalStore /** * Adds one interval to the store, optionally checking for duplicates. * + * This fast-adding algorithm uses a double-length int[] (offsets) to hold + * pointers into intervals[] that allows continual sorting of an expanding + * array buffer. When the time comes, this is cleaned up and packed back into + * a standard array, but in the mean time, it can be added to with no loss of + * sorting. 
+ * * @param interval * @param allowDuplicates */ + @Override public boolean add(T interval, boolean allowDuplicates) { if (interval == null) @@ -298,6 +320,9 @@ public class IntervalStore else { index = findInterval(interval); + // System.out.println("index = " + index + " for " + interval + "\n" + // + Arrays.toString(intervals) + "\n" + // + Arrays.toString(offsets)); if (!allowDuplicates && index >= 0) { return false; @@ -349,6 +374,12 @@ public class IntervalStore } } + /** + * Clean up the intervals array into a simple ordered array. + * + * @param dest + * @return + */ private IntervalI[] finalizeAddition(IntervalI[] dest) { if (dest == null) @@ -364,11 +395,12 @@ public class IntervalStore capacity = dest.length; return dest; } + // System.out.println("finalizing " + intervalCount + " " + added); // array is [(intervalCount)...null...(added)] int ntotal = intervalCount + added; - for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;) + for (int ptShift = ntotal, pt = intervalCount; pt >= 0;) { int pt0 = pt; while (--pt >= 0 && offsets[pt] == 0) @@ -390,22 +422,30 @@ public class IntervalStore { break; } - for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) - { - dest[--ptShift] = intervals[offset]; - --added; - } + for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) + { + dest[--ptShift] = intervals[offset]; + --added; + } } offsets = null; intervalCount = ntotal; capacity = dest.length; + // System.out.println(Arrays.toString(dest)); return dest; } + /** + * A binary search for a duplicate. 
+ * + * @param interval + * @return + */ public int binaryIdentitySearch(IntervalI interval) { return binaryIdentitySearch(interval, null); } + /** * for remove() and contains() * @@ -445,10 +485,10 @@ public class IntervalStore IntervalI iv = intervals[mid]; if ((bsIgnore == null || !bsIgnore.get(mid)) && iv.equalsInterval(interval)) - { + { return mid; - // found one; just scan up and down now, first checking the range, but - // also checking other possible aspects of equivalence. + // found one; just scan up and down now, first checking the range, but + // also checking other possible aspects of equivalence. } for (int i = mid; ++i <= end;) @@ -465,7 +505,8 @@ public class IntervalStore } for (int i = mid; --i >= start;) { - if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1) + if ((iv = intervals[i]).getBegin() != r0 + || (bigendian ? r1 < iv.getEnd() : iv.getEnd() < r1)) { return -1 - ++i; } @@ -475,41 +516,22 @@ public class IntervalStore return i; } } - return -1 - start; + return -1 - mid; } } return -1 - start; } - // private int binaryInsertionSearch(long from, long to) - // { - // int matched = intervalCount; - // int end = matched - 1; - // int start = matched; - // if (end < 0 || from > intervals[end].getEnd() - // || from < intervals[start = 0].getBegin()) - // return start; - // while (start <= end) - // { - // int mid = (start + end) >>> 1; - // switch (compareRange(intervals[mid], from, to)) - // { - // case 0: - // return mid; - // case 1: - // matched = mid; - // end = mid - 1; - // continue; - // case -1: - // start = mid + 1; - // continue; - // } - // - // } - // return matched; - // } - + @Override + public boolean canCheckForDuplicates() + { + return true; + } + /** + * Reset all arrays. 
+ * + */ @Override public void clear() { @@ -517,9 +539,13 @@ public class IntervalStore isSorted = true; isTainted = true; offsets = null; + intervals = new IntervalI[8]; + nestStarts = nestCounts = null; + nests = null; minStart = maxEnd = Integer.MAX_VALUE; maxStart = Integer.MIN_VALUE; } + /** * Compare an interval t to a from/to range for insertion purposes * @@ -532,10 +558,6 @@ public class IntervalStore */ private int compareRange(IntervalI t, long from, long to) { - if (t == null) - { - System.out.println("???"); - } int order = Long.signum(t.getBegin() - from); return (order == 0 ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) @@ -545,7 +567,7 @@ public class IntervalStore @Override public boolean contains(Object entry) { - if (entry == null || intervalCount == 0) + if (entry == null || intervalCount == 0 && added == 0 && deleted == 0) { return false; } @@ -553,26 +575,30 @@ public class IntervalStore { sort(); } - return (findInterval((IntervalI) entry) >= 0); + int n = findInterval((IntervalI) entry); + return (n >= 0); } + /** + * Check to see if a given interval is within another. + * + * Not implemented. + * + * @param outer + * @param inner + * @return + */ public boolean containsInterval(IntervalI outer, IntervalI inner) { - ensureFinalized(); - int index = binaryIdentitySearch(inner, null); - if (index >= 0) - { - while ((index = index - Math.abs(offsets[index])) >= 0) - { - if (intervals[index] == outer) - { - return true; - } - } - } - return false; + return false; // not applicable } + /** + * Ensure that all addition, deletion, and sorting has been done, and that the + * nesting arrays have been created so that we are ready for findOverlaps(). 
+ * + */ + private void ensureFinalized() { if (isTainted) @@ -581,11 +607,10 @@ public class IntervalStore { sort(); } - if (offsets == null || offsets.length < intervalCount) + if (intervalCount > 0) { - offsets = new int[intervalCount]; + createArrays(); } - linkFeatures(); isTainted = false; } } @@ -599,18 +624,16 @@ public class IntervalStore @Override public List findOverlaps(long from, long to) { - List list = findOverlaps(from, to, null); - Collections.reverse(list); - return list; + return findOverlaps(from, to, null); } /** * Find all overlaps within the given range, inclusively. * - * @return a list sorted in descending order of start position + * @return a list sorted in the order provided by the features list comparator * */ - + @SuppressWarnings("unchecked") @Override public List findOverlaps(long from, long to, List result) @@ -638,39 +661,114 @@ public class IntervalStore { return result; } - int index = binaryLastIntervalSearch(from, to, ret); - int index1 = ret[0]; - if (index1 < 0) + int root = 0; + if (createUnnested) { - return result; + if (nestCounts[0] > 0) + { + searchNonnested(nestCounts[0], nests, from, to, + (List) result); + } + root = 1; } + if (nestCounts[root] > 0) + { + search(nests, from, to, root, result); + } + return result; + } - if (index1 > index + 1) + /** + * A simpler search, since we know we don't have any subintervals. Not + * necessary, actually. 
+ * + * @param nestStarts + * @param nestCounts + * @param nests + * @param from + * @param to + * @param result + */ + private static void searchNonnested(int n, + IntervalI[] nests, long from, long to, List result) + { + int end = 2 + n - 1; + for (int pt = binarySearchFirstEndNotBefore(nests, from, 2, + end); pt <= end; pt++) { - while (--index1 > index) + IntervalI ival = nests[pt]; + if (ival.getBegin() > to) { - result.add((T) intervals[index1]); + break; } + result.add(ival); } - boolean isMonotonic = false; - while (index >= 0) + } + + /** + * The main search of the nests[] array's subarrays + * + * @param nests + * @param from + * @param to + * @param nest + * @param result + */ + @SuppressWarnings("unchecked") + private void search(IntervalI[] nests, long from, long to, int nest, + List result) + { + int start = nestStarts[nest]; + int n = nestCounts[nest]; + int end = start + n - 1; + IntervalI first = nests[start]; + IntervalI last = nests[end]; + + // quick tests for common cases: + // out of range + if (last.getEnd() < from || first.getBegin() > to) { - IntervalI sf = intervals[index]; - if (sf.getEnd() >= from) + return; + } + int pt; + switch (n) + { + case 1: + // just one interval and hasn't failed begin/end test + pt = start; + break; + case 2: + // just two and didn't fail begin/end test + // so there is only one option: either the first or the second is our + // winner + pt = (first.getEnd() >= from ? start : end); + break; + default: + // do the binary search + pt = binarySearchFirstEndNotBefore(nests, from, start, end); + break; + } + for (; pt <= end; pt++) + { + IntervalI ival = nests[pt]; + // check for out of range + if (ival.getBegin() > to) { - result.add((T) sf); + break; } - else if (isMonotonic) + result.add((T) ival); + if (nestCounts[pt] > 0) { - break; + // check subintervals in this nest + search(nests, from, to, pt, result); } - int offset = offsets[index]; - isMonotonic = (offset < 0); - index -= (isMonotonic ? 
-offset : offset); } - return result; } + /** + * return the i-th interval in the designated order (bigendian or + * littleendian) + */ @Override public IntervalI get(int i) { @@ -682,68 +780,57 @@ public class IntervalStore return intervals[i]; } - private int getContainedBy(int index, int begin) - { - while (index >= 0) - { - IntervalI sf = intervals[index]; - if (sf.getEnd() >= begin) - { - // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 - // + "\nFS in " + sf.getIndex1() + ":" + sf); - return index; - } - index -= Math.abs(offsets[index]); - } - return IntervalI.NOT_CONTAINED; - } - + /** + * Return the deepest level of nesting. + * + */ @Override public int getDepth() { ensureFinalized(); - if (intervalCount < 2) + BitSet bsTested = new BitSet(); + return Math.max((createUnnested ? getDepth(1, bsTested) : 0), + getDepth(0, bsTested)); + } + + /** + * Recursively dive deeply. + * + * @param pt + * @param bsTested + * @return + */ + private int getDepth(int pt, BitSet bsTested) + { + int maxDepth = 0; + int depth; + int n = nestCounts[pt]; + if (n == 0 || bsTested.get(pt)) { - return intervalCount; + return 1; } - int maxDepth = 1; - IntervalI root = null; - for (int i = 0; i < intervalCount; i++) + bsTested.set(pt); + for (int st = nestStarts[pt], i = st + n; --i >= st;) { - IntervalI element = intervals[i]; - if (offsets[i] == IntervalI.NOT_CONTAINED) - { - root = element; - } - int depth = 1; - int index = i; - int offset; - while ((index = index - Math.abs(offset = offsets[index])) >= 0) + if ((depth = getDepth(i, bsTested)) > maxDepth) { - element = intervals[index]; - if (++depth > maxDepth && (element == root || offset < 0)) - { - maxDepth = depth; - break; - } + maxDepth = depth; } } - return maxDepth; + return maxDepth + 1; } + /** + * Get the number of root-level nests.
+ * + */ @Override public int getWidth() { ensureFinalized(); - int w = 0; - for (int i = offsets.length; --i >= 0;) - { - if (offsets[i] > 0) - { - w++; - } - } - return w; + // System.out.println( + // "ISList w[0]=" + nestCounts[0] + " w[1]=" + nestCounts[1]); + return nestCounts[0] + (createUnnested ? nestCounts[1] : 0); } @Override @@ -788,62 +875,48 @@ public class IntervalStore }; } - private void linkFeatures() + /** + * Indented printing of the intervals. + * + */ + @Override + public String prettyPrint() { - if (intervalCount == 0) + ensureFinalized(); + StringBuffer sb = new StringBuffer(); + if (createUnnested) { - return; + sb.append("unnested:"); + dump(0, sb, "\n"); + sb.append("\nnested:"); + dump(1, sb, "\n"); } - maxEnd = intervals[0].getEnd(); - offsets[0] = IntervalI.NOT_CONTAINED; - if (intervalCount == 1) + else { - return; + dump(0, sb, "\n"); } - boolean isMonotonic = true; - for (int i = 1; i < intervalCount; i++) - { - IntervalI sf = intervals[i]; - int begin = sf.getBegin(); - int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1); - // System.out.println(sf + " is contained by " - // + (index < 0 ? null : starts[index])); - - offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED - : isMonotonic ? index - i : i - index); - isMonotonic = (sf.getEnd() > maxEnd); - if (isMonotonic) - { - maxEnd = sf.getEnd(); - } - } - + return sb.toString(); } - @Override - public String prettyPrint() + /** + * Recursive nest dump.
+ * + * @param nest + * @param sb + * @param sep + */ + private void dump(int nest, StringBuffer sb, String sep) { - switch (intervalCount + added) - { - case 0: - return ""; - case 1: - return intervals[0] + "\n"; - } - ensureFinalized(); - String sep = "\t"; - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < intervalCount; i++) + int pt = nestStarts[nest]; + int n = nestCounts[nest]; + sep += " "; + + for (int i = 0; i < n; i++) { - IntervalI range = intervals[i]; - int index = i; - while ((index = index - Math.abs(offsets[index])) >= 0) - { - sb.append(sep); - } - sb.append(range.toString()).append('\n'); + sb.append(sep).append(nests[pt + i].toString()); + if (nestCounts[pt + i] > 0) + dump(pt + i, sb, sep + " "); } - return sb.toString(); } @Override @@ -878,29 +951,29 @@ public class IntervalStore return pt; } pt = -1 - pt; - int start = interval.getBegin(); - int end = interval.getEnd(); + int start = interval.getBegin(); + int end = interval.getEnd(); - int match = pt; + int match = pt; - while ((pt = offsets[pt]) != 0) + while ((pt = offsets[pt]) != 0) + { + IntervalI iv = intervals[pt]; + switch (compareRange(iv, start, end)) { - IntervalI iv = intervals[pt]; - switch (compareRange(iv, start, end)) + case -1: + break; + case 0: + if (iv.equalsInterval(interval)) { - case -1: - break; - case 0: - if (iv.equalsInterval(interval)) - { - return pt; - } + return pt; + } // fall through - case 1: + case 1: match = pt; - continue; - } + continue; } + } return -1 - match; } else @@ -948,6 +1021,10 @@ public class IntervalStore return (isTainted = true); } + /** + * Fill in the gaps of the intervals array after one or more deletions. + * + */ private void finalizeDeletion() { if (deleted == 0) @@ -984,9 +1061,12 @@ public class IntervalStore i = pt1; } - } + /** + * Recreate the key nest arrays. 
+ * + */ @Override public boolean revalidate() { @@ -996,12 +1076,19 @@ public class IntervalStore return true; } + /** + * Return the total number of intervals in the store. + * + */ @Override public int size() { return intervalCount + added - deleted; } + /** + * AbstractCollection override to ensure that we have finalized the store. + */ @Override public Object[] toArray() { @@ -1010,7 +1097,7 @@ public class IntervalStore } /** - * Sort intervals by start (lowest first) and end (highest first). + * Sort intervals by start. */ private void sort() { @@ -1024,12 +1111,250 @@ public class IntervalStore } else { + // SOMETHING HAPPENS WHEN Arrays.sort is run that + // adds 100 ms to a 150 ms run time. + // I don't know why. Arrays.sort(intervals, 0, intervalCount, icompare); } updateMinMaxStart(); isSorted = true; } + // 0 5-5 + // 1 6-8 + // 2 10-80 + // 3 10-100 + // 4 10-100 + // 5 20-30 + // 6 35-40 + // 7 50-80 + // 8 51-51 + // 9 52-52 + // 10 55-60 + // 11 56-56 + // 12 70-120 + // 13 78-78 + // + // cont [-1, -1, -1, -1, 3, 4, 4, 4, 7, 7, 7, 10, -1, 12] + // nests [0, 0, 1, 2, 3, 12, 4, 5, 6, 7, 8, 9, 10, 11, 13] + // starts [1, 0, 0, 0, 6, 14, 7, 0, 0, 10, 0, 0, 13, 0, 0] + // counts [5, 0, 0, 0, 1, 1, 3, 0, 0, 3, 0, 0, 1, 0, 0] + + /** + * Create the key arrays: nests, nestStarts, and nestCounts. The starting + * point is getting the container array, which may hold -1 (top level nesting) + * and -2 (unnested set, if doing that). + * + * This is a pretty complicated method; it was way simpler before I decided to + * support nesting as an option. + * + */ + private void createArrays() + { + + /** + * When unnesting, we need a second top-level listing. + * + */ + int incr = (createUnnested ? 2 : 1); + + /** + * The three key arrays produced by this method: + */ + + nests = new IntervalI[intervalCount + incr]; + nestStarts = new int[intervalCount + incr]; + nestCounts = new int[intervalCount + incr]; + + /** + * a temporary array used in Phase Two. 
+ */ + + int[] counts = new int[intervalCount + incr]; + + /** + * the objective of Phase One + */ + int[] myContainer = new int[intervalCount]; + + myContainer[0] = -incr; + counts[0] = 1; + int beginLast = intervals[0].getBegin(); + int endLast = intervals[0].getEnd(); + int ptLastNot2 = -1; + int endLast2 = endLast; + int beginLast2 = beginLast; + + // Phase One: Get the temporary container array myContainer. + + for (int i = 1; i < intervalCount; i++) + { + int pt = i - 1; + int end = intervals[i].getEnd(); + int begin = intervals[i].getBegin(); + + // set the pointer to the element that is containing + // this interval, or -2 (unnested) or -1 (root-level nest) + + myContainer[i] = -incr; + + // OK, now figure it all out... + + boolean isNested; + if (createUnnested) + { + // Using a method isNested(...) here, because there are different + // ways of defining "nested" when start or end are the + // same. The definition used here would not be my first choice, + // but it matches results for IntervalStoreJ + // perfectly, down to the exact number of times that the + // binary search runs through its start/mid/end loops in findOverlap. + + // beginLast2 and endLast2 refer to the root-level or unnested level + + if (!isNested(begin, end, beginLast2, endLast2)) + { + isNested = false; + } + else + { + // this is tricky; making sure we properly get the + // nests that are to be removed from the top-level + // unnested list assigned a container -1, while all + // top-level nests get -2. + + pt = ptLastNot2; + isNested = (pt < 0 || isNested(begin, end, + intervals[pt].getBegin(), intervals[pt].getEnd())); + if (!isNested) + { + myContainer[i] = -1; + } + } + } + else + { + isNested = isNested(begin, end, beginLast, endLast); + } + + // ...almost done... 
+ + if (isNested) + { + myContainer[i] = pt; + } + else + { + + // monotonic -- find the parent that is doing the nesting + + while ((pt = myContainer[pt]) >= 0) + { + if (isNested(begin, end, intervals[pt].getBegin(), + intervals[pt].getEnd())) + { + myContainer[i] = pt; + // fully contained by a previous interval + // System.out.println("mycontainer " + i + " = " + pt); + break; + } + } + } + + // update the counts and pointers + + counts[myContainer[i] + incr]++; + if (myContainer[i] == -2) + { + endLast2 = end; + beginLast2 = begin; + } + else + { + ptLastNot2 = i; + endLast = end; + beginLast = begin; + } + } + + // Phase Two: construct the nests[] array and its associated + // starting pointer array and nest element counts. These counts + // are actually produced above, but we reconstruct it as a set + // of dynamic pointers during construction. + + // incr is either 1 (no separate unnested set) or 2 (include unnested) + + int nextStart = counts[0] + incr; + /** + * this array tracks the pointer within nestStarts to the nest block start + * in nests[]. + */ + int[] startPt = new int[intervalCount + incr]; + nestStarts[0] = incr; + + // When not unnesting, nestStarts[0] = 1, and the length + // will start out here as 0 but increment as we go. + // We do this even though we know its size already, because that + // value serves as a dynamic pointer as well. + + if (createUnnested) + { + + // Unnesting requires two separate lists with proper pointers and counts. + // The first, nestStarts[0] = 0, is for the unnested set (container -2); + // the second (container -1, nestStarts[1]) is for the nest root. + + startPt[1] = 1; + nestStarts[1] = nextStart; + nextStart += counts[1]; + } + + // Now get all the pointers right and set the nests[] pointer into intervals + // correctly. 
+ + for (int i = 0; i < intervalCount; i++) + { + int n = counts[i + incr]; + int ptNest = startPt[myContainer[i] + incr]; + int p = nestStarts[ptNest] + nestCounts[ptNest]++; + nests[p] = intervals[i]; + if (n > 0) + { + startPt[i + incr] = p; + nestStarts[p] = nextStart; + nextStart += n; + } + } + + // System.out.println("intervals " + Arrays.toString(intervals)); + // System.out.println("nests " + Arrays.toString(nests)); + // System.out.println("conts " + Arrays.toString(myContainer)); + // System.out.println("starts " + Arrays.toString(nestStarts)); + // System.out.println("counts " + Arrays.toString(nestCounts)); + // System.out.println("done " + nestCounts[0]); + } + + /** + * Child-Parent relationships to match IntervalStoreJ. Perhaps a bit arcane? + * Objective is to minimize the depth when we can. + * + * @param childStart + * @param childEnd + * @param parentStart + * @param parentEnd + * @return + */ + private static boolean isNested(int childStart, int childEnd, + int parentStart, int parentEnd) + { + return (parentStart <= childStart && parentEnd > childEnd + || parentStart < childStart && parentEnd == childEnd); + } + + /** + * Just a couple of pointers to help speed findOverlaps along a bit. + * + */ private void updateMinMaxStart() { if (intervalCount > 0) @@ -1050,4 +1375,5 @@ public class IntervalStore return prettyPrint(); } + } diff --git a/src/intervalstore/nonc/IntervalStore0.java b/src/intervalstore/nonc/IntervalStore0.java new file mode 100644 index 0000000..389439f --- /dev/null +++ b/src/intervalstore/nonc/IntervalStore0.java @@ -0,0 +1,1055 @@ +/* +BSD 3-Clause License + +Copyright (c) 2018, Mungo Carstairs +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package intervalstore.nonc; + +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import intervalstore.api.IntervalI; +import intervalstore.api.IntervalStoreI; + +/** + * + * A second idea, doing a double binary sort for the full interval. Seemed like + * a good idea, but is 50% slower. + * + * A Collection class to store interval-associated data, with options for "lazy" + * sorting so as to speed incremental construction of the data prior to issuing + * a findOverlap call. + * + * + * Accepts duplicate entries but not null values. 
+ * + * + * + * @author Bob Hanson 2019.08.06 + * + * @param + * any type providing getBegin(), getEnd() + * getContainedBy(), and setContainedBy() + */ +public class IntervalStore0 + extends AbstractCollection implements IntervalStoreI +{ + + /** + * Search for the last interval that starts before the specified from/to range + * and the first interval that starts after it. This is a binary search on + * getBegin(), so intervals[0..intervalCount) must already be sorted by start. + * Intervals that start exactly at from are not reported by the returned + * index; they lie after it. + * + * @param from + * start of the query range + * @param to + * end of the query range + * @param ret + * one-element output array; ret[0] is set to the index of the first + * interval that starts after to, or intervalCount if there is none + * @return index of the last interval that starts before from; 0 is also + * returned when no interval starts before from + */ + public int binaryLastIntervalSearch(long from, long to, int[] ret) + { + int start = 0, start2 = 0; + int matched = 0; + int end = intervalCount - 1, end2 = intervalCount; + int mid, begin; + IntervalI e; + while (start <= end) + { + mid = (start + end) >>> 1; + e = intervals[mid]; + begin = e.getBegin(); + switch (Long.signum(begin - from)) + { + case -1: + matched = mid; + start = mid + 1; + break; + case 0: + case 1: + end = mid - 1; + if (begin > to) + { + end2 = mid; + } + else + { + start2 = mid; + } + break; + } + } + ret[0] = end2; + start = Math.max(start2, end); + end = end2 - 1; + + /* second pass: refine ret[0] to the first interval that starts after to */ + while (start <= end) + { + mid = (start + end) >>> 1; + e = intervals[mid]; + begin = e.getBegin(); + if (begin > to) + { + ret[0] = mid; + end = mid - 1; + } + else + { + start = mid + 1; + } + + } + return matched; + } + + /** + * My preference is for a bigendian comparison, but you may differ.
+ */ + private Comparator icompare; + + /** + * bigendian is what NCList does; change icompare to switch to that + */ + private boolean bigendian; + + /** + * true to sort the initial contents at construction and to place each added + * interval at its sorted position + */ + private final boolean DO_PRESORT; + + private boolean isSorted; + + private int minStart = Integer.MAX_VALUE, maxStart = Integer.MIN_VALUE, + maxEnd = Integer.MAX_VALUE; + + // private Comparator icompare = new IntervalComparator(); + + private boolean isTainted; + + private int capacity = 8; + + protected IntervalI[] intervals = new IntervalI[capacity]; + + private int[] offsets; + + /** + * reusable one-element output buffer for binaryLastIntervalSearch + */ + private int[] ret = new int[1]; + + protected int intervalCount; + + /** + * number of intervals stashed at the top end of the intervals array by add(), + * waiting to be merged into place by finalizeAddition() + */ + private int added; + + /** + * number of intervals flagged in bsDeleted, waiting to be removed by + * finalizeDeletion() + */ + private int deleted; + + private BitSet bsDeleted; + + /** + * Constructs an empty store with presort enabled. + */ + public IntervalStore0() + { + this(true); + } + + public IntervalStore0(boolean presort) + { + this(null, presort); + } + + /** + * Constructor given a list of intervals, with presort enabled. The intervals + * are copied into an internal array, and it is that array which gets sorted; + * the supplied list itself is not reordered. + */ + public IntervalStore0(List intervals) + { + this(intervals, true); + } + + /** + * Allows a presort option. When presort is false, sorting is deferred until + * it is first needed (for example, by the first findOverlaps call), which can + * speed up initial loading of individual features but will delay that first + * findOverlaps. + * + * @param intervals + * intervals to initialize with, or null for none + * @param presort + * true to sort now and keep the store sorted as intervals are added + */ + public IntervalStore0(List intervals, boolean presort) + { + this(intervals, presort, null, false); + } + + /** + * + * @param intervals + * intervals to initialize with (others may still be added) + * @param presort + * whether or not to presort the list as additions are made + * @param comparator + * IntervalI.COMPARATOR_LITTLEENDIAN or + * IntervalI.COMPARATOR_BIGENDIAN, but this could also be one that + * sorts by description as well, for example.
+ * @param bigendian + * true if the comparator sorts [10-30] before [10-20] + */ + public IntervalStore0(List intervals, boolean presort, + Comparator comparator, boolean bigendian) + { + if (intervals != null) + { + intervals.toArray( + this.intervals = new IntervalI[capacity = intervalCount = intervals + .size()]); + } + DO_PRESORT = presort; + icompare = (comparator != null ? comparator + : bigendian ? IntervalI.COMPARATOR_BIGENDIAN + : IntervalI.COMPARATOR_LITTLEENDIAN); + this.bigendian = bigendian; + + if (DO_PRESORT && intervalCount > 1) + { + sort(); + } + isSorted = DO_PRESORT; + isTainted = true; + } + + /** + * Adds one interval to the store, allowing duplicates. + * + * @param interval + */ + @Override + public boolean add(T interval) + { + return add(interval, true); + } + + /** + * Adds one interval to the store, optionally checking for duplicates. + * + * @param interval + * @param allowDuplicates + */ + @Override + public boolean add(T interval, boolean allowDuplicates) + { + if (interval == null) + { + return false; + } + + if (deleted > 0) + { + finalizeDeletion(); + } + if (!isTainted) + { + offsets = null; + isTainted = true; + } + + synchronized (intervals) + { + int index = intervalCount; + int start = interval.getBegin(); + + if (intervalCount + added + 1 >= capacity) + { + intervals = finalizeAddition( + new IntervalI[capacity = capacity << 1]); + + } + + if (DO_PRESORT && isSorted) + { + if (intervalCount == 0) + { + // ignore + } + else + { + index = findInterval(interval); + if (!allowDuplicates && index >= 0) + { + return false; + } + if (index < 0) + { + index = -1 - index; + } + else + { + index++; + } + } + + } + else + { + if (!allowDuplicates && findInterval(interval) >= 0) + { + return false; + } + isSorted = false; + } + + if (index == intervalCount) + { + intervals[intervalCount++] = interval; + // System.out.println("added " + intervalCount + " " + interval); + } + else + { + int pt = capacity - ++added; + intervals[pt] = 
interval; + // System.out.println("stashed " + pt + " " + interval + " for " + // + index + " " + intervals[index]); + if (offsets == null) + { + offsets = new int[capacity]; + } + + offsets[pt] = offsets[index]; + + offsets[index] = pt; + } + + minStart = Math.min(minStart, start); + maxStart = Math.max(maxStart, start); + return true; + } + } + + private IntervalI[] finalizeAddition(IntervalI[] dest) + { + if (dest == null) + { + dest = intervals; + } + if (added == 0) + { + if (intervalCount > 0 && dest != intervals) + { + System.arraycopy(intervals, 0, dest, 0, intervalCount); + } + capacity = dest.length; + return dest; + } + + // array is [(intervalCount)...null...(added)] + + int ntotal = intervalCount + added; + for (int ptShift = intervalCount + added, pt = intervalCount; pt >= 0;) + { + int pt0 = pt; + while (--pt >= 0 && offsets[pt] == 0) + { + ; + } + if (pt < 0) + { + pt = 0; + } + int nOK = pt0 - pt; + // shift upper intervals right + ptShift -= nOK; + if (nOK > 0) + { + System.arraycopy(intervals, pt, dest, ptShift, nOK); + } + if (added == 0) + { + break; + } + for (int offset = offsets[pt]; offset > 0; offset = offsets[offset]) + { + dest[--ptShift] = intervals[offset]; + --added; + } + } + offsets = null; + intervalCount = ntotal; + capacity = dest.length; + return dest; + } + + public int binaryIdentitySearch(IntervalI interval) + { + return binaryIdentitySearch(interval, null); + } + + /** + * for remove() and contains() + * + * @param list + * @param interval + * @param bsIgnore + * for deleted + * @return index or, if not found, -1 - "would be here" + */ + public int binaryIdentitySearch(IntervalI interval, BitSet bsIgnore) + { + int start = 0; + int r0 = interval.getBegin(); + int r1 = interval.getEnd(); + int end = intervalCount - 1; + if (end < 0 || r0 < minStart) + { + return -1; + } + if (r0 > maxStart) + { + return -1 - intervalCount; + } + while (start <= end) + { + int mid = (start + end) >>> 1; + IntervalI r = intervals[mid]; + switch 
(compareRange(r, r0, r1)) + { + case -1: + start = mid + 1; + continue; + case 1: + end = mid - 1; + continue; + case 0: + IntervalI iv = intervals[mid]; + if ((bsIgnore == null || !bsIgnore.get(mid)) + && iv.equalsInterval(interval)) + { + return mid; + // found one; just scan up and down now, first checking the range, but + // also checking other possible aspects of equivalence. + } + + for (int i = mid; ++i <= end;) + { + if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() != r1) + { + break; + } + if ((bsIgnore == null || !bsIgnore.get(i)) + && iv.equalsInterval(interval)) + { + return i; + } + } + for (int i = mid; --i >= start;) + { + if ((iv = intervals[i]).getBegin() != r0 || iv.getEnd() < r1) + { + return -1 - ++i; + } + if ((bsIgnore == null || !bsIgnore.get(i)) + && iv.equalsInterval(interval)) + { + return i; + } + } + return -1 - start; + } + } + return -1 - start; + } + + // private int binaryInsertionSearch(long from, long to) + // { + // int matched = intervalCount; + // int end = matched - 1; + // int start = matched; + // if (end < 0 || from > intervals[end].getEnd() + // || from < intervals[start = 0].getBegin()) + // return start; + // while (start <= end) + // { + // int mid = (start + end) >>> 1; + // switch (compareRange(intervals[mid], from, to)) + // { + // case 0: + // return mid; + // case 1: + // matched = mid; + // end = mid - 1; + // continue; + // case -1: + // start = mid + 1; + // continue; + // } + // + // } + // return matched; + // } + + @Override + public void clear() + { + intervalCount = added = 0; + isSorted = true; + isTainted = true; + offsets = null; + minStart = maxEnd = Integer.MAX_VALUE; + maxStart = Integer.MIN_VALUE; + } + + /** + * Compare an interval t to a from/to range for insertion purposes + * + * @param t + * @param from + * @param to + * @return 0 if same, 1 if start is after from, or start equals from and + * [bigendian: end is before to | littleendian: end is after to], else + * -1 + */ + private int 
compareRange(IntervalI t, long from, long to) + { + int order = Long.signum(t.getBegin() - from); + return (order == 0 + ? Long.signum(bigendian ? to - t.getEnd() : t.getEnd() - to) + : order); + } + + @Override + public boolean contains(Object entry) + { + if (entry == null || intervalCount == 0) + { + return false; + } + if (!isSorted || deleted > 0) + { + sort(); + } + return (findInterval((IntervalI) entry) >= 0); + } + + public boolean containsInterval(IntervalI outer, IntervalI inner) + { + ensureFinalized(); + int index = binaryIdentitySearch(inner, null); + if (index >= 0) + { + while ((index = index - Math.abs(offsets[index])) >= 0) + { + if (intervals[index] == outer) + { + return true; + } + } + } + return false; + } + + private void ensureFinalized() + { + if (isTainted) + { + if (!isSorted || added > 0 || deleted > 0) + { + sort(); + } + if (offsets == null || offsets.length < intervalCount) + { + offsets = new int[intervalCount]; + } + linkFeatures(); + isTainted = false; + } + } + + /** + * Find all overlaps within the given range, inclusively. + * + * @return a list sorted in ascending order of start position + * + */ + @Override + public List findOverlaps(long from, long to) + { + List list = findOverlaps(from, to, null); + Collections.reverse(list); + return list; + } + + /** + * Find all overlaps within the given range, inclusively. 
+ * + * @return a list sorted in descending order of start position + * + */ + + @SuppressWarnings("unchecked") + @Override + public List findOverlaps(long from, long to, List result) + { + if (result == null) + { + result = new ArrayList<>(); + } + switch (intervalCount + added) + { + case 0: + return result; + case 1: + IntervalI sf = intervals[0]; + if (sf.getBegin() <= to && sf.getEnd() >= from) + { + result.add((T) sf); + } + return result; + } + + ensureFinalized(); + + if (from > maxEnd || to < minStart) + { + return result; + } + int index = binaryLastIntervalSearch(from, to, ret); + int index1 = ret[0]; + if (index1 < 0) + { + return result; + } + + if (index1 > index + 1) + { + while (--index1 > index) + { + result.add((T) intervals[index1]); + } + } + boolean isMonotonic = false; + while (index >= 0) + { + IntervalI sf = intervals[index]; + if (sf.getEnd() >= from) + { + result.add((T) sf); + } + else if (isMonotonic) + { + break; + } + int offset = offsets[index]; + isMonotonic = (offset < 0); + index -= (isMonotonic ? 
-offset : offset); + } + return result; + } + + @Override + public IntervalI get(int i) + { + if (i < 0 || i >= intervalCount + added) + { + return null; + } + ensureFinalized(); + return intervals[i]; + } + + private int getContainedBy(int index, int begin) + { + while (index >= 0) + { + IntervalI sf = intervals[index]; + if (sf.getEnd() >= begin) + { + // System.out.println("\nIS found " + sf0.getIndex1() + ":" + sf0 + // + "\nFS in " + sf.getIndex1() + ":" + sf); + return index; + } + index -= Math.abs(offsets[index]); + } + return IntervalI.NOT_CONTAINED; + } + + @Override + public int getDepth() + { + ensureFinalized(); + if (intervalCount < 2) + { + return intervalCount; + } + int maxDepth = 1; + IntervalI root = null; + for (int i = 0; i < intervalCount; i++) + { + IntervalI element = intervals[i]; + if (offsets[i] == IntervalI.NOT_CONTAINED) + { + root = element; + } + int depth = 1; + int index = i; + int offset; + while ((index = index - Math.abs(offset = offsets[index])) >= 0) + { + element = intervals[index]; + if (++depth > maxDepth && (element == root || offset < 0)) + { + maxDepth = depth; + break; + } + } + } + return maxDepth; + } + + @Override + public int getWidth() + { + ensureFinalized(); + int w = 0; + for (int i = offsets.length; --i >= 0;) + { + if (offsets[i] > 0) + { + w++; + } + } + return w; + } + + @Override + public boolean isValid() + { + ensureFinalized(); + return true; + } + + /** + * Answers an iterator over the intervals in the store, with no particular + * ordering guaranteed. The iterator does not support the optional + * remove operation (throws + * UnsupportedOperationException if attempted). 
+ */ + @Override + public Iterator iterator() + { + ensureFinalized(); + return new Iterator() + { + + private int next; + + @Override + public boolean hasNext() + { + return next < intervalCount; + } + + @SuppressWarnings("unchecked") + @Override + public T next() + { + if (next >= intervalCount) + { + throw new NoSuchElementException(); + } + return (T) intervals[next++]; + } + + }; + } + + private void linkFeatures() + { + if (intervalCount == 0) + { + return; + } + maxEnd = intervals[0].getEnd(); + offsets[0] = IntervalI.NOT_CONTAINED; + if (intervalCount == 1) + { + return; + } + boolean isMonotonic = true; + for (int i = 1; i < intervalCount; i++) + { + IntervalI sf = intervals[i]; + int begin = sf.getBegin(); + int index = (begin <= maxEnd ? getContainedBy(i - 1, begin) : -1); + // System.out.println(sf + " is contained by " + // + (index < 0 ? null : starts[index])); + + offsets[i] = (index < 0 ? IntervalI.NOT_CONTAINED + : isMonotonic ? index - i : i - index); + isMonotonic = (sf.getEnd() > maxEnd); + if (isMonotonic) + { + maxEnd = sf.getEnd(); + } + } + + } + + @Override + public String prettyPrint() + { + switch (intervalCount + added) + { + case 0: + return ""; + case 1: + return intervals[0] + "\n"; + } + ensureFinalized(); + String sep = "\t"; + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < intervalCount; i++) + { + IntervalI range = intervals[i]; + int index = i; + while ((index = index - Math.abs(offsets[index])) >= 0) + { + sb.append(sep); + } + sb.append(range.toString()).append('\n'); + } + return sb.toString(); + } + + @Override + public synchronized boolean remove(Object o) + { + // if (o == null) + // { + // throw new NullPointerException(); + // } + return (o != null && intervalCount > 0 + && removeInterval((IntervalI) o)); + } + + /** + * Find the interval or return where it should go, possibly into the add + * buffer + * + * @param interval + * @return index (nonnegative) or index where it would go (negative) + */ + + private 
int findInterval(IntervalI interval) + { + + if (isSorted) + { + int pt = binaryIdentitySearch(interval, null); + // if (addPt == intervalCount || offsets[pt] == 0) + // return pt; + if (pt >= 0 || added == 0 || pt == -1 - intervalCount) + { + return pt; + } + pt = -1 - pt; + int start = interval.getBegin(); + int end = interval.getEnd(); + + int match = pt; + + while ((pt = offsets[pt]) != 0) + { + IntervalI iv = intervals[pt]; + switch (compareRange(iv, start, end)) + { + case -1: + break; + case 0: + if (iv.equalsInterval(interval)) + { + return pt; + } + // fall through + case 1: + match = pt; + continue; + } + } + return -1 - match; + } + else + { + int i = intervalCount; + while (--i >= 0 && !intervals[i].equalsInterval(interval)) + { + ; + } + return i; + } + } + + /** + * Uses a binary search to find the entry and removes it if found. + * + * @param interval + * @return + */ + protected boolean removeInterval(IntervalI interval) + { + + if (!isSorted || added > 0) + { + sort(); + } + int i = binaryIdentitySearch(interval, bsDeleted); + if (i < 0) + { + return false; + } + if (deleted == 0) + { + if (bsDeleted == null) + { + bsDeleted = new BitSet(intervalCount); + } + else + { + bsDeleted.clear(); + } + } + bsDeleted.set(i); + deleted++; + return (isTainted = true); + } + + private void finalizeDeletion() + { + if (deleted == 0) + { + return; + } + + // ......xxx.....xxxx.....xxxxx.... + // ......^i,pt + // ...... ....... + // ............ 
+ for (int i = bsDeleted.nextSetBit(0), pt = i; i >= 0;) + { + i = bsDeleted.nextClearBit(i + 1); + int pt1 = bsDeleted.nextSetBit(i + 1); + if (pt1 < 0) + { + pt1 = intervalCount; + } + int n = pt1 - i; + System.arraycopy(intervals, i, intervals, pt, n); + pt += n; + if (pt1 == intervalCount) + { + for (i = pt1; --i >= pt;) + { + intervals[i] = null; + } + intervalCount -= deleted; + deleted = 0; + bsDeleted.clear(); + break; + } + i = pt1; + } + + } + + @Override + public boolean revalidate() + { + isTainted = true; + isSorted = false; + ensureFinalized(); + return true; + } + + @Override + public int size() + { + return intervalCount + added - deleted; + } + + @Override + public Object[] toArray() + { + ensureFinalized(); + return super.toArray(); + } + + /** + * Sort intervals by start (lowest first) and end (highest first). + */ + private void sort() + { + if (added > 0) + { + intervals = finalizeAddition(new IntervalI[intervalCount + added]); + } + else if (deleted > 0) + { + finalizeDeletion(); + } + else + { + Arrays.sort(intervals, 0, intervalCount, icompare); + } + updateMinMaxStart(); + isSorted = true; + } + + private void updateMinMaxStart() + { + if (intervalCount > 0) + { + minStart = intervals[0].getBegin(); + maxStart = intervals[intervalCount - 1].getBegin(); + } + else + { + minStart = Integer.MAX_VALUE; + maxStart = Integer.MIN_VALUE; + } + } + + @Override + public String toString() + { + return prettyPrint(); + } + + @Override + public boolean canCheckForDuplicates() + { + return true; + } + +} diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index dbad53e..1ab8a24 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -652,7 +652,8 @@ public class CrossRef @Override public boolean equalsInterval(IntervalI sf) { - return equals((SequenceFeature) sf, true); + return sf != null + && equals((SequenceFeature) sf, true); } }; diff --git 
a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 6f51420..30e0929 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -230,7 +230,7 @@ public class SequenceFeature implements FeatureLocationI @Override public boolean equalsInterval(IntervalI sf) { - return equals((SequenceFeature) sf, false); + return sf != null && equals((SequenceFeature) sf, false); } /** * Overloaded method allows the equality test to optionally ignore the @@ -244,39 +244,17 @@ public class SequenceFeature implements FeatureLocationI */ public boolean equals(SequenceFeature sf, boolean ignoreParent) { - boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score) - : score == sf.score; - if (begin != sf.begin || end != sf.end || !sameScore) - { - return false; - } - - if (getStrand() != sf.getStrand()) - { - return false; - } - - if (!(type + description + featureGroup + getPhase()).equals( - sf.type + sf.description + sf.featureGroup + sf.getPhase())) - { - return false; - } - if (!equalAttribute(getValue("ID"), sf.getValue("ID"))) - { - return false; - } - if (!equalAttribute(getValue("Name"), sf.getValue("Name"))) - { - return false; - } - if (!ignoreParent) - { - if (!equalAttribute(getValue("Parent"), sf.getValue("Parent"))) - { - return false; - } - } - return true; + return (begin == sf.begin && end == sf.end + && getStrand() == sf.getStrand() + && (Float.isNaN(score) ? 
Float.isNaN(sf.score) + : score == sf.score) + && (type + description + featureGroup + getPhase()) + .equals(sf.type + sf.description + sf.featureGroup + + sf.getPhase()) + && equalAttribute(getValue("ID"), sf.getValue("ID")) + && equalAttribute(getValue("Name"), sf.getValue("Name")) + && (ignoreParent || equalAttribute(getValue("Parent"), + sf.getValue("Parent")))); } /** diff --git a/src/jalview/datamodel/features/FeatureStore.java b/src/jalview/datamodel/features/FeatureStore.java index 1451892..75ec45a 100644 --- a/src/jalview/datamodel/features/FeatureStore.java +++ b/src/jalview/datamodel/features/FeatureStore.java @@ -21,6 +21,7 @@ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; +import jalview.util.Platform; import java.util.ArrayList; import java.util.Collection; @@ -29,13 +30,96 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import intervalstore.api.IntervalStoreI; + public abstract class FeatureStore implements FeatureStoreI { /** * track last start for quick insertion of ordered features */ - protected int lastStart = -1, lastContactStart = -1; + protected int lastStart = -1; + + protected int lastContactStart = -1; + + /* + * Non-positional features have no (zero) start/end position. + * Kept as a separate list in case this criterion changes in future. 
+ */ + List nonPositionalFeatures; + + /* + * contact features ordered by first contact position + */ + List contactFeatureStarts; + + /* + * contact features ordered by second contact position + */ + List contactFeatureEnds; + + /* + * IntervalStore holds remaining features and provides efficient + * query for features overlapping any given interval + */ + IntervalStoreI features; + + /* + * Feature groups represented in stored positional features + * (possibly including null) + */ + Set positionalFeatureGroups; + + /* + * Feature groups represented in stored non-positional features + * (possibly including null) + */ + Set nonPositionalFeatureGroups; + + /* + * the total length of all positional features; contact features count 1 to + * the total and 1 to size(), consistent with an average 'feature length' of 1 + */ + int totalExtent; + + float positionalMinScore; + + float positionalMaxScore; + + float nonPositionalMinScore; + + float nonPositionalMaxScore; + + public final static int INTERVAL_STORE_DEFAULT = -1; + + /** + * original NCList-based IntervalStore + */ + public final static int INTERVAL_STORE_NCLIST_OBJECT = 0; + + /** + * linked-list IntervalStore + */ + public final static int INTERVAL_STORE_LINKED_LIST_PRESORT = 1; + + /** + * linked-list IntervalStore + */ + public final static int INTERVAL_STORE_LINKED_LIST_NO_PRESORT = 2; + + /** + * NCList as array buffer IntervalStore + */ + public final static int INTERVAL_STORE_NCLIST_BUFFER_PRESORT = 3; + + /** + * NCList as array buffer IntervalStore + */ + public final static int INTERVAL_STORE_NCLIST_BUFFER_NO_PRESORT = 4; + + static final int intervalStoreJavaOption = INTERVAL_STORE_NCLIST_OBJECT; + + static final int intervalStoreJSOption = INTERVAL_STORE_NCLIST_BUFFER_PRESORT; /** * Answers the 'length' of the feature, counting 0 for non-positional features @@ -151,59 +235,20 @@ public abstract class FeatureStore implements FeatureStoreI } } - /* - * Non-positional features have no (zero) start/end 
position. - * Kept as a separate list in case this criterion changes in future. - */ - List nonPositionalFeatures; - - /* - * contact features ordered by first contact position - */ - List contactFeatureStarts; - - /* - * contact features ordered by second contact position - */ - List contactFeatureEnds; - - /* - * IntervalStore holds remaining features and provides efficient - * query for features overlapping any given interval - */ - Collection features; - - /* - * Feature groups represented in stored positional features - * (possibly including null) - */ - Set positionalFeatureGroups; - - /* - * Feature groups represented in stored non-positional features - * (possibly including null) - */ - Set nonPositionalFeatureGroups; - - /* - * the total length of all positional features; contact features count 1 to - * the total and 1 to size(), consistent with an average 'feature length' of 1 + /** + * standard constructor */ - int totalExtent; - - float positionalMinScore; - - float positionalMaxScore; - - float nonPositionalMinScore; - - float nonPositionalMaxScore; + public FeatureStore() + { + this(INTERVAL_STORE_DEFAULT); + } /** - * Constructor + * constructor for testing only */ - public FeatureStore() + public FeatureStore(int intervalStoreType) { + features = getIntervalStore(intervalStoreType); positionalFeatureGroups = new HashSet<>(); nonPositionalFeatureGroups = new HashSet<>(); positionalMinScore = Float.NaN; @@ -214,6 +259,27 @@ public abstract class FeatureStore implements FeatureStoreI // we only construct nonPositionalFeatures, contactFeatures if we need to } + private IntervalStoreI getIntervalStore(int type) + { + switch (type != INTERVAL_STORE_DEFAULT ? type : // + Platform.isJS() // + ? 
intervalStoreJSOption + : intervalStoreJavaOption) + { + default: + case INTERVAL_STORE_NCLIST_OBJECT: + return new intervalstore.impl.IntervalStore<>(); + case INTERVAL_STORE_NCLIST_BUFFER_PRESORT: + return new intervalstore.nonc.IntervalStore<>(true); + case INTERVAL_STORE_NCLIST_BUFFER_NO_PRESORT: + return new intervalstore.nonc.IntervalStore<>(false); + case INTERVAL_STORE_LINKED_LIST_PRESORT: + return new intervalstore.nonc.IntervalStore0<>(true); + case INTERVAL_STORE_LINKED_LIST_NO_PRESORT: + return new intervalstore.nonc.IntervalStore0<>(false); + } + } + /** * Add a contact feature to the lists that hold them ordered by start (first * contact) and by end (second contact) position, ensuring the lists remain @@ -303,7 +369,6 @@ public abstract class FeatureStore implements FeatureStoreI return false; } positionalFeatureGroups.add(feature.getFeatureGroup()); - // addPositionalFeature(feature); if (feature.begin > lastStart) { lastStart = feature.begin; diff --git a/src/jalview/datamodel/features/FeatureStoreImpl.java b/src/jalview/datamodel/features/FeatureStoreImpl.java index a90755d..63ee678 100644 --- a/src/jalview/datamodel/features/FeatureStoreImpl.java +++ b/src/jalview/datamodel/features/FeatureStoreImpl.java @@ -25,7 +25,6 @@ import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.List; -import intervalstore.api.IntervalStoreI; import intervalstore.impl.BinarySearcher; /** @@ -39,18 +38,14 @@ import intervalstore.impl.BinarySearcher; public class FeatureStoreImpl extends FeatureStore { - /** - * Default constructor uses NCList - */ public FeatureStoreImpl() { - this(true); + super(); } - public FeatureStoreImpl(boolean useNCList) + public FeatureStoreImpl(int option) { - features = (useNCList ? 
new intervalstore.impl.IntervalStore<>() - : new intervalstore.nonc.IntervalStore<>(false)); + super(option); } /** @@ -75,7 +70,7 @@ public class FeatureStoreImpl extends FeatureStore * insert into list sorted by start (first contact position): * binary search the sorted list to find the insertion point */ - int insertPosition = findFirstBeginStatic(contactFeatureStarts, + int insertPosition = findFirstBegin(contactFeatureStarts, feature.getBegin()); contactFeatureStarts.add(insertPosition, feature); @@ -83,7 +78,7 @@ public class FeatureStoreImpl extends FeatureStore * insert into list sorted by end (second contact position): * binary search the sorted list to find the insertion point */ - insertPosition = findFirstEndStatic(contactFeatureEnds, + insertPosition = findFirstEnd(contactFeatureEnds, feature.getEnd()); contactFeatureEnds.add(insertPosition, feature); @@ -143,7 +138,7 @@ public class FeatureStoreImpl extends FeatureStore * find the first contact feature (if any) * whose end point is not before the target range */ - int index = findFirstEndStatic(contactFeatureEnds, from); + int index = findFirstEnd(contactFeatureEnds, from); int n = contactFeatureEnds.size(); while (index < n) @@ -253,46 +248,21 @@ public class FeatureStoreImpl extends FeatureStore private void findOverlaps(long start, long end, List result) { - result.addAll(((IntervalStoreI) features) + result.addAll(features .findOverlaps(start, end)); } @Override protected int findFirstBegin(List list, long pos) { - return findFirstBeginStatic(list, pos); - } - - /** - * Possibly a bit faster using a static method. 
- * - * @param list - * @param pos - * @return - */ - private static int findFirstBeginStatic(List list, - long pos) - { - return BinarySearcher.findFirst(list, f -> f.getBegin() >= pos); + return BinarySearcher.findFirst(list, (int) pos, + BinarySearcher.fbegin); } @Override protected int findFirstEnd(List list, long pos) { - return findFirstEndStatic(list, pos); - } - - /** - * Possibly a bit faster using a static method. - * - * @param list - * @param pos - * @return - */ - private static int findFirstEndStatic(List list, - long pos) - { - return BinarySearcher.findFirst(list, f -> f.getEnd() >= pos); + return BinarySearcher.findFirst(list, (int) pos, BinarySearcher.fend); } @Override diff --git a/src/jalview/datamodel/features/FeatureStoreJS.java b/src/jalview/datamodel/features/FeatureStoreJS.java index 4f49360..05adeb1 100644 --- a/src/jalview/datamodel/features/FeatureStoreJS.java +++ b/src/jalview/datamodel/features/FeatureStoreJS.java @@ -25,8 +25,6 @@ import jalview.datamodel.SequenceFeature; import java.util.ArrayList; import java.util.List; -import intervalstore.nonc.IntervalStore; - /** * An adaption of FeatureStore that is efficient and lightweight, accelerating * processing speed in JavaScript. @@ -49,15 +47,16 @@ import intervalstore.nonc.IntervalStore; */ public class FeatureStoreJS extends FeatureStore { - private IntervalStore featureStore; + public FeatureStoreJS() { - // the only reference to features field in this class -- for the superclass - - // linked-list no-NCList IntervalStore with presort + super(); + } - features = featureStore = new IntervalStore<>(true); + public FeatureStoreJS(int option) + { + super(option); } /** @@ -84,7 +83,6 @@ public class FeatureStoreJS extends FeatureStore return true; } - /** * Add a feature to the IntervalStore, not allowing for duplicates. 
* @@ -95,7 +93,7 @@ public class FeatureStoreJS extends FeatureStore protected synchronized boolean addPositionalFeature( SequenceFeature feature) { - return featureStore.add(feature, false); + return features.add(feature, false); } /** @@ -126,13 +124,13 @@ public class FeatureStoreJS extends FeatureStore @Override protected boolean containsFeature(SequenceFeature feature) { - return featureStore.contains(feature); + return features.contains(feature); } @Override protected boolean findAndRemoveNonContactFeature(SequenceFeature sf) { - return featureStore.remove(sf); + return features.remove(sf); } /** @@ -252,9 +250,9 @@ public class FeatureStoreJS extends FeatureStore findContactFeatures(start, end, result); } } - if (featureStore.size() > 0) + if (features.size() > 0) { - featureStore.findOverlaps(start, end, result); + features.findOverlaps(start, end, result); } return result; } diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java index cb2b8cc..6c83013 100644 --- a/src/jalview/datamodel/features/SequenceFeatures.java +++ b/src/jalview/datamodel/features/SequenceFeatures.java @@ -54,32 +54,6 @@ public class SequenceFeatures implements SequenceFeaturesI private Map featureStore; /** - * original NCList-based IntervalStore - */ - private final static int INTERVAL_STORE_NCLIST = 0; - - /** - * linked-list deferred-sort IntervalStore - experimental only; unused - */ - private final static int INTERVAL_STORE_LINKED_LIST_NO_PRESORT = 1; - - /** - * linked-list IntervalStore option for JavaScript - */ - private final static int INTERVAL_STORE_LINKED_LIST = -1; - - /** - * mode for Java or JavaScript; can be set differently for testing, but - * default is LINKED_LIST for JalviewJS and NCLIST for Java - */ - private final int INTERVAL_STORE_MODE = ( - // true || // - Platform.isJS() ? 
// - INTERVAL_STORE_LINKED_LIST // - : INTERVAL_STORE_NCLIST// - ); - - /** * Constructor */ public SequenceFeatures() @@ -130,16 +104,10 @@ public class SequenceFeatures implements SequenceFeaturesI private FeatureStoreI newFeatureStore() { - switch (INTERVAL_STORE_MODE) - { - default: - case INTERVAL_STORE_NCLIST: - return new FeatureStoreImpl(true); - case INTERVAL_STORE_LINKED_LIST_NO_PRESORT: - return new FeatureStoreImpl(false); - case INTERVAL_STORE_LINKED_LIST: - return new FeatureStoreJS(); - } + return (// + Platform.isJS()// + ? new FeatureStoreJS() + : new FeatureStoreImpl()); } /** diff --git a/src/jalview/urls/IdentifiersUrlProvider.java b/src/jalview/urls/IdentifiersUrlProvider.java index a44b9b9..8744958 100644 --- a/src/jalview/urls/IdentifiersUrlProvider.java +++ b/src/jalview/urls/IdentifiersUrlProvider.java @@ -85,7 +85,7 @@ private HashMap readIdentifiers(String idFileName) { // NOTE: THIS WILL FAIL IN SWINGJS BECAUSE IT INVOLVES A FILE READER - System.out.println("IDentifiersURL " + idFileName); + System.out.println("IDentifiersURLProvider.idFileName=" + idFileName); FileReader reader = new FileReader(idFileName); String key = ""; Map obj = (Map) JSONUtils.parse(reader); diff --git a/test/jalview/datamodel/features/FeatureStoreJSTest.java b/test/jalview/datamodel/features/FeatureStoreJSTest.java index 2b0c9ea..ac80298 100644 --- a/test/jalview/datamodel/features/FeatureStoreJSTest.java +++ b/test/jalview/datamodel/features/FeatureStoreJSTest.java @@ -16,6 +16,8 @@ import org.testng.annotations.Test; public class FeatureStoreJSTest { + private int intervalStoreOption = FeatureStore.intervalStoreJSOption; + @Test(groups = "Functional") public void testFindFeatures_nonNested() { @@ -38,10 +40,9 @@ public class FeatureStoreJSTest overlaps = fs.findOverlappingFeatures(12, 16); assertEquals(overlaps.size(), 3); - // BH note, this is reversed from IS-NCList 2 1 0, not 0 1 2 - assertEquals(overlaps.get(2).getEnd(), 20); + 
assertEquals(overlaps.get(2).getEnd(), 25); assertEquals(overlaps.get(1).getEnd(), 20); - assertEquals(overlaps.get(0).getEnd(), 25); + assertEquals(overlaps.get(0).getEnd(), 20); overlaps = fs.findOverlappingFeatures(33, 33); assertEquals(overlaps.size(), 1); @@ -50,8 +51,7 @@ public class FeatureStoreJSTest private FeatureStoreI newFeatureStore() { - return new FeatureStoreJS(); - // return new FeatureStoreImpl(); + return new FeatureStoreJS(intervalStoreOption); } @Test(groups = "Functional") @@ -93,9 +93,41 @@ public class FeatureStoreJSTest assertTrue(overlaps.contains(sf4)); } + private void testFind() + { + FeatureStoreI fs1 = newFeatureStore(); + + SequenceFeature sf = addFeature(fs1, 1, 3000); + + for (int i = 1; i < 1000; i++) + + { + + addFeature(fs1, 1 + i, 1000 + i); + + } + + // 1.......3000 + // 2....1001 + // 3....1002 + // 4....1003 + // ... + // 1000..1999 + + List overlaps1 = fs1.findOverlappingFeatures(2000, + 2001); + + assertEquals(overlaps1.size(), 1); + + assertTrue(overlaps1.contains(sf)); + + } + @Test(groups = "Functional") public void testFindFeatures_mixed() { + testFind(); + FeatureStoreI fs = newFeatureStore(); SequenceFeature sf1 = addFeature(fs, 10, 50); SequenceFeature sf2 = addFeature(fs, 1, 15); diff --git a/test/jalview/datamodel/features/FeatureStoreTest.java b/test/jalview/datamodel/features/FeatureStoreJavaTest.java similarity index 99% rename from test/jalview/datamodel/features/FeatureStoreTest.java rename to test/jalview/datamodel/features/FeatureStoreJavaTest.java index a1593eb..5d2ca3d 100644 --- a/test/jalview/datamodel/features/FeatureStoreTest.java +++ b/test/jalview/datamodel/features/FeatureStoreJavaTest.java @@ -13,9 +13,16 @@ import java.util.Set; import org.testng.annotations.Test; -public class FeatureStoreTest +public class FeatureStoreJavaTest { + private int intervalStoreOption = FeatureStore.intervalStoreJavaOption; + + private FeatureStoreI newFeatureStore() + { + return new 
FeatureStoreImpl(intervalStoreOption); + } + @Test(groups = "Functional") public void testFindFeatures_nonNested() { @@ -46,12 +53,6 @@ public class FeatureStoreTest assertEquals(overlaps.get(0).getEnd(), 35); } - private FeatureStoreI newFeatureStore() - { - // return new FeatureStoreJS(); - return new FeatureStoreImpl(); - } - @Test(groups = "Functional") public void testFindFeatures_nested() { diff --git a/test/jalview/datamodel/features/FeatureStoreNoNCTest.java b/test/jalview/datamodel/features/FeatureStoreLinkedTest.java similarity index 99% rename from test/jalview/datamodel/features/FeatureStoreNoNCTest.java rename to test/jalview/datamodel/features/FeatureStoreLinkedTest.java index 900a7ca..25fdbb2 100644 --- a/test/jalview/datamodel/features/FeatureStoreNoNCTest.java +++ b/test/jalview/datamodel/features/FeatureStoreLinkedTest.java @@ -13,12 +13,13 @@ import java.util.Set; import org.testng.annotations.Test; -public class FeatureStoreNoNCTest +public class FeatureStoreLinkedTest { private FeatureStoreI newFeatureStore() { - return new FeatureStoreImpl(false); + return new FeatureStoreImpl( + FeatureStore.INTERVAL_STORE_LINKED_LIST_PRESORT); } @Test(groups = "Functional") diff --git a/test/jalview/datamodel/features/FeatureStoreNCListBufferTest.java b/test/jalview/datamodel/features/FeatureStoreNCListBufferTest.java new file mode 100644 index 0000000..2b9198a --- /dev/null +++ b/test/jalview/datamodel/features/FeatureStoreNCListBufferTest.java @@ -0,0 +1,899 @@ +package jalview.datamodel.features; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + +import jalview.datamodel.SequenceFeature; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.testng.annotations.Test; + +public class FeatureStoreNCListBufferTest +{ + + private FeatureStoreI newFeatureStore() + { + return new 
FeatureStoreImpl( + FeatureStore.INTERVAL_STORE_NCLIST_BUFFER_PRESORT); + } + + @Test(groups = "Functional") + public void testFindFeatures_nonNested() + { + FeatureStoreI fs = newFeatureStore(); + fs.addFeature(new SequenceFeature("", "", 10, 20, Float.NaN, + null)); + // same range different description + fs.addFeature(new SequenceFeature("", "desc", 10, 20, Float.NaN, null)); + fs.addFeature(new SequenceFeature("", "", 15, 25, Float.NaN, null)); + fs.addFeature(new SequenceFeature("", "", 20, 35, Float.NaN, null)); + + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(8, 10); + assertEquals(overlaps.size(), 2); + assertEquals(overlaps.get(0).getEnd(), 20); + assertEquals(overlaps.get(1).getEnd(), 20); + + overlaps = fs.findOverlappingFeatures(12, 16); + assertEquals(overlaps.size(), 3); + assertEquals(overlaps.get(0).getEnd(), 20); + assertEquals(overlaps.get(1).getEnd(), 20); + assertEquals(overlaps.get(2).getEnd(), 25); + + overlaps = fs.findOverlappingFeatures(33, 33); + assertEquals(overlaps.size(), 1); + assertEquals(overlaps.get(0).getEnd(), 35); + } + + @Test(groups = "Functional") + public void testFindFeatures_nested() + { + FeatureStoreI fs = newFeatureStore(); + SequenceFeature sf1 = addFeature(fs, 10, 50); + SequenceFeature sf2 = addFeature(fs, 10, 40); + SequenceFeature sf3 = addFeature(fs, 20, 30); + // fudge feature at same location but different group (so is added) + SequenceFeature sf4 = new SequenceFeature("", "", 20, 30, Float.NaN, + "different group"); + fs.addFeature(sf4); + SequenceFeature sf5 = addFeature(fs, 35, 36); + + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(10, 15); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(45, 60); + assertEquals(overlaps.size(), 1); + 
assertTrue(overlaps.contains(sf1)); + + overlaps = fs.findOverlappingFeatures(32, 38); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + assertTrue(overlaps.contains(sf5)); + + overlaps = fs.findOverlappingFeatures(15, 25); + assertEquals(overlaps.size(), 4); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + assertTrue(overlaps.contains(sf3)); + assertTrue(overlaps.contains(sf4)); + } + + @Test(groups = "Functional") + public void testFindFeatures_mixed() + { + FeatureStoreI fs = newFeatureStore(); + SequenceFeature sf1 = addFeature(fs, 10, 50); + SequenceFeature sf2 = addFeature(fs, 1, 15); + SequenceFeature sf3 = addFeature(fs, 20, 30); + SequenceFeature sf4 = addFeature(fs, 40, 100); + SequenceFeature sf5 = addFeature(fs, 60, 100); + SequenceFeature sf6 = addFeature(fs, 70, 70); + + List overlaps = fs.findOverlappingFeatures(200, 200); + assertTrue(overlaps.isEmpty()); + + overlaps = fs.findOverlappingFeatures(1, 9); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(5, 18); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf2)); + + overlaps = fs.findOverlappingFeatures(30, 40); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf1)); + assertTrue(overlaps.contains(sf3)); + assertTrue(overlaps.contains(sf4)); + + overlaps = fs.findOverlappingFeatures(80, 90); + assertEquals(overlaps.size(), 2); + assertTrue(overlaps.contains(sf4)); + assertTrue(overlaps.contains(sf5)); + + overlaps = fs.findOverlappingFeatures(68, 70); + assertEquals(overlaps.size(), 3); + assertTrue(overlaps.contains(sf4)); + assertTrue(overlaps.contains(sf5)); + assertTrue(overlaps.contains(sf6)); + } + + /** + * Helper method to add a feature of no particular type + * + * @param fs + * @param from + * @param to + * @return + */ + SequenceFeature 
addFeature(FeatureStoreI fs, int from, int to) + { + SequenceFeature sf1 = new SequenceFeature("", "", from, to, Float.NaN, + null); + fs.addFeature(sf1); + return sf1; + } + + @Test(groups = "Functional") + public void testFindFeatures_contactFeatures() + { + FeatureStoreI fs = newFeatureStore(); + + SequenceFeature sf = new SequenceFeature("disulphide bond", "bond", 10, + 20, Float.NaN, null); + fs.addFeature(sf); + + /* + * neither contact point in range + */ + List overlaps = fs.findOverlappingFeatures(1, 9); + assertTrue(overlaps.isEmpty()); + + /* + * neither contact point in range + */ + overlaps = fs.findOverlappingFeatures(11, 19); + assertTrue(overlaps.isEmpty()); + + /* + * first contact point in range + */ + overlaps = fs.findOverlappingFeatures(5, 15); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + + /* + * second contact point in range + */ + overlaps = fs.findOverlappingFeatures(15, 25); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + + /* + * both contact points in range + */ + overlaps = fs.findOverlappingFeatures(5, 25); + assertEquals(overlaps.size(), 1); + assertTrue(overlaps.contains(sf)); + } + + @Test(groups = "Functional") + public void testGetPositionalFeatures() + { + FeatureStoreI store = newFeatureStore(); + SequenceFeature sf1 = new SequenceFeature("Metal", "desc", 10, 20, + Float.NaN, null); + store.addFeature(sf1); + // same range, different description + SequenceFeature sf2 = new SequenceFeature("Metal", "desc2", 10, 20, + Float.NaN, null); + store.addFeature(sf2); + // discontiguous range + SequenceFeature sf3 = new SequenceFeature("Metal", "desc", 30, 40, + Float.NaN, null); + store.addFeature(sf3); + // overlapping range + SequenceFeature sf4 = new SequenceFeature("Metal", "desc", 15, 35, + Float.NaN, null); + store.addFeature(sf4); + // enclosing range + SequenceFeature sf5 = new SequenceFeature("Metal", "desc", 5, 50, + Float.NaN, null); + store.addFeature(sf5); + // 
non-positional feature + SequenceFeature sf6 = new SequenceFeature("Metal", "desc", 0, 0, + Float.NaN, null); + store.addFeature(sf6); + // contact feature + SequenceFeature sf7 = new SequenceFeature("Disulphide bond", "desc", + 18, 45, Float.NaN, null); + store.addFeature(sf7); + + List features = store.getPositionalFeatures(); + assertEquals(features.size(), 6); + assertTrue(features.contains(sf1)); + assertTrue(features.contains(sf2)); + assertTrue(features.contains(sf3)); + assertTrue(features.contains(sf4)); + assertTrue(features.contains(sf5)); + assertFalse(features.contains(sf6)); + assertTrue(features.contains(sf7)); + + features = store.getNonPositionalFeatures(); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf6)); + } + + @Test(groups = "Functional") + public void testDelete() + { + FeatureStoreI store = newFeatureStore(); + SequenceFeature sf1 = addFeature(store, 10, 20); + assertTrue(store.getPositionalFeatures().contains(sf1)); + + /* + * simple deletion + */ + assertTrue(store.delete(sf1)); + assertTrue(store.getPositionalFeatures().isEmpty()); + + /* + * non-positional feature deletion + */ + SequenceFeature sf2 = addFeature(store, 0, 0); + assertFalse(store.getPositionalFeatures().contains(sf2)); + assertTrue(store.getNonPositionalFeatures().contains(sf2)); + assertTrue(store.delete(sf2)); + assertTrue(store.getNonPositionalFeatures().isEmpty()); + + /* + * contact feature deletion + */ + SequenceFeature sf3 = new SequenceFeature("", "Disulphide Bond", 11, + 23, Float.NaN, null); + store.addFeature(sf3); + assertEquals(store.getPositionalFeatures().size(), 1); + assertTrue(store.getPositionalFeatures().contains(sf3)); + assertTrue(store.delete(sf3)); + assertTrue(store.getPositionalFeatures().isEmpty()); + + /* + * nested feature deletion + */ + SequenceFeature sf4 = addFeature(store, 20, 30); + SequenceFeature sf5 = addFeature(store, 22, 26); // to NCList + SequenceFeature sf6 = addFeature(store, 23, 24); // child of sf5 + 
SequenceFeature sf7 = addFeature(store, 25, 25); // sibling of sf6 + SequenceFeature sf8 = addFeature(store, 24, 24); // child of sf6 + SequenceFeature sf9 = addFeature(store, 23, 23); // child of sf6 + + // SequenceFeature sf4 = addFeature(store, 20, 30); + //// SequenceFeature sf5 = addFeature(store, 22, 26); + ////// SequenceFeature sf6 = addFeature(store, 23, 24); // child of sf5 + //////// SequenceFeature sf9 = addFeature(store, 23, 23); // child of sf6 + //////// SequenceFeature sf8 = addFeature(store, 24, 24); // child of sf6 + ////// SequenceFeature sf7 = addFeature(store, 25, 25); // child of sf5 + // + assertEquals(store.getPositionalFeatures().size(), 6); + + // delete a node with children - they take its place + assertTrue(store.delete(sf6)); // sf8, sf9 should become children of sf5 + assertEquals(store.getPositionalFeatures().size(), 5); + assertFalse(store.getPositionalFeatures().contains(sf6)); + + // delete a node with no children + assertTrue(store.delete(sf7)); + assertEquals(store.getPositionalFeatures().size(), 4); + assertFalse(store.getPositionalFeatures().contains(sf7)); + + // delete root of NCList + assertTrue(store.delete(sf5)); + assertEquals(store.getPositionalFeatures().size(), 3); + assertFalse(store.getPositionalFeatures().contains(sf5)); + + // continue the killing fields + assertTrue(store.delete(sf4)); + assertEquals(store.getPositionalFeatures().size(), 2); + assertFalse(store.getPositionalFeatures().contains(sf4)); + + assertTrue(store.delete(sf9)); + assertEquals(store.getPositionalFeatures().size(), 1); + assertFalse(store.getPositionalFeatures().contains(sf9)); + + assertTrue(store.delete(sf8)); + assertTrue(store.getPositionalFeatures().isEmpty()); + } + + @Test(groups = "Functional") + public void testAddFeature() + { + FeatureStoreI fs = newFeatureStore(); + + SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20, + Float.NaN, null); + SequenceFeature sf2 = new SequenceFeature("Cath", "", 10, 20, + Float.NaN, null); 
+ + assertTrue(fs.addFeature(sf1)); + assertEquals(fs.getFeatureCount(true), 1); // positional + assertEquals(fs.getFeatureCount(false), 0); // non-positional + + /* + * re-adding the same or an identical feature should fail + */ + assertFalse(fs.addFeature(sf1)); + assertEquals(fs.getFeatureCount(true), 1); + assertFalse(fs.addFeature(sf2)); + assertEquals(fs.getFeatureCount(true), 1); + + /* + * add non-positional + */ + SequenceFeature sf3 = new SequenceFeature("Cath", "", 0, 0, Float.NaN, + null); + assertTrue(fs.addFeature(sf3)); + assertEquals(fs.getFeatureCount(true), 1); // positional + assertEquals(fs.getFeatureCount(false), 1); // non-positional + SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, Float.NaN, + null); + assertFalse(fs.addFeature(sf4)); // already stored + assertEquals(fs.getFeatureCount(true), 1); // positional + assertEquals(fs.getFeatureCount(false), 1); // non-positional + + /* + * add contact + */ + SequenceFeature sf5 = new SequenceFeature("Disulfide bond", "", 10, 20, + Float.NaN, null); + assertTrue(fs.addFeature(sf5)); + assertEquals(fs.getFeatureCount(true), 2); // positional - add 1 for contact + assertEquals(fs.getFeatureCount(false), 1); // non-positional + SequenceFeature sf6 = new SequenceFeature("Disulfide bond", "", 10, 20, + Float.NaN, null); + assertFalse(fs.addFeature(sf6)); // already stored + assertEquals(fs.getFeatureCount(true), 2); // no change + assertEquals(fs.getFeatureCount(false), 1); // no change + } + + @Test(groups = "Functional") + public void testIsEmpty() + { + FeatureStoreI fs = newFeatureStore(); + assertTrue(fs.isEmpty()); + assertEquals(fs.getFeatureCount(true), 0); + + /* + * non-nested feature + */ + SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20, + Float.NaN, null); + fs.addFeature(sf1); + assertFalse(fs.isEmpty()); + assertEquals(fs.getFeatureCount(true), 1); + fs.delete(sf1); + assertTrue(fs.isEmpty()); + assertEquals(fs.getFeatureCount(true), 0); + + /* + * non-positional 
feature + */ + sf1 = new SequenceFeature("Cath", "", 0, 0, Float.NaN, null); + fs.addFeature(sf1); + assertFalse(fs.isEmpty()); + assertEquals(fs.getFeatureCount(false), 1); // non-positional + assertEquals(fs.getFeatureCount(true), 0); // positional + fs.delete(sf1); + assertTrue(fs.isEmpty()); + assertEquals(fs.getFeatureCount(false), 0); + + /* + * contact feature + */ + sf1 = new SequenceFeature("Disulfide bond", "", 19, 49, Float.NaN, null); + fs.addFeature(sf1); + assertFalse(fs.isEmpty()); + assertEquals(fs.getFeatureCount(true), 1); + fs.delete(sf1); + assertTrue(fs.isEmpty()); + assertEquals(fs.getFeatureCount(true), 0); + + /* + * sf2, sf3 added as nested features + */ + sf1 = new SequenceFeature("Cath", "", 19, 49, Float.NaN, null); + SequenceFeature sf2 = new SequenceFeature("Cath", "", 20, 40, + Float.NaN, null); + SequenceFeature sf3 = new SequenceFeature("Cath", "", 25, 35, + Float.NaN, null); + fs.addFeature(sf1); + fs.addFeature(sf2); + fs.addFeature(sf3); + assertEquals(fs.getFeatureCount(true), 3); + assertTrue(fs.delete(sf1)); + assertEquals(fs.getFeatureCount(true), 2); + assertEquals(fs.getFeatures().size(), 2); + assertFalse(fs.isEmpty()); + assertTrue(fs.delete(sf2)); + assertEquals(fs.getFeatureCount(true), 1); + assertFalse(fs.isEmpty()); + assertTrue(fs.delete(sf3)); + assertEquals(fs.getFeatureCount(true), 0); + assertTrue(fs.isEmpty()); // all gone + } + + @Test(groups = "Functional") + public void testGetFeatureGroups() + { + FeatureStoreI fs = newFeatureStore(); + assertTrue(fs.getFeatureGroups(true).isEmpty()); + assertTrue(fs.getFeatureGroups(false).isEmpty()); + + SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 10, 20, 1f, "group1"); + fs.addFeature(sf1); + Set groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 1); + assertTrue(groups.contains("group1")); + + /* + * add another feature of the same group, delete one, delete both + */ + SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 20, 30, 1f, 
"group1"); + fs.addFeature(sf2); + groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 1); + assertTrue(groups.contains("group1")); + fs.delete(sf2); + groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 1); + assertTrue(groups.contains("group1")); + fs.delete(sf1); + groups = fs.getFeatureGroups(true); + assertTrue(fs.getFeatureGroups(true).isEmpty()); + + SequenceFeature sf3 = new SequenceFeature("Cath", "desc", 20, 30, 1f, "group2"); + fs.addFeature(sf3); + SequenceFeature sf4 = new SequenceFeature("Cath", "desc", 20, 30, 1f, "Group2"); + fs.addFeature(sf4); + SequenceFeature sf5 = new SequenceFeature("Cath", "desc", 20, 30, 1f, null); + fs.addFeature(sf5); + groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 3); + assertTrue(groups.contains("group2")); + assertTrue(groups.contains("Group2")); // case sensitive + assertTrue(groups.contains(null)); // null allowed + assertTrue(fs.getFeatureGroups(false).isEmpty()); // non-positional + + fs.delete(sf3); + groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 2); + assertFalse(groups.contains("group2")); + fs.delete(sf4); + groups = fs.getFeatureGroups(true); + assertEquals(groups.size(), 1); + assertFalse(groups.contains("Group2")); + fs.delete(sf5); + groups = fs.getFeatureGroups(true); + assertTrue(groups.isEmpty()); + + /* + * add non-positional feature + */ + SequenceFeature sf6 = new SequenceFeature("Cath", "desc", 0, 0, 1f, + "CathGroup"); + fs.addFeature(sf6); + groups = fs.getFeatureGroups(false); + assertEquals(groups.size(), 1); + assertTrue(groups.contains("CathGroup")); + assertTrue(fs.delete(sf6)); + assertTrue(fs.getFeatureGroups(false).isEmpty()); + } + + @Test(groups = "Functional") + public void testGetTotalFeatureLength() + { + FeatureStoreI fs = newFeatureStore(); + assertEquals(fs.getTotalFeatureLength(), 0); + + addFeature(fs, 10, 20); // 11 + assertEquals(fs.getTotalFeatureLength(), 11); + addFeature(fs, 17, 37); // 21 + SequenceFeature 
sf1 = addFeature(fs, 14, 74); // 61 + assertEquals(fs.getTotalFeatureLength(), 93); + + // non-positional features don't count + SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 0, 0, 1f, + "group1"); + fs.addFeature(sf2); + assertEquals(fs.getTotalFeatureLength(), 93); + + // contact features count 1 + SequenceFeature sf3 = new SequenceFeature("disulphide bond", "desc", + 15, 35, 1f, "group1"); + fs.addFeature(sf3); + assertEquals(fs.getTotalFeatureLength(), 94); + + assertTrue(fs.delete(sf1)); + assertEquals(fs.getTotalFeatureLength(), 33); + assertFalse(fs.delete(sf1)); + assertEquals(fs.getTotalFeatureLength(), 33); + assertTrue(fs.delete(sf2)); + assertEquals(fs.getTotalFeatureLength(), 33); + assertTrue(fs.delete(sf3)); + assertEquals(fs.getTotalFeatureLength(), 32); + } + + @Test(groups = "Functional") + public void testGetFeatureLength() + { + /* + * positional feature + */ + SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 10, 20, 1f, "group1"); + assertEquals(FeatureStore.getFeatureLength(sf1), 11); + + /* + * non-positional feature + */ + SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 0, 0, 1f, + "CathGroup"); + assertEquals(FeatureStore.getFeatureLength(sf2), 0); + + /* + * contact feature counts 1 + */ + SequenceFeature sf3 = new SequenceFeature("Disulphide Bond", "desc", + 14, 28, 1f, "AGroup"); + assertEquals(FeatureStore.getFeatureLength(sf3), 1); + } + + @Test(groups = "Functional") + public void testMin() + { + assertEquals(FeatureStore.min(Float.NaN, Float.NaN), Float.NaN); + assertEquals(FeatureStore.min(Float.NaN, 2f), 2f); + assertEquals(FeatureStore.min(-2f, Float.NaN), -2f); + assertEquals(FeatureStore.min(2f, -3f), -3f); + } + + @Test(groups = "Functional") + public void testMax() + { + assertEquals(FeatureStore.max(Float.NaN, Float.NaN), Float.NaN); + assertEquals(FeatureStore.max(Float.NaN, 2f), 2f); + assertEquals(FeatureStore.max(-2f, Float.NaN), -2f); + assertEquals(FeatureStore.max(2f, -3f), 2f); + } + + 
@Test(groups = "Functional") + public void testGetMinimumScore_getMaximumScore() + { + FeatureStoreI fs = newFeatureStore(); + assertEquals(fs.getMinimumScore(true), Float.NaN); // positional + assertEquals(fs.getMaximumScore(true), Float.NaN); + assertEquals(fs.getMinimumScore(false), Float.NaN); // non-positional + assertEquals(fs.getMaximumScore(false), Float.NaN); + + // add features with no score + SequenceFeature sf1 = new SequenceFeature("type", "desc", 0, 0, + Float.NaN, "group"); + fs.addFeature(sf1); + SequenceFeature sf2 = new SequenceFeature("type", "desc", 10, 20, + Float.NaN, "group"); + fs.addFeature(sf2); + assertEquals(fs.getMinimumScore(true), Float.NaN); + assertEquals(fs.getMaximumScore(true), Float.NaN); + assertEquals(fs.getMinimumScore(false), Float.NaN); + assertEquals(fs.getMaximumScore(false), Float.NaN); + + // add positional features with score + SequenceFeature sf3 = new SequenceFeature("type", "desc", 10, 20, 1f, + "group"); + fs.addFeature(sf3); + SequenceFeature sf4 = new SequenceFeature("type", "desc", 12, 16, 4f, + "group"); + fs.addFeature(sf4); + assertEquals(fs.getMinimumScore(true), 1f); + assertEquals(fs.getMaximumScore(true), 4f); + assertEquals(fs.getMinimumScore(false), Float.NaN); + assertEquals(fs.getMaximumScore(false), Float.NaN); + + // add non-positional features with score + SequenceFeature sf5 = new SequenceFeature("type", "desc", 0, 0, 11f, + "group"); + fs.addFeature(sf5); + SequenceFeature sf6 = new SequenceFeature("type", "desc", 0, 0, -7f, + "group"); + fs.addFeature(sf6); + assertEquals(fs.getMinimumScore(true), 1f); + assertEquals(fs.getMaximumScore(true), 4f); + assertEquals(fs.getMinimumScore(false), -7f); + assertEquals(fs.getMaximumScore(false), 11f); + + // delete one positional and one non-positional + // min-max should be recomputed + assertTrue(fs.delete(sf6)); + assertTrue(fs.delete(sf3)); + assertEquals(fs.getMinimumScore(true), 4f); + assertEquals(fs.getMaximumScore(true), 4f); + 
assertEquals(fs.getMinimumScore(false), 11f); + assertEquals(fs.getMaximumScore(false), 11f); + + // delete remaining features with score + assertTrue(fs.delete(sf4)); + assertTrue(fs.delete(sf5)); + assertEquals(fs.getMinimumScore(true), Float.NaN); + assertEquals(fs.getMaximumScore(true), Float.NaN); + assertEquals(fs.getMinimumScore(false), Float.NaN); + assertEquals(fs.getMaximumScore(false), Float.NaN); + + // delete all features + assertTrue(fs.delete(sf1)); + assertTrue(fs.delete(sf2)); + assertTrue(fs.isEmpty()); + assertEquals(fs.getMinimumScore(true), Float.NaN); + assertEquals(fs.getMaximumScore(true), Float.NaN); + assertEquals(fs.getMinimumScore(false), Float.NaN); + assertEquals(fs.getMaximumScore(false), Float.NaN); + } + + @Test(groups = "Functional") + public void testListContains() + { + FeatureStoreI featureStore = newFeatureStore(); + assertFalse(featureStore.listContains(null, null)); + List features = new ArrayList<>(); + assertFalse(featureStore.listContains(features, null)); + + SequenceFeature sf1 = new SequenceFeature("type1", "desc1", 20, 30, 3f, + "group1"); + assertFalse(featureStore.listContains(null, sf1)); + assertFalse(featureStore.listContains(features, sf1)); + + features.add(sf1); + SequenceFeature sf2 = new SequenceFeature("type1", "desc1", 20, 30, 3f, + "group1"); + SequenceFeature sf3 = new SequenceFeature("type1", "desc1", 20, 40, 3f, + "group1"); + + // sf2.equals(sf1) so contains should return true + assertTrue(featureStore.listContains(features, sf2)); + assertFalse(featureStore.listContains(features, sf3)); + } + + @Test(groups = "Functional") + public void testGetFeaturesForGroup() + { + FeatureStoreI fs = newFeatureStore(); + + /* + * with no features + */ + assertTrue(fs.getFeaturesForGroup(true, null).isEmpty()); + assertTrue(fs.getFeaturesForGroup(false, null).isEmpty()); + assertTrue(fs.getFeaturesForGroup(true, "uniprot").isEmpty()); + assertTrue(fs.getFeaturesForGroup(false, "uniprot").isEmpty()); + + /* + * sf1: 
positional feature in the null group + */ + SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 4, 10, 0f, + null); + fs.addFeature(sf1); + assertTrue(fs.getFeaturesForGroup(true, "uniprot").isEmpty()); + assertTrue(fs.getFeaturesForGroup(false, "uniprot").isEmpty()); + assertTrue(fs.getFeaturesForGroup(false, null).isEmpty()); + List features = fs.getFeaturesForGroup(true, null); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf1)); + + /* + * sf2: non-positional feature in the null group + * sf3: positional feature in a non-null group + * sf4: non-positional feature in a non-null group + */ + SequenceFeature sf2 = new SequenceFeature("Pfam", "desc", 0, 0, 0f, + null); + SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 4, 10, 0f, + "Uniprot"); + SequenceFeature sf4 = new SequenceFeature("Pfam", "desc", 0, 0, 0f, + "Rfam"); + fs.addFeature(sf2); + fs.addFeature(sf3); + fs.addFeature(sf4); + + features = fs.getFeaturesForGroup(true, null); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf1)); + + features = fs.getFeaturesForGroup(false, null); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf2)); + + features = fs.getFeaturesForGroup(true, "Uniprot"); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf3)); + + features = fs.getFeaturesForGroup(false, "Rfam"); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf4)); + } + + @Test(groups = "Functional") + public void testShiftFeatures() + { + FeatureStoreI fs = newFeatureStore(); + assertFalse(fs.shiftFeatures(0, 1)); // nothing to do + + SequenceFeature sf1 = new SequenceFeature("Cath", "", 2, 5, 0f, null); + fs.addFeature(sf1); + // nested feature: + SequenceFeature sf2 = new SequenceFeature("Cath", "", 8, 14, 0f, null); + fs.addFeature(sf2); + // contact feature: + SequenceFeature sf3 = new SequenceFeature("Disulfide bond", "", 23, 32, + 0f, null); + fs.addFeature(sf3); + // non-positional feature: + 
SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, 0f, null); + fs.addFeature(sf4); + + /* + * shift all features right by 5 + */ + assertTrue(fs.shiftFeatures(0, 5)); + + // non-positional features untouched: + List nonPos = fs.getNonPositionalFeatures(); + assertEquals(nonPos.size(), 1); + assertTrue(nonPos.contains(sf4)); + + // positional features are replaced + List pos = fs.getPositionalFeatures(); + assertEquals(pos.size(), 3); + assertFalse(pos.contains(sf1)); + assertFalse(pos.contains(sf2)); + assertFalse(pos.contains(sf3)); + SequenceFeatures.sortFeatures(pos, true); // ascending start pos + assertEquals(pos.get(0).getBegin(), 7); + assertEquals(pos.get(0).getEnd(), 10); + assertEquals(pos.get(1).getBegin(), 13); + assertEquals(pos.get(1).getEnd(), 19); + assertEquals(pos.get(2).getBegin(), 28); + assertEquals(pos.get(2).getEnd(), 37); + + /* + * now shift left by 15 + * feature at [7-10] should be removed + * feature at [13-19] should become [1-4] + */ + assertTrue(fs.shiftFeatures(0, -15)); + pos = fs.getPositionalFeatures(); + assertEquals(pos.size(), 2); + SequenceFeatures.sortFeatures(pos, true); + assertEquals(pos.get(0).getBegin(), 1); + assertEquals(pos.get(0).getEnd(), 4); + assertEquals(pos.get(1).getBegin(), 13); + assertEquals(pos.get(1).getEnd(), 22); + + /* + * shift right by 4 from position 2 onwards + * feature at [1-4] unchanged, feature at [13-22] shifts + */ + assertTrue(fs.shiftFeatures(2, 4)); + pos = fs.getPositionalFeatures(); + assertEquals(pos.size(), 2); + SequenceFeatures.sortFeatures(pos, true); + assertEquals(pos.get(0).getBegin(), 1); + assertEquals(pos.get(0).getEnd(), 4); + assertEquals(pos.get(1).getBegin(), 17); + assertEquals(pos.get(1).getEnd(), 26); + + /* + * shift right by 4 from position 18 onwards + * should be no change + */ + SequenceFeature f1 = pos.get(0); + SequenceFeature f2 = pos.get(1); + assertFalse(fs.shiftFeatures(18, 4)); // no update + pos = fs.getPositionalFeatures(); + 
assertEquals(pos.size(), 2); + SequenceFeatures.sortFeatures(pos, true); + assertSame(pos.get(0), f1); + assertSame(pos.get(1), f2); + } + + @Test(groups = "Functional") + public void testDelete_readd() + { + /* + * add a feature and a nested feature + */ + FeatureStoreI store = newFeatureStore(); + SequenceFeature sf1 = addFeature(store, 10, 20); + // sf2 is nested in sf1 so will be stored in nestedFeatures + SequenceFeature sf2 = addFeature(store, 12, 14); + List features = store.getPositionalFeatures(); + assertEquals(features.size(), 2); + assertTrue(features.contains(sf1)); + assertTrue(features.contains(sf2)); + assertTrue(store.getFeatures().contains(sf1)); + assertTrue(store.getFeatures().contains(sf2)); + + /* + * delete the first feature + */ + assertTrue(store.delete(sf1)); + features = store.getPositionalFeatures(); + assertFalse(features.contains(sf1)); + assertTrue(features.contains(sf2)); + + /* + * re-add the 'nested' feature; is it now duplicated? + */ + store.addFeature(sf2); + features = store.getPositionalFeatures(); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf2)); + } + + @Test(groups = "Functional") + public void testContains() + { + FeatureStoreI fs = newFeatureStore(); + SequenceFeature sf1 = new SequenceFeature("Cath", "", 10, 20, + Float.NaN, "group1"); + SequenceFeature sf2 = new SequenceFeature("Cath", "", 10, 20, + Float.NaN, "group2"); + SequenceFeature sf3 = new SequenceFeature("Cath", "", 0, 0, Float.NaN, + "group1"); + SequenceFeature sf4 = new SequenceFeature("Cath", "", 0, 0, 0f, + "group1"); + SequenceFeature sf5 = new SequenceFeature("Disulphide Bond", "", 5, 15, + Float.NaN, "group1"); + SequenceFeature sf6 = new SequenceFeature("Disulphide Bond", "", 5, 15, + Float.NaN, "group2"); + + fs.addFeature(sf1); + fs.addFeature(sf3); + fs.addFeature(sf5); + assertTrue(fs.contains(sf1)); // positional feature + assertTrue(fs.contains(new SequenceFeature(sf1))); // identical feature + 
assertFalse(fs.contains(sf2)); // different group + assertTrue(fs.contains(sf3)); // non-positional + assertTrue(fs.contains(new SequenceFeature(sf3))); + assertFalse(fs.contains(sf4)); // different score + assertTrue(fs.contains(sf5)); // contact feature + assertTrue(fs.contains(new SequenceFeature(sf5))); + assertFalse(fs.contains(sf6)); // different group + + /* + * add a nested feature + */ + SequenceFeature sf7 = new SequenceFeature("Cath", "", 12, 16, + Float.NaN, "group1"); + fs.addFeature(sf7); + assertTrue(fs.contains(sf7)); + assertTrue(fs.contains(new SequenceFeature(sf7))); + + /* + * delete the outer (enclosing, non-nested) feature + */ + fs.delete(sf1); + assertFalse(fs.contains(sf1)); + assertTrue(fs.contains(sf7)); + } +} -- 1.7.10.2