From: gmungoc Date: Mon, 5 Jun 2017 08:03:10 +0000 (+0100) Subject: Merge branch 'features/JAL-2526sequenceCursor' into X-Git-Tag: Release_2_10_3b1~214 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=b5aea33b1a9dcb1831ef681342be55c8929cf080;p=jalview.git Merge branch 'features/JAL-2526sequenceCursor' into features/JAL-2446NCList Conflicts: src/jalview/datamodel/Sequence.java src/jalview/datamodel/SequenceI.java --- b5aea33b1a9dcb1831ef681342be55c8929cf080 diff --cc src/jalview/datamodel/Sequence.java index c53319e,ab6639a..8c53482 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@@ -702,6 -822,267 +823,103 @@@ public class Sequence extends ASequenc } /** + * Answers true if the given cursor is not null, is for this sequence object, + * and has a token value that matches this object's changeCount, else false. + * This allows us to ignore a cursor as 'stale' if the sequence has been + * modified since the cursor was created. + * + * @param curs + * @return + */ + protected boolean isValidCursor(SequenceCursor curs) + { + if (curs == null || curs.sequence != this || curs.token != changeCount) + { + return false; + } + /* + * sanity check against range + */ + if (curs.columnPosition < 0 || curs.columnPosition >= sequence.length) + { + return false; + } + if (curs.residuePosition < start || curs.residuePosition > end) + { + return false; + } + return true; + } + + /** + * Answers the sequence position (start..) for the given aligned column + * position (1..), given a hint of a cursor in the neighbourhood. The cursor + * may lie left of, at, or to the right of the column position. + * + * @param col + * @param curs + * @return + */ + protected int findPosition(final int col, SequenceCursor curs) + { + if (!isValidCursor(curs)) + { + /* + * wrong or invalidated cursor, compute de novo + */ + return findPosition(col - 1);// ugh back to base 0 + } + + if (curs.columnPosition == col) + { + cursor = curs; // in case this method becomes public + return curs.residuePosition; // easy case :-) + } + + /* + * move left or right to find pos from cursor position + */ + int column = curs.columnPosition - 1; // to base 0 + int newPos = curs.residuePosition; + int delta = curs.columnPosition > col ? -1 : 1; + boolean gapped = false; + int lastFoundPosition = curs.residuePosition; + int lastFoundPositionColumn = curs.columnPosition; + + while (column != col - 1) + { + column += delta; // shift one column left or right + if (column < 0 || column == sequence.length) + { + break; + } + gapped = Comparison.isGap(sequence[column]); + if (!gapped) + { + newPos += delta; + lastFoundPosition = newPos; + lastFoundPositionColumn = column + 1; + } + } + + if (cursor == null || lastFoundPosition != cursor.residuePosition) + { + updateCursor(lastFoundPosition, lastFoundPositionColumn); + } + + /* + * hack to give position to the right if on a gap + * or beyond the length of the sequence (see JAL-2562) + */ + if (delta > 0 && (gapped || column >= sequence.length)) + { + newPos++; + } + + return newPos; + } + + /** - * {@inheritDoc} - */ - @Override - public Range findPositions(int fromCol, int toCol) - { - if (cursor != null && cursor.sequence == this - && cursor.token == changeCount) - { - return findPositions(fromCol, toCol, cursor); - } - - /* - * count residues before fromCol - */ - int j = 0; - int count = 0; - int seqlen = sequence.length; - while (j < fromCol && j < seqlen) - { - if (!Comparison.isGap(sequence[j])) - { - count++; - } - j++; - } - - /* - * find first and last residues between fromCol and toCol - */ - int firstPos = 0; - int lastPos = 0; - boolean foundFirst = false; - - while (j <= toCol && j < seqlen) - { - if (!Comparison.isGap(sequence[j])) - { - count++; - if (!foundFirst) - { - firstPos = count; - foundFirst = true; - } - lastPos = count; - } - j++; - } - - if (firstPos == 0) - { - /* - * no residues in this range - */ - return null; - } - - /* - * adjust for sequence start coordinate - */ - firstPos += start - 1; - lastPos += start - 1; - - return new Range(firstPos, lastPos); - } - - /** - * Returns the range of sequence positions included in the given alignment - * position range. If no positions are included (the range is entirely gaps), - * then returns null. The cursor parameter may provide a starting position in - * the neighbourhood of the search (which may be left of, right of, or - * overlapping the search region). - * - * @param fromCol - * start column of region (0..) - * @param toCol - * end column of region (0..) - * @param curs - * @return - */ - protected Range findPositions(int fromCol, int toCol, SequenceCursor curs) - { - if (!isValidCursor(curs)) - { - /* - * wrong or invalidated cursor, compute de novo - */ - return findPositions(fromCol, toCol); - } - - /* - * keep this simple...first step from cursor to fromCol... - */ - final int seqlen = sequence.length; - int resNo = curs.residuePosition; - int col = curs.columnPosition - 1; // from base 1 to base 0 - if (col != fromCol) - { - int delta = col > fromCol ? -1 : 1; - while (col != fromCol && col >= 0 && col < seqlen) - { - if (!Comparison.isGap(sequence[col])) - { - resNo += delta; - } - col += delta; - } - } - - if (col < fromCol || col == seqlen) - { - /* - * sequence lies to the left of the target region - */ - return null; - } - - /* - * resNo is now the residue at fromCol (if not gapped), else the one - * before it (if delta == 1), else the one after (if delta == -1); - * we want the residue before fromCol - */ - if (!Comparison.isGap(sequence[fromCol])) - { - resNo--; - } - else if (curs.columnPosition > fromCol) - { - resNo -= 2; - } - - /* - * now first and last residues between fromCol and toCol - */ - int firstPos = 0; - int lastPos = 0; - boolean foundFirst = false; - - while (col <= toCol && col < seqlen) - { - if (!Comparison.isGap(sequence[col])) - { - resNo++; - if (!foundFirst) - { - firstPos = resNo; - foundFirst = true; - } - lastPos = resNo; - } - col++; - } - - if (firstPos == 0) - { - /* - * no residues in this range - */ - return null; - } - - return new Range(firstPos, lastPos); - } - - /** * Returns an int array where indices correspond to each residue in the * sequence and the element value gives its position in the alignment * @@@ -1479,55 -1862,23 +1699,65 @@@ * {@inheritDoc} */ @Override - public List findFeatures(int from, int to, + public List findFeatures(int fromColumn, int toColumn, String... types) { + int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0 + int endPos = findPosition(toColumn - 1); + + List result = new ArrayList<>(); if (datasetSequence != null) { - return datasetSequence.findFeatures(from, to, types); + result = datasetSequence.getFeatures().findFeatures(startPos, endPos, + types); } - return sequenceFeatureStore.findFeatures(from, to, types); + else + { + result = sequenceFeatureStore.findFeatures(startPos, endPos, types); + } + + /* + * if the start or end column is gapped, startPos or endPos may be to the + * left or right, and we may have included adjacent or enclosing features; + * remove any that are not enclosing, non-contact features + */ + if (endPos > this.end || Comparison.isGap(sequence[fromColumn - 1]) + || Comparison.isGap(sequence[toColumn - 1])) + { + ListIterator it = result.listIterator(); + while (it.hasNext()) + { + SequenceFeature sf = it.next(); + int featureStartColumn = findIndex(sf.getBegin()); + int featureEndColumn = findIndex(sf.getEnd()); + boolean noOverlap = featureStartColumn > toColumn + || featureEndColumn < fromColumn; + + /* + * reject an 'enclosing' feature if it is actually a contact feature + */ + if (sf.isContactFeature() && featureStartColumn < fromColumn + && featureEndColumn > toColumn) + { + noOverlap = true; + } + if (noOverlap) + { + it.remove(); + } + } + } + + return result; } + + /** + * Invalidates any stale cursors (forcing recalculation) by incrementing the + * token that has to match the one presented by the cursor + */ + @Override + public void sequenceChanged() + { + changeCount++; + } } diff --cc src/jalview/datamodel/SequenceI.java index 1615bee,38be37f..f49039c --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@@ -487,20 -511,23 +487,27 @@@ public interface SequenceI extends ASeq public List getPrimaryDBRefs(); /** - * Returns a (possibly empty) list of sequence features that overlap the range - * from-to (inclusive), optionally restricted to one or more specified feature - * types + * Returns a (possibly empty) list of sequence features that overlap the given + * alignment column range, optionally restricted to one or more specified + * feature types. If the range is all gaps, then features which enclose it are + * included (but not contact features). * - * @param from - * @param to + * @param fromCol + * start column of range inclusive (1..) + * @param toCol + * end column of range inclusive (1..) * @param types + * optional feature types to restrict results to * @return */ - List findFeatures(int from, int to, String... types); + List findFeatures(int fromCol, int toCol, String... types); + + /** + * Method to call to indicate that the sequence (characters or alignment/gaps) + * has been modified. Provided to allow any cursors on residue/column + * positions to be invalidated. + */ + void sequenceChanged(); /** * diff --cc test/jalview/datamodel/SequenceTest.java index cff4883,c5850dc..97a75f9 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@@ -1175,57 -1265,246 +1265,238 @@@ public class SequenceTes seq.setDatasetSequence(seq2); } - @Test - public void testFindPositions() + @Test(groups = { "Functional" }) + public void testFindFeatures() { - SequenceI sq = new Sequence("Seq", "ABC--DE-F", 8, 13); - - Range range = sq.findPositions(1, 4); // BC - assertEquals(new Range(9, 10), range); - - range = sq.findPositions(2, 4); // C - assertEquals(new Range(10, 10), range); - - assertNull(sq.findPositions(3, 4)); // all gaps + SequenceI sq = new Sequence("test/8-16", "-ABC--DEF--GHI--"); + sq.createDatasetSequence(); - range = sq.findPositions(2, 6); // CDE - assertEquals(new Range(10, 12), range); + assertTrue(sq.findFeatures(1, 99).isEmpty()); - range = sq.findPositions(3, 7); // DE - assertEquals(new Range(11, 12), range); + // add non-positional feature + SequenceFeature sf0 = new SequenceFeature("Cath", "desc", 0, 0, 2f, + null); + sq.addSequenceFeature(sf0); + // add feature on BCD + SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 9, 11, 2f, + null); + sq.addSequenceFeature(sf1); + // add feature on DE + SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 11, 12, 2f, + null); + sq.addSequenceFeature(sf2); + // add contact feature at [B, H] + SequenceFeature sf3 = new SequenceFeature("Disulphide bond", "desc", 9, + 15, 2f, + null); + sq.addSequenceFeature(sf3); + // add contact feature at [F, G] + SequenceFeature sf4 = new SequenceFeature("Disulfide Bond", "desc", 13, + 14, 2f, + null); + sq.addSequenceFeature(sf4); + + // no features in columns 1-2 (-A) + List found = sq.findFeatures(1, 2); + assertTrue(found.isEmpty()); + + // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE + found = sq.findFeatures(1, 6); + assertEquals(2, found.size()); + assertTrue(found.contains(sf1)); + assertTrue(found.contains(sf3)); + + // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature + found = sq.findFeatures(5, 6); + assertEquals(1, found.size()); + assertTrue(found.contains(sf1)); + + // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature + found = sq.findFeatures(7, 10); + assertEquals(3, found.size()); + assertTrue(found.contains(sf1)); + assertTrue(found.contains(sf2)); + assertTrue(found.contains(sf4)); } + + @Test(groups = { "Functional" }) + public void testFindIndex_withCursor() + { + Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--"); + + // find F given A + assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, 0))); + + // find A given F + assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, 0))); + + // find C given C + assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0))); + } + + @Test(groups = { "Functional" }) + public void testFindPosition_withCursor() + { + Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--"); + + // find F pos given A + assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0))); + int token = (int) PA.getValue(sq, "changeCount"); // 0 + SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, token), cursor); + + // find A pos given F + assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 8, 2, token), cursor); + + // find C pos given C + assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 10, 6, token), cursor); + + // now the grey area - what residue position for a gapped column? JAL-2562 + + // find 'residue' for column 3 given cursor for D (so working left) + // returns B9; cursor is updated to [B 5] + assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 9, 5, token), cursor); + + // find 'residue' for column 8 given cursor for D (so working right) + // returns E12; cursor is updated to [D 7] + assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 11, 7, token), cursor); + + // find 'residue' for column 12 given cursor for B + // returns 1 more than last residue position; cursor is updated to [F 10] + assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, token), cursor); + + /* + * findPosition for column beyond length of sequence + * returns 1 more than the last residue position + * cursor is set to last real residue position [F 10] + */ + assertEquals(14, sq.findPosition(99, new SequenceCursor(sq, 8, 2, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, token), cursor); + + /* + * and the case without a trailing gap + */ + sq = new Sequence("test/8-13", "-A--BCD-EF"); + // first find C from A + assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, 0))); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 10, 6, token), cursor); + // now 'find' 99 from C + // cursor is set to [F 10] + assertEquals(14, sq.findPosition(99, cursor)); + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, token), cursor); + } + + @Test - public void testFindPositions_withCursor() - { - Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13); - - // find positions for columns 1-4 (BC--) given E cursor - Range range = sq.findPositions(1, 4, new SequenceCursor(sq, 12, 7, 0)); // BC - assertEquals(new Range(9, 10), range); - - // repeat using B cursor - range = sq.findPositions(1, 4, new SequenceCursor(sq, 9, 2, 0)); // BC - assertEquals(new Range(9, 10), range); - - // find positions for columns 2-4 (C--) given A cursor - range = sq.findPositions(2, 4, new SequenceCursor(sq, 8, 1, 0)); // C - assertEquals(new Range(10, 10), range); - - // gapped region - assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 10, 3, 0))); - assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 12, 7, 0))); - - // find positions for columns 2-6 (C--DE) given B cursor - range = sq.findPositions(2, 6, new SequenceCursor(sq, 9, 2, 0)); // CDE - assertEquals(new Range(10, 12), range); - - // repeat using C as cursor - range = sq.findPositions(2, 6, new SequenceCursor(sq, 10, 3, 0)); - assertEquals(new Range(10, 12), range); - - // repeat using D as cursor - range = sq.findPositions(2, 6, new SequenceCursor(sq, 11, 6, 0)); - assertEquals(new Range(10, 12), range); - - // repeat using E as cursor - range = sq.findPositions(2, 6, new SequenceCursor(sq, 12, 7, 0)); - assertEquals(new Range(10, 12), range); - - // repeat using F as cursor - range = sq.findPositions(2, 6, new SequenceCursor(sq, 13, 9, 0)); - assertEquals(new Range(10, 12), range); - } - - @Test + public void testIsValidCursor() + { + Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13); + assertFalse(sq.isValidCursor(null)); + + /* + * cursor is valid if it has valid sequence ref and changeCount token + * and positions within the range of the sequence + */ + int changeCount = (int) PA.getValue(sq, "changeCount"); + SequenceCursor cursor = new SequenceCursor(sq, 13, 1, changeCount); + assertTrue(sq.isValidCursor(cursor)); + + /* + * column position outside [0 - length-1] is rejected + */ + cursor = new SequenceCursor(sq, 13, -1, changeCount); + assertFalse(sq.isValidCursor(cursor)); + cursor = new SequenceCursor(sq, 13, 9, changeCount); + assertFalse(sq.isValidCursor(cursor)); + cursor = new SequenceCursor(sq, 7, 8, changeCount); + assertFalse(sq.isValidCursor(cursor)); + cursor = new SequenceCursor(sq, 14, 2, changeCount); + assertFalse(sq.isValidCursor(cursor)); + + /* + * wrong sequence is rejected + */ + cursor = new SequenceCursor(null, 13, 1, changeCount); + assertFalse(sq.isValidCursor(cursor)); + cursor = new SequenceCursor(new Sequence("Seq", "abc"), 13, 1, + changeCount); + assertFalse(sq.isValidCursor(cursor)); + + /* + * wrong token value is rejected + */ + cursor = new SequenceCursor(sq, 13, 1, changeCount + 1); + assertFalse(sq.isValidCursor(cursor)); + cursor = new SequenceCursor(sq, 13, 1, changeCount - 1); + assertFalse(sq.isValidCursor(cursor)); + } + + @Test(groups = { "Functional" }) + public void testFindPosition_withCursorAndEdits() + { + Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--"); + + // find F pos given A + assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0))); + int token = (int) PA.getValue(sq, "changeCount"); // 0 + SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, token), cursor); + + /* + * setSequence should invalidate the cursor cached by the sequence + */ + sq.setSequence("-A-BCD-EF---"); // one gap removed + assertEquals(8, sq.getStart()); // sanity check + assertEquals(11, sq.findPosition(5)); // D11 + // cursor should now be at [D 6] + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor); + + /* + * deleteChars should invalidate the cached cursor + */ + sq.deleteChars(2, 5); // delete -BC + assertEquals("-AD-EF---", sq.getSequenceAsString()); + assertEquals(8, sq.getStart()); // sanity check + assertEquals(10, sq.findPosition(4)); // E10 + // cursor should now be at [E 5] + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 10, 5, ++token), cursor); + + /* + * Edit to insert gaps should invalidate the cached cursor + * insert 2 gaps at column[3] to make -AD---EF--- + */ + SequenceI[] seqs = new SequenceI[] { sq }; + AlignmentI al = new Alignment(seqs); + new EditCommand().appendEdit(Action.INSERT_GAP, seqs, 3, 2, al, true); + assertEquals("-AD---EF---", sq.getSequenceAsString()); + assertEquals(10, sq.findPosition(4)); // E10 + // cursor should now be at [D 3] + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 9, 3, ++token), cursor); + + /* + * insertCharAt should invalidate the cached cursor + * insert CC at column[4] to make -AD-CC--EF--- + */ + sq.insertCharAt(4, 2, 'C'); + assertEquals("-AD-CC--EF---", sq.getSequenceAsString()); + assertEquals(13, sq.findPosition(9)); // F13 + // cursor should now be at [F 10] + cursor = (SequenceCursor) PA.getValue(sq, "cursor"); + assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor); + } }