From 0e65ec37e2c956e0ad94feda6fd07b7c3a9003ee Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 30 May 2017 08:54:59 +0100 Subject: [PATCH] JAL-2526 first draft of findIndex/findPosition with SequenceCursor --- src/jalview/commands/EditCommand.java | 2 + src/jalview/datamodel/Sequence.java | 274 ++++++++++++++++++++++++++++- src/jalview/datamodel/SequenceCursor.java | 37 ++++ src/jalview/datamodel/SequenceI.java | 5 + test/jalview/datamodel/SequenceTest.java | 93 +++++++++- 5 files changed, 405 insertions(+), 6 deletions(-) create mode 100644 src/jalview/datamodel/SequenceCursor.java diff --git a/src/jalview/commands/EditCommand.java b/src/jalview/commands/EditCommand.java index bba0dfb..0a8dc7d 100644 --- a/src/jalview/commands/EditCommand.java +++ b/src/jalview/commands/EditCommand.java @@ -798,6 +798,8 @@ public class EditCommand implements CommandI AlignmentAnnotation[] tmp; for (int s = 0; s < command.seqs.length; s++) { + command.seqs[s].zapCursor(); + if (modifyVisibility) { // Rows are only removed or added to sequence object. diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index dd7c24a..7ecb07a 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -89,6 +89,21 @@ public class Sequence extends ASequence implements SequenceI private SequenceFeatures sequenceFeatureStore; + /* + * A cursor holding the approximate current view position to the sequence, + * as determined by findIndex or findPosition or findPositions. + * Using a cursor as a hint allows these methods to be more performant for + * large sequences. + */ + private SequenceCursor cursor; + + /* + * A number that should be incremented whenever the sequence is edited. + * If the value matches the cursor token, then we can trust the cursor, + * if not then it should be recomputed. + */ + private int changeCount; + /** * Creates a new Sequence object. * @@ -656,7 +671,15 @@ public class Sequence extends ASequence implements SequenceI @Override public int findIndex(int pos) { - // returns the alignment position for a residue + /* + * use a valid nearby cursor if available + */ + if (cursor != null && cursor.sequence == this + && cursor.token == changeCount) + { + return findIndex(pos, cursor); + } + int j = start; int i = 0; // Rely on end being at least as long as the length of the sequence. @@ -666,7 +689,6 @@ public class Sequence extends ASequence implements SequenceI { j++; } - i++; } @@ -676,13 +698,80 @@ public class Sequence extends ASequence implements SequenceI } else { + updateCursor(pos, i); return i; } } + protected void updateCursor(int residuePos, int column) + { + // TODO probably want to synchronize this on something + cursor = new SequenceCursor(this, residuePos, column, this.changeCount); + } + + /** + * Answers the aligned column position (1..) for the given residue position + * (start..) given a 'hint' of a residue/column location in the neighbourhood. + * The hint may be left of, at, or to the right of the required position. + * + * @param pos + * @param curs + * @return + */ + protected int findIndex(int pos, SequenceCursor curs) + { + if (curs.sequence != this || curs.token != changeCount) + { + /* + * wrong or invalidated cursor, compute de novo + */ + return findIndex(pos); + } + + if (curs.residuePosition == pos) + { + return curs.columnPosition; + } + + /* + * move left or right to find pos from hint.position + */ + int col = curs.columnPosition - 1; // convert from base 1 to 0-based array + // index + int newPos = curs.residuePosition; + int delta = newPos > pos ? -1 : 1; + + while (newPos != pos) + { + col += delta; // shift one column left or right + if (col < 0 || col == sequence.length) + { + break; + } + if (!Comparison.isGap(sequence[col])) + { + newPos += delta; + } + } + + col++; // convert back to base 1 + updateCursor(pos, col); + + return col; + } + @Override - public int findPosition(int i) + public int findPosition(final int i) { + /* + * use a valid nearby cursor if available + */ + if (cursor != null && cursor.sequence == this + && cursor.token == changeCount) + { + return findPosition(i + 1, cursor); + } + int j = 0; int pos = start; int seqlen = sequence.length; @@ -696,15 +785,84 @@ public class Sequence extends ASequence implements SequenceI j++; } + if (j == i && !Comparison.isGap(sequence[i])) + { + updateCursor(pos, i + 1); + } + return pos; } /** + * Answers the sequence position (start..) for the given aligned column + * position (1..), given a hint of a cursor in the neighbourhood. The cursor + * may lie left of, at, or to the right of the column position. + * + * @param col + * @param curs + * @return + */ + protected int findPosition(final int col, SequenceCursor curs) + { + if (curs.sequence != this || curs.token != changeCount) + { + /* + * wrong or invalidated cursor, compute de novo + */ + return findPosition(col - 1);// ugh back to base 0 + } + + if (curs.columnPosition == col) + { + return curs.residuePosition; // easy case :-) + } + + /* + * move left or right to find pos from cursor position + */ + int column = curs.columnPosition - 1; // to base 0 + int newPos = curs.residuePosition; + int delta = curs.columnPosition > col ? -1 : 1; + boolean gapped = false; + + while (column != col - 1) + { + column += delta; // shift one column left or right + if (column < 0 || column == sequence.length) + { + break; + } + gapped = Comparison.isGap(sequence[column]); + if (!gapped) + { + newPos += delta; + } + } + + /* + * hack to give position to the right if on a gap + * pending resolution of JAL-2562 + */ + if (delta > 0 && gapped) + { + newPos++; + } + + return newPos; + } + + /** * {@inheritDoc} */ @Override public Range findPositions(int fromCol, int toCol) { + if (cursor != null && cursor.sequence == this + && cursor.token == changeCount) + { + return findPositions(fromCol, toCol, cursor); + } + /* * count residues before fromCol */ @@ -725,7 +883,6 @@ public class Sequence extends ASequence implements SequenceI */ int firstPos = 0; int lastPos = 0; - int firstPosCol = 0; boolean foundFirst = false; while (j <= toCol && j < seqlen) @@ -736,7 +893,6 @@ public class Sequence extends ASequence implements SequenceI if (!foundFirst) { firstPos = count; - firstPosCol = j; foundFirst = true; } lastPos = count; @@ -762,6 +918,104 @@ public class Sequence extends ASequence implements SequenceI } /** + * Returns the range of sequence positions included in the given alignment + * position range. If no positions are included (the range is entirely gaps), + * then returns null. The cursor parameter may provide a starting position in + * the neighbourhood of the search (which may be left of, right of, or + * overlapping the search region). + * + * @param fromCol + * start column of region (0..) + * @param toCol + * end column of region (0..) + * @param curs + * @return + */ + protected Range findPositions(int fromCol, int toCol, SequenceCursor curs) + { + if (curs.sequence != this || curs.token != changeCount) + { + /* + * wrong or invalidated cursor, compute de novo + */ + return findPositions(fromCol, toCol); + } + + /* + * keep this simple...first step from cursor to fromCol... + */ + final int seqlen = sequence.length; + int resNo = curs.residuePosition; + int col = curs.columnPosition - 1; // from base 1 to base 0 + if (col != fromCol) + { + int delta = col > fromCol ? -1 : 1; + while (col != fromCol && col >= 0 && col < seqlen) + { + if (!Comparison.isGap(sequence[col])) + { + resNo += delta; + } + col += delta; + } + } + + if (col < fromCol || col == seqlen) + { + /* + * sequence lies to the left of the target region + */ + return null; + } + + /* + * resNo is now the residue at fromCol (if not gapped), else the one + * before it (if delta == 1), else the one after (if delta == -1); + * we want the residue before fromCol + */ + if (!Comparison.isGap(sequence[fromCol])) + { + resNo--; + } + else if (curs.columnPosition > fromCol) + { + resNo -= 2; + } + + /* + * now first and last residues between fromCol and toCol + */ + int firstPos = 0; + int lastPos = 0; + boolean foundFirst = false; + + while (col <= toCol && col < seqlen) + { + if (!Comparison.isGap(sequence[col])) + { + resNo++; + if (!foundFirst) + { + firstPos = resNo; + foundFirst = true; + } + lastPos = resNo; + } + col++; + } + + if (firstPos == 0) + { + /* + * no residues in this range + */ + return null; + } + + return new Range(firstPos, lastPos); + } + + /** * Returns an int array where indices correspond to each residue in the * sequence and the element value gives its position in the alignment * @@ -1514,4 +1768,14 @@ public class Sequence extends ASequence implements SequenceI } return sequenceFeatureStore.findFeatures(from, to, types); } + + /** + * Invalidates any stale cursors (forcing recalculation) by incrementing the + * token that has to match the one presented by the cursor + */ + @Override + public void zapCursor() + { + changeCount++; + } } diff --git a/src/jalview/datamodel/SequenceCursor.java b/src/jalview/datamodel/SequenceCursor.java new file mode 100644 index 0000000..482e0fa --- /dev/null +++ b/src/jalview/datamodel/SequenceCursor.java @@ -0,0 +1,37 @@ +package jalview.datamodel; + +/** + * An immutable object representing one or more residue and corresponding + * alignment column positions for a sequence + */ +public class SequenceCursor +{ + /** + * the aligned sequence this cursor applies to + */ + public final SequenceI sequence; + + /** + * residue position in sequence (start...), 0 if undefined + */ + public final int residuePosition; + + /** + * column position (1...) corresponding to residuePosition, or 0 if undefined + */ + public final int columnPosition; + + /** + * a token which may be used to check whether this cursor is still valid for + * its sequence (allowing it to be ignored if the sequence has changed) + */ + public final int token; + + public SequenceCursor(SequenceI seq, int resPos, int column, int tok) + { + sequence = seq; + residuePosition = resPos; + columnPosition = column; + token = tok; + } +} diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index dbf3ed3..0f78bdb 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -520,4 +520,9 @@ public interface SequenceI extends ASequenceI * @return */ List findFeatures(int from, int to, String... types); + + /** + * Invalidate any cursors on the sequence (e.g. after an edit) + */ + public void zapCursor(); } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 4da11eb..e2e14b4 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -226,23 +226,31 @@ public class SequenceTest { SequenceI sq = new Sequence("test", "ABCDEF"); assertEquals(0, sq.findIndex(0)); + PA.setValue(sq, "cursor", null); assertEquals(1, sq.findIndex(1)); + PA.setValue(sq, "cursor", null); assertEquals(5, sq.findIndex(5)); + PA.setValue(sq, "cursor", null); assertEquals(6, sq.findIndex(6)); + PA.setValue(sq, "cursor", null); assertEquals(6, sq.findIndex(9)); sq = new Sequence("test/8-13", "-A--B-C-D-E-F--"); assertEquals(2, sq.findIndex(8)); + PA.setValue(sq, "cursor", null); assertEquals(5, sq.findIndex(9)); + PA.setValue(sq, "cursor", null); assertEquals(7, sq.findIndex(10)); // before start returns 0 + PA.setValue(sq, "cursor", null); assertEquals(0, sq.findIndex(0)); + PA.setValue(sq, "cursor", null); assertEquals(0, sq.findIndex(-1)); // beyond end returns last residue column + PA.setValue(sq, "cursor", null); assertEquals(13, sq.findIndex(99)); - } /** @@ -1181,4 +1189,87 @@ public class SequenceTest range = sq.findPositions(3, 7); // DE assertEquals(new Range(11, 12), range); } + + @Test(groups = { "Functional" }) + public void testFindIndex_withCursor() + { + Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--"); + + // find F given A + assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, 0))); + + // find A given F + assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, 0))); + + // find C given C + assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0))); + } + + @Test(groups = { "Functional" }) + public void testFindPosition_withCursor() + { + Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--"); + + // find F pos given A + assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0))); + + // find A pos given F + assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0))); + + // find C pos given C + assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0))); + + // now the grey area - what residue position for a gapped column? JAL-2562 + + // find 'residue' for column 3 given cursor for D (so working left) + assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0))); + + // find 'residue' for column 8 given cursor for D (so working right) + assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0))); + + // find 'residue' for column 12 given cursor for B + assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0))); + } + + @Test + public void testFindPositions_withCursor() + { + Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13); + + // find positions for columns 1-4 (BC--) given E cursor + Range range = sq.findPositions(1, 4, new SequenceCursor(sq, 12, 7, 0)); // BC + assertEquals(new Range(9, 10), range); + + // repeat using B cursor + range = sq.findPositions(1, 4, new SequenceCursor(sq, 9, 2, 0)); // BC + assertEquals(new Range(9, 10), range); + + // find positions for columns 2-4 (C--) given A cursor + range = sq.findPositions(2, 4, new SequenceCursor(sq, 8, 1, 0)); // C + assertEquals(new Range(10, 10), range); + + // gapped region + assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 10, 3, 0))); + assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 12, 7, 0))); + + // find positions for columns 2-6 (C--DE) given B cursor + range = sq.findPositions(2, 6, new SequenceCursor(sq, 9, 2, 0)); // CDE + assertEquals(new Range(10, 12), range); + + // repeat using C as cursor + range = sq.findPositions(2, 6, new SequenceCursor(sq, 10, 3, 0)); + assertEquals(new Range(10, 12), range); + + // repeat using D as cursor + range = sq.findPositions(2, 6, new SequenceCursor(sq, 11, 6, 0)); + assertEquals(new Range(10, 12), range); + + // repeat using E as cursor + range = sq.findPositions(2, 6, new SequenceCursor(sq, 12, 7, 0)); + assertEquals(new Range(10, 12), range); + + // repeat using F as cursor + range = sq.findPositions(2, 6, new SequenceCursor(sq, 13, 9, 0)); + assertEquals(new Range(10, 12), range); + } } -- 1.7.10.2