From a22ecca7f6262d7795d36629198e7531e2805daf Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Thu, 8 Jun 2017 09:27:34 +0100
Subject: [PATCH] JAL-2526 cache first/last residue column positions in cursor

Record the column positions of the first and last residue in
SequenceCursor, so that findPosition can answer at once for a column
that lies wholly before or wholly after the residues of the sequence.

---
Review notes (this section is ignored by git am):

 * findPosition(col, curs): the "sequence lies entirely to the left of
   col" shortcut compared lastColumnPosition <= col, so a request for
   the last residue's own column, made with a cursor parked elsewhere,
   returned end + 1 instead of end. The comparison is now strict (<).
 * updateCursor: the cached last-residue column was copied from the
   previous cursor even if that cursor was stale (sequence changed
   since it was made). It is now carried over only when the previous
   cursor is still valid; otherwise it is reset to 0 (unknown).
 * findIndex: the first-residue column was stored base 0, although
   SequenceCursor documents it as base 1 with 0 meaning undefined.
   It is now stored as i + 1.
 * Line breaks and indentation were reconstructed from a copy whose
   whitespace had been collapsed; hunk line counts are unchanged. The
   blob ids on the index lines are those of the original patch.

 src/jalview/datamodel/Sequence.java       |   83 ++++++++++++++++++++---
 src/jalview/datamodel/SequenceCursor.java |   43 +++++++++++-
 test/jalview/datamodel/SequenceTest.java  |  104 ++++++++++++++++++++++-------
 3 files changed, 194 insertions(+), 36 deletions(-)

diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java
index 27a4163..8ea6ca3 100755
--- a/src/jalview/datamodel/Sequence.java
+++ b/src/jalview/datamodel/Sequence.java
@@ -682,11 +682,20 @@ public class Sequence extends ASequence implements SequenceI
 
     int j = start;
     int i = 0;
-    // Rely on end being at least as long as the length of the sequence.
+    int startColumn = 0;
+
+    /*
+     * traverse sequence from the start counting gaps; make a note of
+     * the column of the first residue to save in the cursor
+     */
     while ((i < sequence.length) && (j <= end) && (j <= pos))
     {
       if (!Comparison.isGap(sequence[i]))
       {
+        if (j == start)
+        {
+          startColumn = i + 1;
+        }
         j++;
       }
       i++;
@@ -697,7 +706,7 @@ public class Sequence extends ASequence implements SequenceI
       return end + 1;
     }
 
-    updateCursor(pos, i);
+    updateCursor(pos, i, startColumn);
     return i;
   }
 
@@ -708,10 +717,19 @@ public class Sequence extends ASequence implements SequenceI
    *          (start..)
    * @param column
    *          (1..)
+   * @param startColumn
+   *          column position of the first sequence residue
    */
-  protected void updateCursor(int residuePos, int column)
+  protected void updateCursor(int residuePos, int column, int startColumn)
   {
-    cursor = new SequenceCursor(this, residuePos, column, this.changeCount);
+    int endColumn = column;
+    if (residuePos != this.end)
+    {
+      boolean reuse = cursor != null && isValidCursor(cursor);
+      endColumn = reuse ? cursor.lastColumnPosition : 0;
+    }
+    cursor = new SequenceCursor(this, residuePos, column, startColumn,
+            endColumn, this.changeCount);
   }
 
   /**
@@ -760,7 +778,7 @@ public class Sequence extends ASequence implements SequenceI
     }
 
     col++; // convert back to base 1
-    updateCursor(pos, col);
+    updateCursor(pos, col, curs.firstColumnPosition);
     return col;
   }
 
@@ -778,13 +796,19 @@ public class Sequence extends ASequence implements SequenceI
     {
       return findPosition(column + 1, cursor);
     }
-    
+
     // TODO recode this more naturally i.e. count residues only
     // as they are found, not 'in anticipation'
 
+    /*
+     * traverse the sequence counting gaps; note the column position
+     * of the first residue, to save in the cursor
+     */
+    int firstResidueColumn = 0;
     int lastPosFound = 0;
     int lastPosFoundColumn = 0;
     int seqlen = sequence.length;
+
     if (seqlen > 0 && !Comparison.isGap(sequence[0]))
     {
       lastPosFound = start;
@@ -800,6 +824,10 @@ public class Sequence extends ASequence implements SequenceI
       {
         lastPosFound = pos;
         lastPosFoundColumn = j;
+        if (pos == this.start)
+        {
+          firstResidueColumn = j;
+        }
         pos++;
       }
       j++;
@@ -808,6 +836,10 @@ public class Sequence extends ASequence implements SequenceI
     {
       lastPosFound = pos;
       lastPosFoundColumn = j;
+      if (pos == this.start)
+      {
+        firstResidueColumn = j;
+      }
     }
 
     /*
@@ -816,7 +848,8 @@ public class Sequence extends ASequence implements SequenceI
      */
     if (lastPosFound != 0)
     {
-      updateCursor(lastPosFound, lastPosFoundColumn + 1);
+      updateCursor(lastPosFound, lastPosFoundColumn + 1,
+              firstResidueColumn + 1);
     }
 
     return pos;
@@ -876,9 +909,31 @@ public class Sequence extends ASequence implements SequenceI
       return curs.residuePosition; // easy case :-)
     }
 
+    if (curs.lastColumnPosition > 0 && curs.lastColumnPosition < col)
+    {
+      /*
+       * sequence lies entirely to the left of col
+       * - return last residue + 1
+       */
+      return end + 1;
+    }
+
+    if (curs.firstColumnPosition > 0 && curs.firstColumnPosition >= col)
+    {
+      /*
+       * sequence lies entirely to the right of col
+       * - return first residue
+       */
+      return start;
+    }
+
+    // todo could choose closest to col out of column,
+    // firstColumnPosition, lastColumnPosition as a start point
+
     /*
      * move left or right to find pos from cursor position
      */
+    int firstResidueColumn = curs.firstColumnPosition;
     int column = curs.columnPosition - 1; // to base 0
     int newPos = curs.residuePosition;
     int delta = curs.columnPosition > col ? -1 : 1;
@@ -899,12 +954,17 @@ public class Sequence extends ASequence implements SequenceI
         newPos += delta;
         lastFoundPosition = newPos;
         lastFoundPositionColumn = column + 1;
+        if (lastFoundPosition == this.start)
+        {
+          firstResidueColumn = column + 1;
+        }
       }
     }
 
     if (cursor == null || lastFoundPosition != cursor.residuePosition)
     {
-      updateCursor(lastFoundPosition, lastFoundPositionColumn);
+      updateCursor(lastFoundPosition, lastFoundPositionColumn,
+              firstResidueColumn);
     }
 
     /*
@@ -1704,7 +1764,12 @@ public class Sequence extends ASequence implements SequenceI
   {
     int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
     int endPos = findPosition(toColumn - 1);
-
+    // to trace / debug behaviour:
+    // System.out
+    // .println(String
+    // .format("%s.findFeatures columns [%d-%d] positions [%d-%d] leaves cursor %s",
+    // getName(), fromColumn, toColumn, startPos,
+    // endPos, cursor));
     List result = new ArrayList<>();
     if (datasetSequence != null)
     {
diff --git a/src/jalview/datamodel/SequenceCursor.java b/src/jalview/datamodel/SequenceCursor.java
index f439ee1..b5929bf 100644
--- a/src/jalview/datamodel/SequenceCursor.java
+++ b/src/jalview/datamodel/SequenceCursor.java
@@ -22,6 +22,16 @@ public class SequenceCursor
   public final int columnPosition;
 
   /**
+   * column position (1...) of first residue in the sequence, or 0 if undefined
+   */
+  public final int firstColumnPosition;
+
+  /**
+   * column position (1...) of last residue in the sequence, or 0 if undefined
+   */
+  public final int lastColumnPosition;
+
+  /**
    * a token which may be used to check whether this cursor is still valid for
    * its sequence (allowing it to be ignored if the sequence has changed)
    */
@@ -42,9 +52,36 @@ public class SequenceCursor
    */
   public SequenceCursor(SequenceI seq, int resPos, int column, int tok)
   {
+    this(seq, resPos, column, 0, 0, tok);
+  }
+
+  /**
+   * Constructor
+   * 
+   * @param seq
+   *          sequence this cursor applies to
+   * @param resPos
+   *          residue position in sequence (start..)
+   * @param column
+   *          column position in alignment (1..)
+   * @param firstResCol
+   *          column position of the first residue in the sequence (1..), or 0
+   *          if not known
+   * @param lastResCol
+   *          column position of the last residue in the sequence (1..), or 0 if
+   *          not known
+   * @param tok
+   *          a token that may be validated by the sequence to check the cursor
+   *          is not stale
+   */
+  public SequenceCursor(SequenceI seq, int resPos, int column, int firstResCol,
+          int lastResCol, int tok)
+  {
     sequence = seq;
     residuePosition = resPos;
     columnPosition = column;
+    firstColumnPosition = firstResCol;
+    lastColumnPosition = lastResCol;
     token = tok;
   }
 
@@ -80,7 +117,9 @@ public class SequenceCursor
   @Override
   public String toString()
   {
-    return (sequence == null ? "" : sequence.getName()) + ":Pos"
-            + residuePosition + ":Col" + columnPosition + ":tok" + token;
+    String name = sequence == null ? "" : sequence.getName();
+    return String.format("%s:Pos%d:Col%d:startCol%d:endCol%d:tok%d", name,
+            residuePosition, columnPosition, firstColumnPosition,
+            lastColumnPosition, token);
   }
 }
diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java
index 97a75f9..e657599 100644
--- a/test/jalview/datamodel/SequenceTest.java
+++ b/test/jalview/datamodel/SequenceTest.java
@@ -286,6 +286,8 @@ public class SequenceTest
     SequenceI sq = new Sequence("test/8-13", "ABCDEF");
     assertEquals(8, sq.findPosition(0));
     // Sequence should now hold a cursor at [8, 0]
+    assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
+            PA.getValue(sq, "cursor").toString());
     SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     int token = (int) PA.getValue(sq, "changeCount");
     assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
@@ -294,11 +296,14 @@ public class SequenceTest
 
     /*
      * find F13 at column offset 5, cursor should update to [13, 6]
+     * endColumn is found and saved in cursor
      */
     assertEquals(13, sq.findPosition(5));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(++token, (int) PA.getValue(sq, "changeCount"));
     assertEquals(new SequenceCursor(sq, 13, 6, token), cursor);
+    assertEquals("test:Pos13:Col6:startCol1:endCol6:tok1",
+            PA.getValue(sq, "cursor").toString());
 
     // assertEquals(-1, seq.findPosition(6)); // fails
 
@@ -307,11 +312,15 @@ public class SequenceTest
     assertEquals(8, sq.findPosition(0));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
+    assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(9, sq.findPosition(1));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
+    assertEquals("test:Pos9:Col2:startCol1:endCol0:tok1",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     // gap position 'finds' residue to the right (not the left as per javadoc)
@@ -319,11 +328,15 @@ public class SequenceTest
     assertEquals(10, sq.findPosition(2));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
+    assertEquals("test:Pos9:Col2:startCol1:endCol0:tok2",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(10, sq.findPosition(3));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
+    assertEquals("test:Pos10:Col4:startCol1:endCol0:tok3",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     // column[4] is the gap after C - returns D11
@@ -331,11 +344,16 @@ public class SequenceTest
     assertEquals(11, sq.findPosition(4));
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
+    assertEquals("test:Pos10:Col4:startCol1:endCol0:tok4",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(11, sq.findPosition(5)); // D
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
+    // lastCol has been found and saved in the cursor
+    assertEquals("test:Pos11:Col6:startCol1:endCol6:tok5",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     // returns 1 more than sequence length if off the end ?!?
@@ -344,38 +362,68 @@ public class SequenceTest
     sq.sequenceChanged();
     assertEquals(12, sq.findPosition(7));
 
+    /*
+     * first findPosition should also set firstResCol in cursor
+     */
     sq = new Sequence("test/8-13", "--AB-C-DEF--");
     assertEquals(8, sq.findPosition(0));
+    assertNull(PA.getValue(sq, "cursor"));
 
     sq.sequenceChanged();
     assertEquals(8, sq.findPosition(1));
+    assertNull(PA.getValue(sq, "cursor"));
 
     sq.sequenceChanged();
     assertEquals(8, sq.findPosition(2));
+    assertEquals("test:Pos8:Col3:startCol3:endCol0:tok2",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(9, sq.findPosition(3));
+    assertEquals("test:Pos9:Col4:startCol3:endCol0:tok3",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
+    // column[4] is a gap, returns next residue pos (C10)
+    // cursor is set to last residue found [B]
     assertEquals(10, sq.findPosition(4));
+    assertEquals("test:Pos9:Col4:startCol3:endCol0:tok4",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(10, sq.findPosition(5));
+    assertEquals("test:Pos10:Col6:startCol3:endCol0:tok5",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
+    // column[6] is a gap, returns next residue pos (D11)
+    // cursor is set to last residue found [C]
     assertEquals(11, sq.findPosition(6));
+    assertEquals("test:Pos10:Col6:startCol3:endCol0:tok6",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(11, sq.findPosition(7));
+    assertEquals("test:Pos11:Col8:startCol3:endCol0:tok7",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(12, sq.findPosition(8));
+    assertEquals("test:Pos12:Col9:startCol3:endCol0:tok8",
+            PA.getValue(sq, "cursor").toString());
 
+    /*
+     * when the last residue column is found, it is set in the cursor
+     */
     sq.sequenceChanged();
     assertEquals(13, sq.findPosition(9));
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok9",
+            PA.getValue(sq, "cursor").toString());
 
     sq.sequenceChanged();
     assertEquals(14, sq.findPosition(10));
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok10",
+            PA.getValue(sq, "cursor").toString());
 
     /*
      * findPosition for column beyond sequence length
@@ -383,8 +431,13 @@ public class SequenceTest
      */
     sq.sequenceChanged();
     assertEquals(14, sq.findPosition(11));
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok11",
+            PA.getValue(sq, "cursor").toString());
+
     sq.sequenceChanged();
     assertEquals(14, sq.findPosition(99));
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok12",
+            PA.getValue(sq, "cursor").toString());
   }
 
   @Test(groups = { "Functional" })
@@ -1339,41 +1392,41 @@ public class SequenceTest
   {
     Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
 
-    // find F pos given A
+    // find F pos given A - lastCol gets set in cursor
     assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
-    int token = (int) PA.getValue(sq, "changeCount"); // 0
-    SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+    assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
 
-    // find A pos given F
+    // find A pos given F - first residue column is saved in cursor
     assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 8, 2, token), cursor);
+    assertEquals("test:Pos8:Col2:startCol2:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
 
-    // find C pos given C
+    // find C pos given C (neither startCol nor endCol is set)
     assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 10, 6, token), cursor);
+    assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     // now the grey area - what residue position for a gapped column? JAL-2562
 
     // find 'residue' for column 3 given cursor for D (so working left)
     // returns B9; cursor is updated to [B 5]
     assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 9, 5, token), cursor);
+    assertEquals("test:Pos9:Col5:startCol0:endCol0:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     // find 'residue' for column 8 given cursor for D (so working right)
     // returns E12; cursor is updated to [D 7]
     assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 11, 7, token), cursor);
+    assertEquals("test:Pos11:Col7:startCol0:endCol0:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     // find 'residue' for column 12 given cursor for B
     // returns 1 more than last residue position; cursor is updated to [F 10]
+    // lastCol position is saved in cursor
     assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+    assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     /*
      * findPosition for column beyond length of sequence
@@ -1381,8 +1434,8 @@ public class SequenceTest
      * cursor is set to last real residue position [F 10]
      */
     assertEquals(14, sq.findPosition(99, new SequenceCursor(sq, 8, 2, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+    assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
 
     /*
      * and the case without a trailing gap
@@ -1390,13 +1443,14 @@ public class SequenceTest
     sq = new Sequence("test/8-13", "-A--BCD-EF");
     // first find C from A
     assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, 0)));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 10, 6, token), cursor);
+    SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+    assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
+            cursor.toString());
     // now 'find' 99 from C
-    // cursor is set to [F 10]
+    // cursor is set to [F 10] and saved lastCol
     assertEquals(14, sq.findPosition(99, cursor));
-    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
-    assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+    assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
   }
 
   @Test
@@ -1414,11 +1468,11 @@ public class SequenceTest
     assertTrue(sq.isValidCursor(cursor));
 
     /*
-     * column position outside [0 - length-1] is rejected
+     * column position outside [0 - length] is rejected
      */
     cursor = new SequenceCursor(sq, 13, -1, changeCount);
     assertFalse(sq.isValidCursor(cursor));
-    cursor = new SequenceCursor(sq, 13, 9, changeCount);
+    cursor = new SequenceCursor(sq, 13, 10, changeCount);
     assertFalse(sq.isValidCursor(cursor));
     cursor = new SequenceCursor(sq, 7, 8, changeCount);
     assertFalse(sq.isValidCursor(cursor));
-- 
1.7.10.2