}
/**
+ * Answers true if the given cursor is not null, is for this sequence object,
+ * and has a token value that matches this object's changeCount, else false.
+ * This allows us to ignore a cursor as 'stale' if the sequence has been
+ * modified since the cursor was created. A cursor that passes these checks is
+ * still rejected if its column position or residue position fails the range
+ * checks in the method body.
+ *
+ * @param curs
+ *          the cursor to check (may be null)
+ * @return true if the cursor passes all checks, else false
+ */
+ protected boolean isValidCursor(SequenceCursor curs)
+ {
+ if (curs == null || curs.sequence != this || curs.token != changeCount)
+ {
+ return false;
+ }
+ /*
+ * sanity check against range
+ *
+ * NOTE(review): columnPosition appears to be base 1 (findPosition converts
+ * it with "columnPosition - 1"), in which case this test rejects a cursor
+ * on the last column (columnPosition == sequence.length) and accepts
+ * columnPosition == 0 - confirm whether "< 1 || > sequence.length" was
+ * intended
+ */
+ if (curs.columnPosition < 0 || curs.columnPosition >= sequence.length)
+ {
+ return false;
+ }
+ if (curs.residuePosition < start || curs.residuePosition > end)
+ {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Answers the sequence position (start..) for the given aligned column
+ * position (1..), given a hint of a cursor in the neighbourhood. The cursor
+ * may lie left of, at, or to the right of the column position.
+ * <p>
+ * If the cursor is not valid (see isValidCursor) the position is computed
+ * without it. Otherwise the sequence is scanned from the cursor's column
+ * towards col, counting residues, and updateCursor is called with the last
+ * residue found on the way (unless the cached cursor is already at that
+ * residue position).
+ *
+ * @param col
+ *          aligned column position (base 1)
+ * @param curs
+ *          a cursor to start the search from (may be null or stale)
+ * @return the residue position at col; if col is gapped, or lies beyond the
+ *         end of the sequence, the position of the next residue to the right
+ *         (which may be one more than the last residue position) - this
+ *         assumes the cursor itself is positioned on a residue
+ */
+ protected int findPosition(final int col, SequenceCursor curs)
+ {
+ if (!isValidCursor(curs))
+ {
+ /*
+ * wrong or invalidated cursor, compute de novo
+ */
+ return findPosition(col - 1);// ugh back to base 0
+ }
+
+ if (curs.columnPosition == col)
+ {
+ cursor = curs; // in case this method becomes public
+ return curs.residuePosition; // easy case :-)
+ }
+
+ /*
+ * move left or right to find pos from cursor position
+ * ('column' is a base 0 index into sequence[], 'col' is base 1;
+ * newPos tracks the position of the last residue passed)
+ */
+ int column = curs.columnPosition - 1; // to base 0
+ int newPos = curs.residuePosition;
+ int delta = curs.columnPosition > col ? -1 : 1;
+ boolean gapped = false;
+ int lastFoundPosition = curs.residuePosition;
+ int lastFoundPositionColumn = curs.columnPosition;
+
+ while (column != col - 1)
+ {
+ column += delta; // shift one column left or right
+ if (column < 0 || column == sequence.length)
+ {
+ break;
+ }
+ gapped = Comparison.isGap(sequence[column]);
+ if (!gapped)
+ {
+ newPos += delta;
+ lastFoundPosition = newPos;
+ lastFoundPositionColumn = column + 1;
+ }
+ }
+
+ if (cursor == null || lastFoundPosition != cursor.residuePosition)
+ {
+ updateCursor(lastFoundPosition, lastFoundPositionColumn);
+ }
+
+ /*
+ * hack to give position to the right if on a gap
+ * or beyond the length of the sequence (see JAL-2562)
+ * (only needed when scanning right: newPos is then the residue to the
+ * left of the gap; when scanning left it is already the one to the right)
+ */
+ if (delta > 0 && (gapped || column >= sequence.length))
+ {
+ newPos++;
+ }
+
+ return newPos;
+ }
+
+ /**
- * {@inheritDoc}
- */
- @Override
- public Range findPositions(int fromCol, int toCol)
- {
- if (cursor != null && cursor.sequence == this
- && cursor.token == changeCount)
- {
- return findPositions(fromCol, toCol, cursor);
- }
-
- /*
- * count residues before fromCol
- */
- int j = 0;
- int count = 0;
- int seqlen = sequence.length;
- while (j < fromCol && j < seqlen)
- {
- if (!Comparison.isGap(sequence[j]))
- {
- count++;
- }
- j++;
- }
-
- /*
- * find first and last residues between fromCol and toCol
- */
- int firstPos = 0;
- int lastPos = 0;
- boolean foundFirst = false;
-
- while (j <= toCol && j < seqlen)
- {
- if (!Comparison.isGap(sequence[j]))
- {
- count++;
- if (!foundFirst)
- {
- firstPos = count;
- foundFirst = true;
- }
- lastPos = count;
- }
- j++;
- }
-
- if (firstPos == 0)
- {
- /*
- * no residues in this range
- */
- return null;
- }
-
- /*
- * adjust for sequence start coordinate
- */
- firstPos += start - 1;
- lastPos += start - 1;
-
- return new Range(firstPos, lastPos);
- }
-
- /**
- * Returns the range of sequence positions included in the given alignment
- * position range. If no positions are included (the range is entirely gaps),
- * then returns null. The cursor parameter may provide a starting position in
- * the neighbourhood of the search (which may be left of, right of, or
- * overlapping the search region).
- *
- * @param fromCol
- * start column of region (0..)
- * @param toCol
- * end column of region (0..)
- * @param curs
- * @return
- */
- protected Range findPositions(int fromCol, int toCol, SequenceCursor curs)
- {
- if (!isValidCursor(curs))
- {
- /*
- * wrong or invalidated cursor, compute de novo
- */
- return findPositions(fromCol, toCol);
- }
-
- /*
- * keep this simple...first step from cursor to fromCol...
- */
- final int seqlen = sequence.length;
- int resNo = curs.residuePosition;
- int col = curs.columnPosition - 1; // from base 1 to base 0
- if (col != fromCol)
- {
- int delta = col > fromCol ? -1 : 1;
- while (col != fromCol && col >= 0 && col < seqlen)
- {
- if (!Comparison.isGap(sequence[col]))
- {
- resNo += delta;
- }
- col += delta;
- }
- }
-
- if (col < fromCol || col == seqlen)
- {
- /*
- * sequence lies to the left of the target region
- */
- return null;
- }
-
- /*
- * resNo is now the residue at fromCol (if not gapped), else the one
- * before it (if delta == 1), else the one after (if delta == -1);
- * we want the residue before fromCol
- */
- if (!Comparison.isGap(sequence[fromCol]))
- {
- resNo--;
- }
- else if (curs.columnPosition > fromCol)
- {
- resNo -= 2;
- }
-
- /*
- * now first and last residues between fromCol and toCol
- */
- int firstPos = 0;
- int lastPos = 0;
- boolean foundFirst = false;
-
- while (col <= toCol && col < seqlen)
- {
- if (!Comparison.isGap(sequence[col]))
- {
- resNo++;
- if (!foundFirst)
- {
- firstPos = resNo;
- foundFirst = true;
- }
- lastPos = resNo;
- }
- col++;
- }
-
- if (firstPos == 0)
- {
- /*
- * no residues in this range
- */
- return null;
- }
-
- return new Range(firstPos, lastPos);
- }
-
- /**
* Returns an int array where indices correspond to each residue in the
* sequence and the element value gives its position in the alignment
*
* {@inheritDoc}
*/
@Override
- public List<SequenceFeature> findFeatures(int from, int to,
+ public List<SequenceFeature> findFeatures(int fromColumn, int toColumn,
String... types)
{
+ int startPos = findPosition(fromColumn - 1); // convert base 1 to base 0
+ int endPos = findPosition(toColumn - 1);
+
+ List<SequenceFeature> result = new ArrayList<>();
if (datasetSequence != null)
{
- return datasetSequence.findFeatures(from, to, types);
+ result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
+ types);
}
- return sequenceFeatureStore.findFeatures(from, to, types);
+ else
+ {
+ result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
+ }
+
+ /*
+ * if the start or end column is gapped, startPos or endPos may be to the
+ * left or right, and we may have included adjacent or enclosing features;
+ * remove any that are not enclosing, non-contact features
+ *
+ * NOTE(review): sequence[fromColumn - 1] and sequence[toColumn - 1] are
+ * only evaluated when endPos <= end; this presumably relies on
+ * findPosition returning a value beyond 'end' for a column past the end
+ * of the sequence - confirm. A fromColumn below 1 is not guarded against.
+ * The ArrayList assigned to 'result' above is always replaced by one of
+ * the two findFeatures calls.
+ */
+ if (endPos > this.end || Comparison.isGap(sequence[fromColumn - 1])
+ || Comparison.isGap(sequence[toColumn - 1]))
+ {
+ ListIterator<SequenceFeature> it = result.listIterator();
+ while (it.hasNext())
+ {
+ SequenceFeature sf = it.next();
+ int featureStartColumn = findIndex(sf.getBegin());
+ int featureEndColumn = findIndex(sf.getEnd());
+ boolean noOverlap = featureStartColumn > toColumn
+ || featureEndColumn < fromColumn;
+
+ /*
+ * reject an 'enclosing' feature if it is actually a contact feature
+ * (both of its end columns lie strictly outside the column range)
+ */
+ if (sf.isContactFeature() && featureStartColumn < fromColumn
+ && featureEndColumn > toColumn)
+ {
+ noOverlap = true;
+ }
+ if (noOverlap)
+ {
+ it.remove();
+ }
+ }
+ }
+
+ return result;
}
+
+ /**
+ * Invalidates any stale cursors (forcing recalculation) by incrementing the
+ * token that has to match the one presented by the cursor. The token is
+ * changeCount, which isValidCursor compares with the cursor's own token.
+ */
+ @Override
+ public void sequenceChanged()
+ {
+ changeCount++;
+ }
}
seq.setDatasetSequence(seq2);
}
- @Test
- public void testFindPositions()
+ @Test(groups = { "Functional" })
+ public void testFindFeatures()
{
- SequenceI sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
-
- Range range = sq.findPositions(1, 4); // BC
- assertEquals(new Range(9, 10), range);
-
- range = sq.findPositions(2, 4); // C
- assertEquals(new Range(10, 10), range);
-
- assertNull(sq.findPositions(3, 4)); // all gaps
+ SequenceI sq = new Sequence("test/8-16", "-ABC--DEF--GHI--");
+ sq.createDatasetSequence();
- range = sq.findPositions(2, 6); // CDE
- assertEquals(new Range(10, 12), range);
+ assertTrue(sq.findFeatures(1, 99).isEmpty());
- range = sq.findPositions(3, 7); // DE
- assertEquals(new Range(11, 12), range);
+ // add non-positional feature (begin and end are both 0)
+ SequenceFeature sf0 = new SequenceFeature("Cath", "desc", 0, 0, 2f,
+ null);
+ sq.addSequenceFeature(sf0);
+ // add feature on BCD (residue positions 9-11)
+ SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 9, 11, 2f,
+ null);
+ sq.addSequenceFeature(sf1);
+ // add feature on DE (residue positions 11-12)
+ SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 11, 12, 2f,
+ null);
+ sq.addSequenceFeature(sf2);
+ // add contact feature at [B, H] (residue positions 9 and 15)
+ SequenceFeature sf3 = new SequenceFeature("Disulphide bond", "desc", 9,
+ 15, 2f,
+ null);
+ sq.addSequenceFeature(sf3);
+ // add contact feature at [F, G] (residue positions 13 and 14)
+ SequenceFeature sf4 = new SequenceFeature("Disulfide Bond", "desc", 13,
+ 14, 2f,
+ null);
+ sq.addSequenceFeature(sf4);
+
+ // no features in columns 1-2 (-A)
+ List<SequenceFeature> found = sq.findFeatures(1, 2);
+ assertTrue(found.isEmpty());
+
+ // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE
+ found = sq.findFeatures(1, 6);
+ assertEquals(2, found.size());
+ assertTrue(found.contains(sf1));
+ assertTrue(found.contains(sf3));
+
+ // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature
+ found = sq.findFeatures(5, 6);
+ assertEquals(1, found.size());
+ assertTrue(found.contains(sf1));
+
+ // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature
+ found = sq.findFeatures(7, 10);
+ assertEquals(3, found.size());
+ assertTrue(found.contains(sf1));
+ assertTrue(found.contains(sf2));
+ assertTrue(found.contains(sf4));
}
+
+ @Test(groups = { "Functional" })
+ public void testFindIndex_withCursor()
+ {
+ Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
+
+ // find column of F (residue 13) given a cursor at A (residue 8, column 2)
+ assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, 0)));
+
+ // find column of A (residue 8) given a cursor at F (residue 13, column 10)
+ assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, 0)));
+
+ // find column of C (residue 10) given a cursor already at C (column 6)
+ assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0)));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFindPosition_withCursor()
+ {
+ Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
+
+ // find F pos given A
+ assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
+ int token = (int) PA.getValue(sq, "changeCount"); // 0
+ SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+
+ // find A pos given F
+ assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 8, 2, token), cursor);
+
+ // find C pos given C
+ assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 10, 6, token), cursor);
+
+ // now the grey area - what residue position for a gapped column? JAL-2562
+
+ // find 'residue' for column 3 given cursor for D (so working left)
+ // returns B9; cursor is updated to [B 5]
+ assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 9, 5, token), cursor);
+
+ // find 'residue' for column 8 given cursor for D (so working right)
+ // returns E12; cursor is updated to [D 7]
+ assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 11, 7, token), cursor);
+
+ // find 'residue' for column 12 given cursor for B
+ // returns 1 more than last residue position; cursor is updated to [F 10]
+ assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+
+ /*
+ * findPosition for column beyond length of sequence
+ * returns 1 more than the last residue position
+ * cursor is set to last real residue position [F 10]
+ */
+ assertEquals(14, sq.findPosition(99, new SequenceCursor(sq, 8, 2, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+
+ /*
+ * and the case without a trailing gap
+ * NOTE(review): 'token' was read from the previous Sequence object; this
+ * presumably works because a new Sequence also starts with changeCount 0
+ * - confirm
+ */
+ sq = new Sequence("test/8-13", "-A--BCD-EF");
+ // first find C from A
+ assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, 0)));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 10, 6, token), cursor);
+ // now 'find' 99 from C
+ // cursor is set to [F 10]
+ assertEquals(14, sq.findPosition(99, cursor));
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+ }
+
+ @Test(groups = { "Functional" })
- public void testFindPositions_withCursor()
- {
- Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
-
- // find positions for columns 1-4 (BC--) given E cursor
- Range range = sq.findPositions(1, 4, new SequenceCursor(sq, 12, 7, 0)); // BC
- assertEquals(new Range(9, 10), range);
-
- // repeat using B cursor
- range = sq.findPositions(1, 4, new SequenceCursor(sq, 9, 2, 0)); // BC
- assertEquals(new Range(9, 10), range);
-
- // find positions for columns 2-4 (C--) given A cursor
- range = sq.findPositions(2, 4, new SequenceCursor(sq, 8, 1, 0)); // C
- assertEquals(new Range(10, 10), range);
-
- // gapped region
- assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 10, 3, 0)));
- assertNull(sq.findPositions(3, 4, new SequenceCursor(sq, 12, 7, 0)));
-
- // find positions for columns 2-6 (C--DE) given B cursor
- range = sq.findPositions(2, 6, new SequenceCursor(sq, 9, 2, 0)); // CDE
- assertEquals(new Range(10, 12), range);
-
- // repeat using C as cursor
- range = sq.findPositions(2, 6, new SequenceCursor(sq, 10, 3, 0));
- assertEquals(new Range(10, 12), range);
-
- // repeat using D as cursor
- range = sq.findPositions(2, 6, new SequenceCursor(sq, 11, 6, 0));
- assertEquals(new Range(10, 12), range);
-
- // repeat using E as cursor
- range = sq.findPositions(2, 6, new SequenceCursor(sq, 12, 7, 0));
- assertEquals(new Range(10, 12), range);
-
- // repeat using F as cursor
- range = sq.findPositions(2, 6, new SequenceCursor(sq, 13, 9, 0));
- assertEquals(new Range(10, 12), range);
- }
-
- @Test
+ public void testIsValidCursor()
+ {
+ Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
+ assertFalse(sq.isValidCursor(null));
+
+ /*
+ * cursor is valid if it has valid sequence ref and changeCount token
+ * and positions within the range of the sequence
+ */
+ int changeCount = (int) PA.getValue(sq, "changeCount");
+ SequenceCursor cursor = new SequenceCursor(sq, 13, 1, changeCount);
+ assertTrue(sq.isValidCursor(cursor));
+
+ /*
+ * column position outside [0 - length-1] is rejected
+ */
+ cursor = new SequenceCursor(sq, 13, -1, changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+ cursor = new SequenceCursor(sq, 13, 9, changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+
+ /*
+ * residue position outside [start - end] (here 8 - 13) is rejected
+ */
+ cursor = new SequenceCursor(sq, 7, 8, changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+ cursor = new SequenceCursor(sq, 14, 2, changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+
+ /*
+ * wrong sequence is rejected
+ */
+ cursor = new SequenceCursor(null, 13, 1, changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+ cursor = new SequenceCursor(new Sequence("Seq", "abc"), 13, 1,
+ changeCount);
+ assertFalse(sq.isValidCursor(cursor));
+
+ /*
+ * wrong token value is rejected
+ */
+ cursor = new SequenceCursor(sq, 13, 1, changeCount + 1);
+ assertFalse(sq.isValidCursor(cursor));
+ cursor = new SequenceCursor(sq, 13, 1, changeCount - 1);
+ assertFalse(sq.isValidCursor(cursor));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFindPosition_withCursorAndEdits()
+ {
+ Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
+
+ // find F pos given A
+ assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
+ int token = (int) PA.getValue(sq, "changeCount"); // 0
+ SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
+
+ /*
+ * setSequence should invalidate the cursor cached by the sequence
+ * (each edit below is expected to increment changeCount by one, hence
+ * the ++token in the expected cursor)
+ */
+ sq.setSequence("-A-BCD-EF---"); // one gap removed
+ assertEquals(8, sq.getStart()); // sanity check
+ assertEquals(11, sq.findPosition(5)); // D11
+ // cursor should now be at [D 6]
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
+
+ /*
+ * deleteChars should invalidate the cached cursor
+ */
+ sq.deleteChars(2, 5); // delete -BC
+ assertEquals("-AD-EF---", sq.getSequenceAsString());
+ assertEquals(8, sq.getStart()); // sanity check
+ assertEquals(10, sq.findPosition(4)); // E10
+ // cursor should now be at [E 5]
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 10, 5, ++token), cursor);
+
+ /*
+ * Edit to insert gaps should invalidate the cached cursor
+ * insert 2 gaps at column[3] to make -AD---EF---
+ */
+ SequenceI[] seqs = new SequenceI[] { sq };
+ AlignmentI al = new Alignment(seqs);
+ new EditCommand().appendEdit(Action.INSERT_GAP, seqs, 3, 2, al, true);
+ assertEquals("-AD---EF---", sq.getSequenceAsString());
+ assertEquals(10, sq.findPosition(4)); // E10 (column[4] is gapped)
+ // cursor should now be at [D 3]
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 9, 3, ++token), cursor);
+
+ /*
+ * insertCharAt should invalidate the cached cursor
+ * insert CC at column[4] to make -AD-CC--EF---
+ */
+ sq.insertCharAt(4, 2, 'C');
+ assertEquals("-AD-CC--EF---", sq.getSequenceAsString());
+ assertEquals(13, sq.findPosition(9)); // F13
+ // cursor should now be at [F 10]
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor);
+ }
}