import java.util.ListIterator;
import java.util.Vector;
-import com.stevesoft.pat.Regex;
-
import fr.orsay.lri.varna.models.rna.RNA;
/**
*
- * Implements the SequenceI interface for a char[] based sequence object.
- *
- * @author $author$
- * @version $Revision$
+ * Implements the SequenceI interface for a char[] based sequence object
*/
public class Sequence extends ASequence implements SequenceI
{
- private static final Regex limitrx = new Regex(
- "[/][0-9]{1,}[-][0-9]{1,}$");
-
- private static final Regex endrx = new Regex("[0-9]{1,}$");
-
SequenceI datasetSequence;
String name;
*/
int index = -1;
- private SequenceFeatures sequenceFeatureStore;
+ private SequenceFeaturesI sequenceFeatureStore;
/*
* A cursor holding the approximate current view position to the sequence,
checkValidRange();
}
+ /**
+ * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as
+ * start and end respectively and removes the suffix from the name
+ */
void parseId()
{
if (name == null)
"POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
name = "";
}
- // Does sequence have the /start-end signature?
- if (limitrx.search(name))
+ int slashPos = name.lastIndexOf('/');
+ if (slashPos > -1 && slashPos < name.length() - 1)
{
- name = limitrx.left();
- endrx.search(limitrx.stringMatched());
- setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
- endrx.matchedFrom() - 1)));
- setEnd(Integer.parseInt(endrx.stringMatched()));
+ String suffix = name.substring(slashPos + 1);
+ String[] range = suffix.split("-");
+ if (range.length == 2)
+ {
+ try
+ {
+ int from = Integer.valueOf(range[0]);
+ int to = Integer.valueOf(range[1]);
+ if (from > 0 && to >= from)
+ {
+ name = name.substring(0, slashPos);
+ setStart(from);
+ setEnd(to);
+ checkValidRange();
+ }
+ } catch (NumberFormatException e)
+ {
+ // leave name unchanged if suffix is invalid
+ }
+ }
}
}
+ /**
+ * Ensures that 'end' is not before the end of the sequence, that is,
+ * (end-start+1) is at least as long as the count of ungapped positions. Note
+ * that end is permitted to be beyond the end of the sequence data.
+ */
void checkValidRange()
{
// Note: JAL-774 :
int endRes = 0;
for (int j = 0; j < sequence.length; j++)
{
- if (!jalview.util.Comparison.isGap(sequence[j]))
+ if (!Comparison.isGap(sequence[j]))
{
endRes++;
}
}
/**
- * DOCUMENT ME!
+ * Sets the sequence name. If the name ends in /start-end, then the start-end
+ * values are parsed out and set, and the suffix is removed from the name.
*
- * @param name
- * DOCUMENT ME!
+ * @param theName
*/
@Override
- public void setName(String name)
+ public void setName(String theName)
{
- this.name = name;
+ this.name = theName;
this.parseId();
}
* preserve end residue column provided cursor was valid
*/
int endColumn = isValidCursor(cursor) ? cursor.lastColumnPosition : 0;
+
if (residuePos == this.end)
{
endColumn = column;
/*
* move left or right to find pos from hint.position
*/
- int col = curs.columnPosition - 1; // convert from base 1 to 0-based array
- // index
+ int col = curs.columnPosition - 1; // convert from base 1 to base 0
int newPos = curs.residuePosition;
int delta = newPos > pos ? -1 : 1;
// the very large sequence case
int eindex = -1, sindex = -1;
boolean ecalc = false, scalc = false;
- for (int s = i; s < j; s++)
+ for (int s = i; s < j && s < sequence.length; s++)
{
- if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
+ if (!Comparison.isGap(sequence[s]))
{
if (createNewDs)
{
List<SequenceFeature> result = getFeatures().findFeatures(startPos,
endPos, types);
+ if (datasetSequence != null)
+ {
+ result = datasetSequence.getFeatures().findFeatures(startPos, endPos,
+ types);
+ }
+ else
+ {
+ result = sequenceFeatureStore.findFeatures(startPos, endPos, types);
+ }
/*
* if end column is gapped, endPos may be to the right,
* and we may have included adjacent or enclosing features;
* remove any that are not enclosing, non-contact features
*/
- if (endPos > this.end || Comparison.isGap(sequence[toColumn - 1]))
+ boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
+ && Comparison.isGap(sequence[toColumn - 1]);
+ if (endPos > this.end || endColumnIsGapped)
{
ListIterator<SequenceFeature> it = result.listIterator();
while (it.hasNext())
assertEquals(13, sq.findIndex(99));
}
+ @Test(groups = { "Functional" })
+ public void testFindPositions()
+ {
+ SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
+
+ /*
+ * invalid inputs
+ */
+ assertNull(sq.findPositions(6, 5));
+ assertNull(sq.findPositions(0, 5));
+ assertNull(sq.findPositions(-1, 5));
+
+ /*
+ * all gapped ranges
+ */
+ assertNull(sq.findPositions(1, 1)); // 1-based columns
+ assertNull(sq.findPositions(5, 5));
+ assertNull(sq.findPositions(5, 6));
+ assertNull(sq.findPositions(5, 7));
+
+ /*
+ * all ungapped ranges
+ */
+ assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
+ assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
+ assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
+ assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
+
+ /*
+ * gap to ungapped range
+ */
+ assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
+ assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
+
+ /*
+ * ungapped to gapped range
+ */
+ assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
+ assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
+
+ /*
+ * ungapped to ungapped enclosing gaps
+ */
+ assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
+ assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
+
+ /*
+ * gapped to gapped enclosing ungapped
+ */
+ assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
+ assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
+ assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
+ assertEquals(new Range(8, 13), sq.findPositions(1, 99));
+ }
+
/**
* Tests for the method that returns a dataset sequence position (start..) for
* an aligned column position (base 0).
assertEquals("test:Pos13:Col10:startCol3:endCol10:tok0",
PA.getValue(sq, "cursor").toString());
sq.sequenceChanged();
- assertEquals(12, sq.findPosition(8));
- cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(12, sq.findPosition(8)); // E12
// sequenceChanged() invalidates cursor.lastResidueColumn
cursor = (SequenceCursor) PA.getValue(sq, "cursor");
assertEquals("test:Pos12:Col9:startCol3:endCol0:tok1",
assertEquals(6, sq.getEnd());
assertNull(PA.getValue(sq, "datasetSequence"));
+ sq = new Sequence("test", "ABCDE");
+ sq.deleteChars(0, 3);
+ assertEquals("DE", sq.getSequenceAsString());
+ assertEquals(4, sq.getStart());
+ assertEquals(5, sq.getEnd());
+ assertNull(PA.getValue(sq, "datasetSequence"));
+
/*
* delete at end
*/
assertEquals(1, sq.getStart());
assertEquals(4, sq.getEnd());
assertNull(PA.getValue(sq, "datasetSequence"));
+
+ /*
+ * delete more positions than there are
+ */
+ sq = new Sequence("test/8-11", "ABCD");
+ sq.deleteChars(0, 99);
+ assertEquals("", sq.getSequenceAsString());
+ assertEquals(12, sq.getStart()); // = findPosition(99) ?!?
+ assertEquals(11, sq.getEnd());
+
+ sq = new Sequence("test/8-11", "----");
+ sq.deleteChars(0, 99); // ArrayIndexOutOfBoundsException <= 2.10.2
+ assertEquals("", sq.getSequenceAsString());
+ assertEquals(8, sq.getStart());
+ assertEquals(11, sq.getEnd());
}
@Test(groups = { "Functional" })
// cursor should now be at [D 6]
cursor = (SequenceCursor) PA.getValue(sq, "cursor");
assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
+ assertEquals(0, cursor.lastColumnPosition); // not yet found
+ assertEquals(13, sq.findPosition(8)); // E13
+ cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+ assertEquals(9, cursor.lastColumnPosition); // found
/*
* deleteChars should invalidate the cached cursor
assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
assertEquals(2, PA.getValue(sq, "changeCount"));
}
+
+ @Test(groups = { "Functional" })
- public void testFindPositions()
- {
- SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
-
- /*
- * invalid inputs
- */
- assertNull(sq.findPositions(6, 5));
- assertNull(sq.findPositions(0, 5));
- assertNull(sq.findPositions(-1, 5));
-
- /*
- * all gapped ranges
- */
- assertNull(sq.findPositions(1, 1)); // 1-based columns
- assertNull(sq.findPositions(5, 5));
- assertNull(sq.findPositions(5, 6));
- assertNull(sq.findPositions(5, 7));
-
- /*
- * all ungapped ranges
- */
- assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
- assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
- assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
- assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
-
- /*
- * gap to ungapped range
- */
- assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
- assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
-
- /*
- * ungapped to gapped range
- */
- assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
- assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
-
- /*
- * ungapped to ungapped enclosing gaps
- */
- assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
- assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
-
- /*
- * gapped to gapped enclosing ungapped
- */
- assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
- assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
- assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
- assertEquals(new Range(8, 13), sq.findPositions(1, 99));
- }
-
- @Test(groups = { "Functional" })
+ public void testFindFeatures_largeEndPos()
+ {
+ /*
+ * imitate a PDB sequence where end is larger than end position
+ */
+ SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
+ sq.createDatasetSequence();
+
+ assertTrue(sq.findFeatures(1, 9).isEmpty());
+ // should be no array bounds exception - JAL-2772
+ assertTrue(sq.findFeatures(1, 15).isEmpty());
+
+ // add feature on BCD
+ SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
+ null);
+ sq.addSequenceFeature(sfBCD);
+
+ // no features in columns 1-2 (-A)
+ List<SequenceFeature> found = sq.findFeatures(1, 2);
+ assertTrue(found.isEmpty());
+
+ // columns 1-6 (-ABC--) includes BCD
+ found = sq.findFeatures(1, 6);
+ assertEquals(1, found.size());
+ assertTrue(found.contains(sfBCD));
+
+ // columns 10-11 (--) should find nothing
+ found = sq.findFeatures(10, 11);
+ assertEquals(0, found.size());
+ }
+
+ @Test(groups = { "Functional" })
+ public void testSetName()
+ {
+ SequenceI sq = new Sequence("test", "-ABC---DE-F--");
+ assertEquals("test", sq.getName());
+ assertEquals(1, sq.getStart());
+ assertEquals(6, sq.getEnd());
+
+ sq.setName("testing");
+ assertEquals("testing", sq.getName());
+
+ sq.setName("test/8-10");
+ assertEquals("test", sq.getName());
+ assertEquals(8, sq.getStart());
+ assertEquals(13, sq.getEnd()); // note end is recomputed
+
+ sq.setName("testing/7-99");
+ assertEquals("testing", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd()); // end may be beyond physical end
+
+ sq.setName("/2-3");
+ assertEquals("", sq.getName());
+ assertEquals(2, sq.getStart());
+ assertEquals(7, sq.getEnd());
+
+ sq.setName("test/"); // invalid
+ assertEquals("test/", sq.getName());
+ assertEquals(2, sq.getStart());
+ assertEquals(7, sq.getEnd());
+
+ sq.setName("test/6-13/7-99");
+ assertEquals("test/6-13", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/0-5"); // 0 is invalid - ignored
+ assertEquals("test/0-5", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/a-5"); // a is invalid - ignored
+ assertEquals("test/a-5", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/6-5"); // start > end is invalid - ignored
+ assertEquals("test/6-5", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/5"); // invalid - ignored
+ assertEquals("test/5", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/-5"); // invalid - ignored
+ assertEquals("test/-5", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/5-"); // invalid - ignored
+ assertEquals("test/5-", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName("test/5-6-7"); // invalid - ignored
+ assertEquals("test/5-6-7", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+
+ sq.setName(null); // invalid, gets converted to space
+ assertEquals("", sq.getName());
+ assertEquals(7, sq.getStart());
+ assertEquals(99, sq.getEnd());
+ }
+
+ @Test(groups = { "Functional" })
+ public void testCheckValidRange()
+ {
+ Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--");
+ assertEquals(7, sq.getStart());
+ assertEquals(12, sq.getEnd());
+
+ /*
+ * checkValidRange ensures end is at least the last residue position
+ */
+ PA.setValue(sq, "end", 2);
+ sq.checkValidRange();
+ assertEquals(12, sq.getEnd());
+
+ /*
+ * end may be beyond the last residue position
+ */
+ PA.setValue(sq, "end", 22);
+ sq.checkValidRange();
+ assertEquals(22, sq.getEnd());
+ }
}
assertEquals(features.size(), 2);
assertTrue(features.contains(sf2));
assertTrue(features.contains(sf3));
+
+ features = store.getFeaturesByOntology("sequence_variant");
+ assertTrue(features.isEmpty());
}
@Test(groups = "Functional")
public void testShiftFeatures()
{
SequenceFeatures store = new SequenceFeatures();
- assertFalse(store.shiftFeatures(1));
+ assertFalse(store.shiftFeatures(0, 1));
SequenceFeature sf1 = new SequenceFeature("Cath", "", 2, 5, 0f, null);
store.add(sf1);
/*
* shift features right by 5
*/
- assertTrue(store.shiftFeatures(5));
+ assertTrue(store.shiftFeatures(0, 5));
// non-positional features untouched:
List<SequenceFeature> nonPos = store.getNonPositionalFeatures();
* feature at [7-10] should be removed
* feature at [13-19] should become [1-4]
*/
- assertTrue(store.shiftFeatures(-15));
+ assertTrue(store.shiftFeatures(0, -15));
pos = store.getPositionalFeatures();
assertEquals(pos.size(), 2);
SequenceFeatures.sortFeatures(pos, true);
assertEquals(pos.get(1).getBegin(), 13);
assertEquals(pos.get(1).getEnd(), 22);
assertEquals(pos.get(1).getType(), "Disulfide bond");
+
+ /*
+ * shift right by 4 from column 2
+ * feature at [1-4] should be unchanged
+ * feature at [13-22] should become [17-26]
+ */
+ assertTrue(store.shiftFeatures(2, 4));
+ pos = store.getPositionalFeatures();
+ assertEquals(pos.size(), 2);
+ SequenceFeatures.sortFeatures(pos, true);
+ assertEquals(pos.get(0).getBegin(), 1);
+ assertEquals(pos.get(0).getEnd(), 4);
+ assertEquals(pos.get(0).getType(), "Metal");
+ assertEquals(pos.get(1).getBegin(), 17);
+ assertEquals(pos.get(1).getEnd(), 26);
+ assertEquals(pos.get(1).getType(), "Disulfide bond");
+
+ /*
+ * shift right from column 18
+ * should be no updates
+ */
+ SequenceFeature f1 = pos.get(0);
+ SequenceFeature f2 = pos.get(1);
+ assertFalse(store.shiftFeatures(18, 6));
+ pos = store.getPositionalFeatures();
+ assertEquals(pos.size(), 2);
+ SequenceFeatures.sortFeatures(pos, true);
+ assertSame(pos.get(0), f1);
+ assertSame(pos.get(1), f2);
}
@Test(groups = "Functional")
assertTrue(store.isOntologyTerm("junk", new String[] {}));
assertTrue(store.isOntologyTerm("junk", (String[]) null));
}
+
+ @Test(groups = "Functional")
+ public void testDeleteAll()
+ {
+ SequenceFeaturesI store = new SequenceFeatures();
+ assertFalse(store.hasFeatures());
+ store.deleteAll();
+ assertFalse(store.hasFeatures());
+ store.add(new SequenceFeature("Cath", "Desc", 12, 20, 0f, "Group"));
+ store.add(new SequenceFeature("Pfam", "Desc", 6, 12, 2f, "Group2"));
+ assertTrue(store.hasFeatures());
+ store.deleteAll();
+ assertFalse(store.hasFeatures());
+ }
}