From f5e0f36fc8da4fd18f255bf3cc82005a2b94e245 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 23 Aug 2017 11:34:50 +0100 Subject: [PATCH] JAL-2682 non-regex sequence id parsing, and tests added --- src/jalview/datamodel/Sequence.java | 57 +++++++++++------ test/jalview/datamodel/SequenceTest.java | 99 ++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 20 deletions(-) diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 2f1da7f..1b91ee5 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -38,8 +38,6 @@ import java.util.List; import java.util.ListIterator; import java.util.Vector; -import com.stevesoft.pat.Regex; - import fr.orsay.lri.varna.models.rna.RNA; /** @@ -51,11 +49,6 @@ import fr.orsay.lri.varna.models.rna.RNA; */ public class Sequence extends ASequence implements SequenceI { - private static final Regex limitrx = new Regex( - "[/][0-9]{1,}[-][0-9]{1,}$"); - - private static final Regex endrx = new Regex("[0-9]{1,}$"); - SequenceI datasetSequence; String name; @@ -151,6 +144,10 @@ public class Sequence extends ASequence implements SequenceI checkValidRange(); } + /** + * If 'name' ends in /i-j, where i >= j > 0 are integers, extracts i and j as + * start and end respectively and removes the suffix from the name + */ void parseId() { if (name == null) @@ -159,17 +156,37 @@ public class Sequence extends ASequence implements SequenceI "POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor."); name = ""; } - // Does sequence have the /start-end signature? - if (limitrx.search(name)) + int slashPos = name.lastIndexOf('/'); + if (slashPos > -1 && slashPos < name.length() - 1) { - name = limitrx.left(); - endrx.search(limitrx.stringMatched()); - setStart(Integer.parseInt(limitrx.stringMatched().substring(1, - endrx.matchedFrom() - 1))); - setEnd(Integer.parseInt(endrx.stringMatched())); + String suffix = name.substring(slashPos + 1); + String[] range = suffix.split("-"); + if (range.length == 2) + { + try + { + int from = Integer.valueOf(range[0]); + int to = Integer.valueOf(range[1]); + if (from > 0 && to >= from) + { + name = name.substring(0, slashPos); + setStart(from); + setEnd(to); + checkValidRange(); + } + } catch (NumberFormatException e) + { + // leave name unchanged if suffix is invalid + } + } } } + /** + * Ensures that 'end' is not before the end of the sequence, that is, + * (end-start+1) is at least as long as the count of ungapped positions. Note + * that end is permitted to be beyond the end of the sequence data. + */ void checkValidRange() { // Note: JAL-774 : @@ -178,7 +195,7 @@ public class Sequence extends ASequence implements SequenceI int endRes = 0; for (int j = 0; j < sequence.length; j++) { - if (!jalview.util.Comparison.isGap(sequence[j])) + if (!Comparison.isGap(sequence[j])) { endRes++; } @@ -453,15 +470,15 @@ public class Sequence extends ASequence implements SequenceI } /** - * DOCUMENT ME! + * Sets the sequence name. If the name ends in /start-end, then the start-end + * values are parsed out and set, and the suffix is removed from the name. * - * @param name - * DOCUMENT ME! + * @param theName */ @Override - public void setName(String name) + public void setName(String theName) { - this.name = name; + this.name = theName; this.parseId(); } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 2f824ca..fce5869 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -1663,4 +1663,103 @@ public class SequenceTest assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot assertEquals(new Range(8, 13), sq.findPositions(1, 99)); } + + @Test(groups = { "Functional" }) + public void testSetName() + { + SequenceI sq = new Sequence("test", "-ABC---DE-F--"); + assertEquals("test", sq.getName()); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + + sq.setName("testing"); + assertEquals("testing", sq.getName()); + + sq.setName("test/8-10"); + assertEquals("test", sq.getName()); + assertEquals(8, sq.getStart()); + assertEquals(13, sq.getEnd()); // note end is recomputed + + sq.setName("testing/7-99"); + assertEquals("testing", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); // end may be beyond physical end + + sq.setName("/2-3"); + assertEquals("", sq.getName()); + assertEquals(2, sq.getStart()); + assertEquals(7, sq.getEnd()); + + sq.setName("test/"); // invalid + assertEquals("test/", sq.getName()); + assertEquals(2, sq.getStart()); + assertEquals(7, sq.getEnd()); + + sq.setName("test/6-13/7-99"); + assertEquals("test/6-13", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/0-5"); // 0 is invalid - ignored + assertEquals("test/0-5", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/a-5"); // a is invalid - ignored + assertEquals("test/a-5", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/6-5"); // start > end is invalid - ignored + assertEquals("test/6-5", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/5"); // invalid - ignored + assertEquals("test/5", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/-5"); // invalid - ignored + assertEquals("test/-5", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/5-"); // invalid - ignored + assertEquals("test/5-", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName("test/5-6-7"); // invalid - ignored + assertEquals("test/5-6-7", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + + sq.setName(null); // invalid, gets converted to space + assertEquals("", sq.getName()); + assertEquals(7, sq.getStart()); + assertEquals(99, sq.getEnd()); + } + + @Test(groups = { "Functional" }) + public void testCheckValidRange() + { + Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--"); + assertEquals(7, sq.getStart()); + assertEquals(12, sq.getEnd()); + + /* + * checkValidRange ensures end is at least the last residue position + */ + PA.setValue(sq, "end", 2); + sq.checkValidRange(); + assertEquals(12, sq.getEnd()); + + /* + * end may be beyond the last residue position + */ + PA.setValue(sq, "end", 22); + sq.checkValidRange(); + assertEquals(22, sq.getEnd()); + } } -- 1.7.10.2