JAL-2114 parser + tests for GenBank location descriptors
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 19 May 2016 16:01:14 +0000 (17:01 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 19 May 2016 16:01:14 +0000 (17:01 +0100)
src/jalview/util/DnaUtils.java [new file with mode: 0644]
test/jalview/util/DnaUtilsTest.java [new file with mode: 0644]

diff --git a/src/jalview/util/DnaUtils.java b/src/jalview/util/DnaUtils.java
new file mode 100644 (file)
index 0000000..639eb8e
--- /dev/null
@@ -0,0 +1,220 @@
+package jalview.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Utility methods for parsing ENA/GenBank nucleotide location descriptors.
+ */
+public class DnaUtils
+{
+  private static final String JOIN_PREFIX = "join(";
+
+  private static final String COMPLEMENT_PREFIX = "complement(";
+
+  private static final String ORDER_PREFIX = "order(";
+
+  /**
+   * Parses an ENA/GenBank format location specifier and returns a list of
+   * [start, end] ranges. Supports simple ranges such as 12..78, join(...),
+   * complement(...) and nestings of these. Ranges inside a complement are
+   * returned in reverse order, each with its start and end swapped.
+   * <p>
+   * Returns null, after writing a message to stderr, if not able to parse.
+   * This includes valid forms that are not handled: order(...), partial
+   * locations like &lt;1..888, and locations like 102.110 or 123^124.
+   *
+   * @param location
+   *          the location specifier; surrounding or embedded whitespace is
+   *          not accepted
+   * @return the parsed ranges in the order specified, or null if the
+   *         location could not be parsed
+   * @throws NullPointerException
+   *           if location is null
+   * @see http://www.insdc.org/files/feature_table.html#3.4
+   */
+  public static List<int[]> parseLocation(String location)
+  {
+    if (location.startsWith(JOIN_PREFIX))
+    {
+      return parseJoin(location);
+    }
+    if (location.startsWith(COMPLEMENT_PREFIX))
+    {
+      return parseComplement(location);
+    }
+    if (location.startsWith(ORDER_PREFIX))
+    {
+      return unparseable(location);
+    }
+
+    /*
+     * limit of -1 keeps trailing empty tokens, so that a malformed
+     * specifier like 12..78.. is rejected rather than read as 12..78
+     */
+    String[] range = location.split("\\.\\.", -1);
+    if (range.length != 2)
+    {
+      /*
+       * could be a location like 102.110 or 123^124
+       */
+      return unparseable(location);
+    }
+    try
+    {
+      int start = Integer.parseInt(range[0]);
+      int end = Integer.parseInt(range[1]);
+      return Collections.singletonList(new int[] { start, end });
+    } catch (NumberFormatException e)
+    {
+      /*
+       * could be a location like <1..888 or 1..>888
+       */
+      return unparseable(location);
+    }
+  }
+
+  /**
+   * Parses a complement(locationSpec) into a list of start-end ranges, in
+   * reverse order and each with start and end swapped relative to
+   * locationSpec.
+   *
+   * @param location
+   *          a specifier of the form complement(locationSpec)
+   * @return the complemented ranges, or null if location lacks the
+   *         complement( prefix or closing bracket, or locationSpec could
+   *         not be parsed
+   */
+  static List<int[]> parseComplement(String location)
+  {
+    String toComplement = unwrap(location, COMPLEMENT_PREFIX);
+    if (toComplement == null)
+    {
+      return unparseable(location);
+    }
+    List<int[]> ranges = parseLocation(toComplement);
+    if (ranges == null)
+    {
+      /*
+       * something bad in there
+       */
+      return null;
+    }
+
+    /*
+     * reverse the order and direction of ranges; build a new list as the
+     * parsed one may be unmodifiable
+     */
+    List<int[]> complemented = new ArrayList<int[]>(ranges.size());
+    for (int i = ranges.size() - 1; i >= 0; i--)
+    {
+      int[] range = ranges.get(i);
+      complemented.add(new int[] { range[1], range[0] });
+    }
+    return complemented;
+  }
+
+  /**
+   * Parses a join(loc1,loc2,...,locn) into a list of start-end ranges. Only
+   * commas outside any nested brackets separate the locations, so that
+   * each loc may itself be a complement(...) or join(...).
+   *
+   * @param location
+   *          a specifier of the form join(loc1,loc2,...,locn)
+   * @return the ranges of all the joined locations in order, or null if
+   *         location lacks the join( prefix or closing bracket, or any
+   *         joined location (including an empty one) could not be parsed
+   */
+  static List<int[]> parseJoin(String location)
+  {
+    String joinedLocs = unwrap(location, JOIN_PREFIX);
+    if (joinedLocs == null)
+    {
+      return unparseable(location);
+    }
+
+    List<int[]> ranges = new ArrayList<int[]>();
+    for (String loc : splitTopLevel(joinedLocs))
+    {
+      List<int[]> range = parseLocation(loc);
+      if (range == null)
+      {
+        /*
+         * something bad in there
+         */
+        return null;
+      }
+      ranges.addAll(range);
+    }
+    return ranges;
+  }
+
+  /**
+   * Returns what is inside prefix...), or null if location does not both
+   * start with prefix and end with a closing bracket.
+   *
+   * @param location the specifier to take the contents of
+   * @param prefix
+   *          operator name including its opening bracket, e.g. join(
+   * @return the text between prefix and the final bracket, or null
+   */
+  private static String unwrap(String location, String prefix)
+  {
+    boolean wrapped = location.length() > prefix.length()
+            && location.startsWith(prefix) && location.endsWith(")");
+    if (!wrapped)
+    {
+      return null;
+    }
+    return location.substring(prefix.length(), location.length() - 1);
+  }
+
+  /**
+   * Splits a comma-separated list of locations at those commas that are not
+   * enclosed in brackets. Empty items are retained (and will fail to
+   * parse).
+   *
+   * @param locations the text found inside a join(...)
+   * @return the items in order; always at least one, possibly empty
+   */
+  private static List<String> splitTopLevel(String locations)
+  {
+    List<String> parts = new ArrayList<String>();
+    int depth = 0;
+    int partStart = 0;
+    for (int i = 0; i < locations.length(); i++)
+    {
+      char c = locations.charAt(i);
+      if (c == '(')
+      {
+        depth++;
+      }
+      else if (c == ')')
+      {
+        depth--;
+      }
+      else if (c == ',' && depth == 0)
+      {
+        parts.add(locations.substring(partStart, i));
+        partStart = i + 1;
+      }
+    }
+    parts.add(locations.substring(partStart));
+    return parts;
+  }
+
+  /**
+   * Reports to stderr that the location could not be processed.
+   *
+   * @param location the specifier that could not be parsed
+   * @return null always, so callers can return the result directly
+   */
+  private static List<int[]> unparseable(String location)
+  {
+    System.err.println("Unable to process location specifier: "
+            + location);
+    return null;
+  }
+
+}
diff --git a/test/jalview/util/DnaUtilsTest.java b/test/jalview/util/DnaUtilsTest.java
new file mode 100644 (file)
index 0000000..af76885
--- /dev/null
@@ -0,0 +1,89 @@
+package jalview.util;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.fail;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class DnaUtilsTest
+{
+  /**
+   * Exercises DnaUtils.parseLocation with supported, unsupported and invalid
+   * ENA/GenBank location specifiers
+   *
+   * @see http://www.insdc.org/files/feature_table.html#3.4
+   */
+  @Test(groups = { "Functional" })
+  public void testParseLocation()
+  {
+    // simple range
+    verifyRanges(DnaUtils.parseLocation("12..78"), 12, 78);
+
+    // join of simple ranges
+    verifyRanges(DnaUtils.parseLocation("join(12..78,134..202,322..345)"),
+            12, 78, 134, 202, 322, 345);
+
+    // complement of a simple range
+    verifyRanges(DnaUtils.parseLocation("complement(34..126)"), 126, 34);
+
+    // complement of a join
+    verifyRanges(
+            DnaUtils.parseLocation("complement(join(2691..4571,4918..5163))"),
+            5163, 4918, 4571, 2691);
+
+    // join of two complements
+    verifyRanges(
+            DnaUtils.parseLocation("join(complement(4918..5163),complement(2691..4571))"),
+            5163, 4918, 4571, 2691);
+
+    /*
+     * join complement to non-complement
+     * @see http://www.ncbi.nlm.nih.gov/genbank/genomesubmit_annotation/ Transpliced Genes
+     */
+    verifyRanges(
+            DnaUtils.parseLocation("join(complement(36618..36700),86988..87064)"),
+            36700, 36618, 86988, 87064);
+
+    // valid things we don't yet handle
+    String[] unsupported = { "<34..126", "34..>126", "34.126", "34^126" };
+    for (String spec : unsupported)
+    {
+      assertNull(DnaUtils.parseLocation(spec));
+    }
+
+    // invalid things
+    String[] invalid = { "", "JOIN(1..2)", "join(1..2" };
+    for (String spec : invalid)
+    {
+      assertNull(DnaUtils.parseLocation(spec));
+    }
+
+    // a null location is not tolerated
+    try
+    {
+      assertNull(DnaUtils.parseLocation(null));
+      fail("Expected exception");
+    } catch (NullPointerException e)
+    {
+      // expected
+    }
+  }
+
+  /**
+   * Asserts that the parsed ranges match the expected values, given as a
+   * flattened sequence start1, end1, start2, end2, ...
+   */
+  private static void verifyRanges(List<int[]> actual, int... expected)
+  {
+    assertEquals(expected.length / 2, actual.size());
+    for (int i = 0; i < actual.size(); i++)
+    {
+      assertEquals(expected[2 * i], actual.get(i)[0]);
+      assertEquals(expected[2 * i + 1], actual.get(i)[1]);
+    }
+  }
+
+}