--- /dev/null
+package jalview.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class DnaUtils
+{
+  /** Opening text of a join(loc1,loc2,...) location specifier. */
+  private static final String JOIN_PREFIX = "join(";
+
+  /** Opening text of a complement(loc) location specifier. */
+  private static final String COMPLEMENT_PREFIX = "complement(";
+
+  /** Opening text of an order(loc1,loc2,...) specifier (not supported). */
+  private static final String ORDER_PREFIX = "order(";
+
+  /**
+   * Parses an ENA/GenBank format location specifier and returns a list of
+   * [start, end] ranges. Supported forms are a simple range
+   * <code>start..end</code>, <code>join(...)</code> and
+   * <code>complement(...)</code>, which may be nested in each other.
+   * Returns null (after writing a message to System.err) if not able to
+   * parse, including for <code>order(...)</code>, partial locations such as
+   * <code>&lt;1..888</code>, and forms like <code>102.110</code> or
+   * <code>123^124</code>.
+   *
+   * @param location
+   *          the location specifier; must not be null
+   * @return a modifiable list of two-element [start, end] arrays, or null if
+   *         the specifier could not be parsed
+   * @throws NullPointerException
+   *           if location is null
+   * @see <a href="http://www.insdc.org/files/feature_table.html#3.4">INSDC
+   *      feature table, section 3.4</a>
+   */
+  public static List<int[]> parseLocation(String location)
+  {
+    if (location.startsWith(JOIN_PREFIX))
+    {
+      return parseJoin(location);
+    }
+    if (location.startsWith(COMPLEMENT_PREFIX))
+    {
+      return parseComplement(location);
+    }
+    if (location.startsWith(ORDER_PREFIX))
+    {
+      return reportUnparseable(location);
+    }
+
+    String[] range = location.split("\\.\\.");
+    if (range.length != 2)
+    {
+      /*
+       * could be a location like 102.110 or 123^124
+       */
+      return reportUnparseable(location);
+    }
+
+    try
+    {
+      int start = Integer.parseInt(range[0]);
+      int end = Integer.parseInt(range[1]);
+      /*
+       * return a modifiable list, so that parseComplement can safely
+       * reverse it in place
+       */
+      List<int[]> result = new ArrayList<int[]>(1);
+      result.add(new int[] { start, end });
+      return result;
+    } catch (NumberFormatException e)
+    {
+      /*
+       * could be a location like <1..888 or 1..>888
+       */
+      return reportUnparseable(location);
+    }
+  }
+
+  /**
+   * Parses a complement(locationSpec) into a list of start-end ranges, with
+   * the order of the ranges reversed and start and end swapped in each
+   * range. Returns null if the specifier is not of the form
+   * <code>complement(...)</code> or its content cannot be parsed.
+   *
+   * @param location
+   *          the complete specifier, including "complement(" and ")"
+   * @return the reversed ranges, or null if not able to parse
+   */
+  static List<int[]> parseComplement(String location)
+  {
+    /*
+     * take what is inside complement()
+     */
+    String toComplement = unwrap(location, COMPLEMENT_PREFIX);
+    if (toComplement == null)
+    {
+      return reportUnparseable(location);
+    }
+    List<int[]> ranges = parseLocation(toComplement);
+    if (ranges == null)
+    {
+      /*
+       * something bad in there (already reported)
+       */
+      return null;
+    }
+
+    /*
+     * reverse the order and direction of ranges
+     */
+    Collections.reverse(ranges);
+    for (int[] range : ranges)
+    {
+      int temp = range[0];
+      range[0] = range[1];
+      range[1] = temp;
+    }
+    return ranges;
+  }
+
+  /**
+   * Parses a join(loc1,loc2,...,locn) into a list of start-end ranges. The
+   * content is split only at commas that are not inside nested parentheses,
+   * so each element may itself be a join(...) or complement(...). Returns
+   * null if the specifier is not of the form <code>join(...)</code>, has
+   * unbalanced parentheses, or any element (including an empty one) cannot
+   * be parsed.
+   *
+   * @param location
+   *          the complete specifier, including "join(" and ")"
+   * @return the concatenated ranges of all elements, or null if not able to
+   *         parse
+   */
+  static List<int[]> parseJoin(String location)
+  {
+    /*
+     * take what is inside join()
+     */
+    String joinedLocs = unwrap(location, JOIN_PREFIX);
+    if (joinedLocs == null)
+    {
+      return reportUnparseable(location);
+    }
+    List<String> locations = splitTopLevel(joinedLocs);
+    if (locations == null)
+    {
+      return reportUnparseable(location);
+    }
+
+    List<int[]> ranges = new ArrayList<int[]>();
+    for (String loc : locations)
+    {
+      List<int[]> range = parseLocation(loc);
+      if (range == null)
+      {
+        /*
+         * something bad in there (already reported)
+         */
+        return null;
+      }
+      ranges.addAll(range);
+    }
+    return ranges;
+  }
+
+  /**
+   * Returns the text between the given prefix and a closing ")", or null if
+   * the location does not start with the prefix or does not end with ")".
+   *
+   * @param location
+   * @param prefix
+   *          expected opening text, ending in "("
+   * @return
+   */
+  private static String unwrap(String location, String prefix)
+  {
+    if (!location.startsWith(prefix) || !location.endsWith(")"))
+    {
+      return null;
+    }
+    return location.substring(prefix.length(), location.length() - 1);
+  }
+
+  /**
+   * Splits the text at each comma that is not enclosed in parentheses.
+   * Returns null if the parentheses in the text are not balanced.
+   *
+   * @param text
+   * @return
+   */
+  private static List<String> splitTopLevel(String text)
+  {
+    List<String> parts = new ArrayList<String>();
+    int depth = 0;
+    int partStart = 0;
+    for (int i = 0; i < text.length(); i++)
+    {
+      char c = text.charAt(i);
+      if (c == '(')
+      {
+        depth++;
+      }
+      else if (c == ')')
+      {
+        depth--;
+        if (depth < 0)
+        {
+          return null;
+        }
+      }
+      else if (c == ',' && depth == 0)
+      {
+        parts.add(text.substring(partStart, i));
+        partStart = i + 1;
+      }
+    }
+    if (depth != 0)
+    {
+      return null;
+    }
+    parts.add(text.substring(partStart));
+    return parts;
+  }
+
+  /**
+   * Writes an error message for the location to System.err and returns null,
+   * so that callers can report and fail in a single statement.
+   *
+   * @param location
+   * @return null always
+   */
+  private static List<int[]> reportUnparseable(String location)
+  {
+    System.err.println("Unable to process location specifier: "
+            + location);
+    return null;
+  }
+
+}
--- /dev/null
+package jalview.util;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.fail;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class DnaUtilsTest
+{
+  /**
+   * Exercises DnaUtils.parseLocation with supported, unsupported and invalid
+   * ENA/GenBank location specifiers
+   *
+   * @see http://www.insdc.org/files/feature_table.html#3.4
+   */
+  @Test(groups = { "Functional" })
+  public void testParseLocation()
+  {
+    /*
+     * simple range
+     */
+    verifyRanges(DnaUtils.parseLocation("12..78"),
+            new int[][] { { 12, 78 } });
+
+    /*
+     * join of simple ranges
+     */
+    verifyRanges(DnaUtils.parseLocation("join(12..78,134..202,322..345)"),
+            new int[][] { { 12, 78 }, { 134, 202 }, { 322, 345 } });
+
+    /*
+     * complement of a simple range
+     */
+    verifyRanges(DnaUtils.parseLocation("complement(34..126)"),
+            new int[][] { { 126, 34 } });
+
+    /*
+     * complement of a join
+     */
+    verifyRanges(
+            DnaUtils.parseLocation("complement(join(2691..4571,4918..5163))"),
+            new int[][] { { 5163, 4918 }, { 4571, 2691 } });
+
+    /*
+     * join of two complements
+     */
+    verifyRanges(
+            DnaUtils.parseLocation("join(complement(4918..5163),complement(2691..4571))"),
+            new int[][] { { 5163, 4918 }, { 4571, 2691 } });
+
+    /*
+     * join complement to non-complement
+     * @see http://www.ncbi.nlm.nih.gov/genbank/genomesubmit_annotation/ Transpliced Genes
+     */
+    verifyRanges(
+            DnaUtils.parseLocation("join(complement(36618..36700),86988..87064)"),
+            new int[][] { { 36700, 36618 }, { 86988, 87064 } });
+
+    /*
+     * valid things we don't yet handle, followed by invalid things
+     */
+    String[] unparseable = { "<34..126", "34..>126", "34.126", "34^126",
+        "", "JOIN(1..2)", "join(1..2" };
+    for (String spec : unparseable)
+    {
+      assertNull(DnaUtils.parseLocation(spec));
+    }
+
+    /*
+     * a null location is expected to throw rather than return null
+     */
+    try
+    {
+      assertNull(DnaUtils.parseLocation(null));
+      fail("Expected exception");
+    } catch (NullPointerException expected)
+    {
+      // expected
+    }
+  }
+
+  /**
+   * Asserts that the parsed ranges match the expected [start, end] pairs, in
+   * number, order and content
+   *
+   * @param actual
+   * @param expected
+   */
+  private static void verifyRanges(List<int[]> actual, int[][] expected)
+  {
+    assertEquals(expected.length, actual.size());
+    for (int i = 0; i < expected.length; i++)
+    {
+      assertEquals(expected[i][0], actual.get(i)[0]);
+      assertEquals(expected[i][1], actual.get(i)[1]);
+    }
+  }
+
+}