JAL-1796 parse Ensembl genomic coordinates to set start position
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 21 Jan 2016 15:49:08 +0000 (15:49 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 21 Jan 2016 15:49:08 +0000 (15:49 +0000)
src/jalview/io/AlignFile.java

index eb43701..2b8f127 100755 (executable)
@@ -352,7 +352,27 @@ public abstract class AlignFile extends FileParse
     if (space > -1)
     {
       seq = new Sequence(id.substring(0, space), "");
-      seq.setDescription(id.substring(space + 1));
+      String desc = id.substring(space + 1);
+      seq.setDescription(desc);
+
+      if (desc.startsWith("chromosome"))
+      {
+        /*
+         * parse Ensembl style gene description e.g.
+         * chromosome:GRCh38:7:140696688:140721955:1
+         */
+        String[] tokens = desc.split(":");
+        if (tokens.length > 3)
+        {
+          try
+          {
+            seq.setStart(Integer.parseInt(tokens[3]));
+          } catch (NumberFormatException e)
+          {
+            // ignore
+          }
+        }
+      }
     }
     else
     {