From 1f9b82cddbfb470585d5f4b53c101f9ffcb84b5a Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 21 Jan 2016 15:49:08 +0000 Subject: [PATCH] JAL-1796 parse ensembl genomic coordinates to set start position --- src/jalview/io/AlignFile.java | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/jalview/io/AlignFile.java b/src/jalview/io/AlignFile.java index eb43701..2b8f127 100755 --- a/src/jalview/io/AlignFile.java +++ b/src/jalview/io/AlignFile.java @@ -352,7 +352,27 @@ public abstract class AlignFile extends FileParse if (space > -1) { seq = new Sequence(id.substring(0, space), ""); - seq.setDescription(id.substring(space + 1)); + String desc = id.substring(space + 1); + seq.setDescription(desc); + + if (desc.startsWith("chromosome")) + { + /* + * parse Ensembl style gene description e.g. + * chromosome:GRCh38:7:140696688:140721955:1 + */ + String[] tokens = desc.split(":"); + if (tokens.length > 3) + { + try + { + seq.setStart(Integer.parseInt(tokens[3])); + } catch (NumberFormatException e) + { + // ignore + } + } + } } else { -- 1.7.10.2