From b9cc00124dff0ad97f8b154f16d14ef530d03120 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Thu, 21 Sep 2017 14:13:28 +0100
Subject: [PATCH] JAL-2738 GeneLoci holds mapping of sequence to chromosome

---
 src/jalview/datamodel/GeneLoci.java |   96 ++++++++++++++++++++++++++++++++++++
 src/jalview/datamodel/Sequence.java |   71 ++++++++++++++++++++++++--
 2 files changed, 163 insertions(+), 4 deletions(-)
 create mode 100644 src/jalview/datamodel/GeneLoci.java

diff --git a/src/jalview/datamodel/GeneLoci.java b/src/jalview/datamodel/GeneLoci.java
new file mode 100644
index 0000000..9f3520f
--- /dev/null
+++ b/src/jalview/datamodel/GeneLoci.java
@@ -0,0 +1,96 @@
+package jalview.datamodel;
+
+import jalview.util.MapList;
+
+/**
+ * A data bean to model one or more contiguous regions on one chromosome
+ */
+public class GeneLoci
+{
+  /*
+   * implemented as an adapter over DBRefEntry with
+   * source -> species id
+   * version -> reference
+   * accession -> chromosome
+   */
+  private final DBRefEntry loci;
+
+  /*
+   * the strand flag, as supplied to the constructor
+   */
+  private final boolean forwardStrand;
+
+  /**
+   * Constructor
+   * 
+   * @param taxon
+   *          identifier for the species
+   * @param ref
+   *          identifier for the genomic reference assembly
+   * @param chrId
+   *          chromosome identifier
+   * @param map
+   *          mapping from sequence positions to chromosome positions
+   * @param forward
+   *          the value to be answered by isForwardStrand()
+   */
+  public GeneLoci(String taxon, String ref, String chrId, MapList map,
+          boolean forward)
+  {
+    loci = new DBRefEntry(taxon, ref, chrId, new Mapping(map));
+    forwardStrand = forward;
+  }
+
+  /**
+   * Answers the identifier for the species
+   * 
+   * @return the source field of the wrapped DBRefEntry
+   */
+  public String getSpecies()
+  {
+    return loci.getSource();
+  }
+
+  /**
+   * Answers the identifier for the genomic reference assembly
+   * 
+   * @return the version field of the wrapped DBRefEntry
+   */
+  public String getReference()
+  {
+    return loci.getVersion();
+  }
+
+  /**
+   * Answers the chromosome identifier
+   * 
+   * @return the accession id field of the wrapped DBRefEntry
+   */
+  public String getChromosome()
+  {
+    return loci.getAccessionId();
+  }
+
+  /**
+   * Answers the mapping from sequence positions (in sequence start..end
+   * coordinates) to the corresponding loci in the chromosome (in reference
+   * assembly coordinates, base 1)
+   * 
+   * @return the MapList held by the wrapped DBRefEntry's Mapping
+   */
+  public MapList getMapping()
+  {
+    return loci.getMap().getMap();
+  }
+
+  /**
+   * Answers the strand flag that was supplied when this object was
+   * constructed
+   * 
+   * @return true if flagged as forward strand, else false
+   */
+  public boolean isForwardStrand()
+  {
+    return forwardStrand;
+  }
+}
diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java
index 2f1da7f..cf1cf94 100755
--- a/src/jalview/datamodel/Sequence.java
+++ b/src/jalview/datamodel/Sequence.java
@@ -106,6 +106,8 @@ public class Sequence extends ASequence implements SequenceI
    */
   private int changeCount;
 
+  private GeneLoci geneLoci;
+
   /**
    * Creates a new Sequence object.
    * 
@@ -645,21 +647,82 @@ public class Sequence extends ASequence implements SequenceI
   }
 
   /**
-   * DOCUMENT ME!
+   * Sets the sequence description, and also parses out any special formats of
+   * interest
    * 
    * @param desc
-   *          DOCUMENT ME!
    */
   @Override
   public void setDescription(String desc)
   {
     this.description = desc;
+    parseDescription();
   }
 
   /**
-   * DOCUMENT ME!
+   * Parses and saves fields of an Ensembl-style description e.g.
+   * chromosome:GRCh38:17:45051610:45109016:1
+   * <p>
+   * The colon-separated fields are read as: the literal "chromosome", the
+   * reference assembly, the chromosome id, chromosome start, chromosome end,
+   * and strand ("1" is taken as forward, anything else as not forward). A
+   * description in any other format is ignored, and any gene loci already
+   * set are left unchanged.
+   */
+  protected void parseDescription()
+  {
+    if (description == null)
+    {
+      return;
+    }
+    String[] tokens = description.split(":");
+    if (tokens.length == 6 && "chromosome".equals(tokens[0]))
+    {
+      String ref = tokens[1];
+      String chrom = tokens[2];
+      try
+      {
+        int chStart = Integer.parseInt(tokens[3]);
+        int chEnd = Integer.parseInt(tokens[4]);
+        boolean forwardStrand = "1".equals(tokens[5]);
+        String species = ""; // not available from the description
+        // map the sequence's current start..end 1:1 onto the chromosome range
+        int[] from = new int[] { start, end };
+        int[] to = new int[] { chStart, chEnd };
+        MapList map = new MapList(from, to, 1, 1);
+        GeneLoci gl = new GeneLoci(species, ref, chrom, map, forwardStrand);
+        setGeneLoci(gl);
+      } catch (NumberFormatException e)
+      {
+        System.err.println("Bad integers in description " + description);
+      }
+    }
+  }
+
+  /**
+   * Sets the gene loci mapping for the sequence, replacing any previous value
+   * 
+   * @param gl
+   */
+  public void setGeneLoci(GeneLoci gl)
+  {
+    geneLoci = gl;
+  }
+
+  /**
+   * Returns the gene loci mapping for the sequence (may be null)
    * 
-   * @return DOCUMENT ME!
+   * @return the gene loci last set, or null if none
+   */
+  public GeneLoci getGeneLoci()
+  {
+    return geneLoci;
+  }
+
+  /**
+   * Answers the description
+   * 
+   * @return the description last set (may be null)
    */
   @Override
   public String getDescription()
-- 
1.7.10.2