From 353511c16c62e2f5a0bef407646b3f47dbb9e68c Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 24 Oct 2017 17:01:49 +0100 Subject: [PATCH] JAL-2743 look up species and gene loci for Ensembl gene --- src/jalview/ext/ensembl/EnsemblGene.java | 77 +++++++++++++++--------- src/jalview/ext/ensembl/EnsemblLookup.java | 88 ++++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 32 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index afff4c2..11322c8 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -147,10 +147,9 @@ public class EnsemblGene extends EnsemblSeqProxy continue; } - parseChromosomeLocations(geneAlignment); - if (geneAlignment.getHeight() == 1) { + findGeneLoci(geneAlignment.getSequenceAt(0), geneId); getTranscripts(geneAlignment, geneId); } if (al == null) @@ -166,42 +165,64 @@ public class EnsemblGene extends EnsemblSeqProxy } /** + * Calls the /lookup/id REST service, parses the response for gene + * coordinates, and if successful, adds these to the sequence + * + * @param seq + * @param geneId + */ + void findGeneLoci(SequenceI seq, String geneId) + { + /* + * if sequence description is in gene loci format, parse this + * - but try remote lookup anyway as 'better' if available + */ + parseChromosomeLocations(seq); + + GeneLociI geneLoci = new EnsemblLookup(getDomain()).getGeneLoci(geneId); + if (geneLoci != null) + { + seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(), + geneLoci.getChromosomeId(), geneLoci.getMap()); + } + } + + /** * Parses and saves fields of an Ensembl-style description e.g. 
* chromosome:GRCh38:17:45051610:45109016:1 * - * @param alignment + * @param seq */ - private void parseChromosomeLocations(AlignmentI alignment) + boolean parseChromosomeLocations(SequenceI seq) { - for (SequenceI seq : alignment.getSequences()) + String description = seq.getDescription(); + if (description == null) + { + return false; + } + String[] tokens = description.split(":"); + if (tokens.length == 6 && tokens[0].startsWith(DBRefEntry.CHROMOSOME)) { - String description = seq.getDescription(); - if (description == null) + String ref = tokens[1]; + String chrom = tokens[2]; + try { - continue; - } - String[] tokens = description.split(":"); - if (tokens.length == 6 && tokens[0].startsWith(DBRefEntry.CHROMOSOME)) + int chStart = Integer.parseInt(tokens[3]); + int chEnd = Integer.parseInt(tokens[4]); + boolean forwardStrand = "1".equals(tokens[5]); + String species = ""; // not known here + int[] from = new int[] { seq.getStart(), seq.getEnd() }; + int[] to = new int[] { forwardStrand ? chStart : chEnd, + forwardStrand ? chEnd : chStart }; + MapList map = new MapList(from, to, 1, 1); + seq.setGeneLoci(species, ref, chrom, map); + return true; + } catch (NumberFormatException e) { - String ref = tokens[1]; - String chrom = tokens[2]; - try - { - int chStart = Integer.parseInt(tokens[3]); - int chEnd = Integer.parseInt(tokens[4]); - boolean forwardStrand = "1".equals(tokens[5]); - String species = ""; // dunno yet! - int[] from = new int[] { seq.getStart(), seq.getEnd() }; - int[] to = new int[] { forwardStrand ? chStart : chEnd, - forwardStrand ? 
chEnd : chStart }; - MapList map = new MapList(from, to, 1, 1); - seq.setGeneLoci(species, ref, chrom, map); - } catch (NumberFormatException e) - { - System.err.println("Bad integers in description " + description); - } + System.err.println("Bad integers in description " + description); } } + return false; } /** diff --git a/src/jalview/ext/ensembl/EnsemblLookup.java b/src/jalview/ext/ensembl/EnsemblLookup.java index 5a616f0..0d1b554 100644 --- a/src/jalview/ext/ensembl/EnsemblLookup.java +++ b/src/jalview/ext/ensembl/EnsemblLookup.java @@ -20,13 +20,17 @@ */ package jalview.ext.ensembl; +import jalview.bin.Cache; import jalview.datamodel.AlignmentI; +import jalview.datamodel.GeneLociI; +import jalview.util.MapList; import java.io.BufferedReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.function.Function; @@ -137,7 +141,7 @@ public class EnsemblLookup extends EnsemblRestClient */ public String getGeneId(String identifier) { - return getResult(identifier, br -> parseGeneId(br)); + return (String) getResult(identifier, br -> parseGeneId(br)); } /** @@ -149,7 +153,7 @@ public class EnsemblLookup extends EnsemblRestClient */ public String getSpecies(String identifier) { - return getResult(identifier, br -> getAttribute(br, SPECIES)); + return (String) getResult(identifier, br -> getAttribute(br, SPECIES)); } /** @@ -160,8 +164,8 @@ public class EnsemblLookup extends EnsemblRestClient * @param parser * @return */ - protected String getResult(String identifier, - Function parser) + protected Object getResult(String identifier, + Function parser) { List ids = Arrays.asList(new String[] { identifier }); @@ -265,4 +269,80 @@ public class EnsemblLookup extends EnsemblRestClient return geneId; } + /** + * Calls the /lookup/id rest service for the given id, and if successful, + * parses and returns the gene's chromosomal coordinates + 
* * @param geneId + * @return + */ + public GeneLociI getGeneLoci(String geneId) + { + return (GeneLociI) getResult(geneId, br -> parseGeneLoci(br)); + } + + /** + * Parses the /lookup/id response for species, assembly_name, + * seq_region_name, start, end and returns an object that wraps them, or null + * if unsuccessful + * + * @param br + * @return + */ + GeneLociI parseGeneLoci(BufferedReader br) + { + JSONParser jp = new JSONParser(); + try + { + JSONObject val = (JSONObject) jp.parse(br); + final String species = val.get("species").toString(); + final String assembly = val.get("assembly_name").toString(); + final String chromosome = val.get("seq_region_name").toString(); + String strand = val.get("strand").toString(); + int start = Integer.parseInt(val.get("start").toString()); + int end = Integer.parseInt(val.get("end").toString()); + int fromEnd = end - start + 1; + boolean reverseStrand = "-1".equals(strand); + int toStart = reverseStrand ? end : start; + int toEnd = reverseStrand ? start : end; + List fromRange = Collections.singletonList(new int[] { 1, + fromEnd }); + List toRange = Collections.singletonList(new int[] { toStart, + toEnd }); + final MapList map = new MapList(fromRange, toRange, 1, 1); + return new GeneLociI() + { + + @Override + public String getSpeciesId() + { + return species == null ? "" : species; + } + + @Override + public String getAssemblyId() + { + return assembly; + } + + @Override + public String getChromosomeId() + { + return chromosome; + } + + @Override + public MapList getMap() + { + return map; + } + }; + } catch (ParseException | NullPointerException | IOException + | NumberFormatException | ClassCastException e) + { + Cache.log.error("Error looking up gene loci: " + e.getMessage()); + } + return null; + } + } -- 1.7.10.2