From a774a016b6c07368e9e99d4568199f768a77d42f Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 22 Sep 2017 15:03:07 +0100 Subject: [PATCH] JAL-2738 correct handling of reverse strand genes --- src/jalview/datamodel/GeneLoci.java | 78 ++++++++------------------ src/jalview/datamodel/Sequence.java | 5 +- src/jalview/ext/ensembl/EnsemblGene.java | 6 +- src/jalview/ext/ensembl/EnsemblMap.java | 43 +++++++++++++-- src/jalview/gui/AlignFrame.java | 2 +- src/jalview/io/vcf/VCFLoader.java | 89 +++++++++++++++++++++++------- 6 files changed, 135 insertions(+), 88 deletions(-) diff --git a/src/jalview/datamodel/GeneLoci.java b/src/jalview/datamodel/GeneLoci.java index 9f3520f..b4bd642 100644 --- a/src/jalview/datamodel/GeneLoci.java +++ b/src/jalview/datamodel/GeneLoci.java @@ -8,73 +8,39 @@ import jalview.util.MapList; public class GeneLoci { /* - * implemented as an adapter over DBRefEntry with - * source -> species id - * version -> reference - * accession -> chromosome + * an identifier for the species */ - private DBRefEntry loci; + public final String species; - boolean forwardStrand; - - /** - * Constructor - * - * @param taxon - * @param ref - * @param chrId - * @param map - * @param forward - */ - public GeneLoci(String taxon, String ref, String chrId, MapList map, - boolean forward) - { - loci = new DBRefEntry(taxon, ref, chrId, new Mapping(map)); - forwardStrand = forward; - } - - /** - * Answers the identifier for the species - * - * @return + /* + * an identifier for a genome assembly, e.g. GRCh38 */ - public String getSpecies() - { - return loci.getSource(); - } + public final String assembly; - /** - * Answers the identifier for the genomic reference assembly + /* + * a chromosome identifier, e.g. "5" or "X" */ - public String getReference() - { - return loci.getVersion(); - } + public final String chromosome; - /** - * Answers the chromosome identifier - * - * @return + /* + * mapping from sequence positions to chromosome locations; + * any regions with start > end are on the reverse strand */ - public String getChromosome() - { - return loci.getAccessionId(); - } + public final MapList mapping; /** - * Answers the mapping from sequence positions (in sequence start..end - * coordinates) to the corresponding loci in the chromosome (in reference - * assembly coordinates, base 1) + * Constructor * - * @return + * @param taxon + * @param ref + * @param chrId + * @param map */ - public MapList getMapping() - { - return loci.getMap().getMap(); - } - - public boolean isForwardStrand() + public GeneLoci(String taxon, String ref, String chrId, MapList map) { - return forwardStrand; + species = taxon; + assembly = ref; + chromosome = chrId; + mapping = map; } } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index cf1cf94..a619d84 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -679,9 +679,10 @@ public class Sequence extends ASequence implements SequenceI boolean forwardStrand = "1".equals(tokens[5]); String species = ""; // dunno yet! int[] from = new int[] { start, end }; - int[] to = new int[] { chStart, chEnd }; + int[] to = new int[] { forwardStrand ? chStart : chEnd, + forwardStrand ? chEnd : chStart }; MapList map = new MapList(from, to, 1, 1); - GeneLoci gl = new GeneLoci(species, ref, chrom, map, forwardStrand); + GeneLoci gl = new GeneLoci(species, ref, chrom, map); setGeneLoci(gl); } catch (NumberFormatException e) { diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 32b7c7c..f975ac8 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -439,7 +439,7 @@ public class EnsemblGene extends EnsemblSeqProxy * patch to ensure gene to chromosome mapping is complete * (in case created before gene length was known) */ - MapList geneMapping = loci.getMapping(); + MapList geneMapping = loci.mapping; if (geneMapping.getFromRanges().get(0)[1] == 0) { geneMapping.getFromRanges().get(0)[0] = gene.getStart(); @@ -456,8 +456,8 @@ public class EnsemblGene extends EnsemblSeqProxy List transcriptRange = Arrays.asList(new int[] { transcript.getStart(), transcript.getEnd() }); MapList mapList = new MapList(transcriptRange, transcriptLoci, 1, 1); - GeneLoci gl = new GeneLoci(loci.getSpecies(), loci.getReference(), - loci.getChromosome(), mapList, loci.isForwardStrand()); + GeneLoci gl = new GeneLoci(loci.species, loci.assembly, + loci.chromosome, mapList); transcript.setGeneLoci(gl); } diff --git a/src/jalview/ext/ensembl/EnsemblMap.java b/src/jalview/ext/ensembl/EnsemblMap.java index d522ea8..05cc897 100644 --- a/src/jalview/ext/ensembl/EnsemblMap.java +++ b/src/jalview/ext/ensembl/EnsemblMap.java @@ -48,13 +48,35 @@ public class EnsemblMap extends EnsemblRestClient return null; // not used } + /** + * Constructs a URL of the format + * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json + * + * + * @param species + * @param chromosome + * @param fromRef + * @param toRef + * @param startPos + * @param endPos + * @return + * @throws MalformedURLException + */ protected URL getUrl(String species, String chromosome, String fromRef, String toRef, int startPos, int endPos) throws MalformedURLException { - String url = getDomain() + "/map/" + species + "/" + fromRef + "/" - + chromosome + ":" + startPos + ".." + endPos + ":1/" + toRef - + "?content-type=application/json"; + /* + * start-end might be reverse strand - present forwards to the service + */ + boolean forward = startPos <= endPos; + int start = forward ? startPos : endPos; + int end = forward ? endPos : startPos; + String strand = forward ? "1" : "-1"; + String url = String.format( + "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json", + getDomain(), species, fromRef, chromosome, start, end, strand, + toRef); try { return new URL(url); @@ -98,6 +120,7 @@ public class EnsemblMap extends EnsemblRestClient { url = getUrl(species, chromosome, fromRef, toRef, queryRange[0], queryRange[1]); + // System.out.println("Calling " + url); br = getHttpResponse(url, null); return (parseResponse(br)); } catch (Throwable t) @@ -138,9 +161,17 @@ public class EnsemblMap extends EnsemblRestClient // todo check for "mapped" JSONObject val = (JSONObject) rvals.next(); JSONObject mapped = (JSONObject) val.get("mapped"); - String start = mapped.get("start").toString(); - String end = mapped.get("end").toString(); - result = new int[] { Integer.parseInt(start), Integer.parseInt(end) }; + int start = Integer.parseInt(mapped.get("start").toString()); + int end = Integer.parseInt(mapped.get("end").toString()); + String strand = mapped.get("strand").toString(); + if ("1".equals(strand)) + { + result = new int[] { start, end }; + } + else + { + result = new int[] { end, start }; + } } } catch (IOException | ParseException | NumberFormatException e) { diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 3ea5481..95cabcd 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -5602,7 +5602,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { String choice = chooser.getSelectedFile().getPath(); Cache.setProperty("LAST_DIRECTORY", choice); - new VCFLoader(viewport.getAlignment()).loadVCF(choice); + new VCFLoader(viewport.getAlignment()).loadVCF(choice, this); } } diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 48f55d4..abbe139 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -7,6 +7,7 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import jalview.analysis.AlignmentUtils; +import jalview.api.AlignViewControllerGuiI; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.GeneLoci; @@ -19,6 +20,7 @@ import jalview.ext.htsjdk.VCFReader; import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; +import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -67,14 +69,15 @@ public class VCFLoader * instances of this class. * * @param filePath + * @param status */ - public void loadVCF(String filePath) + public void loadVCF(String filePath, AlignViewControllerGuiI status) { VCFReader reader = null; try { - long start = System.currentTimeMillis(); + // long start = System.currentTimeMillis(); reader = new VCFReader(filePath); VCFHeader header = reader.getFileHeader(); @@ -101,16 +104,29 @@ public class VCFLoader computePeptideVariants(seq); } } - long elapsed = System.currentTimeMillis() - start; - System.out.println(String.format( - "Added %d VCF variants to %d sequence(s) (%dms)", varCount, - seqCount, elapsed)); - - reader.close(); + // long elapsed = System.currentTimeMillis() - start; + String msg = String.format("Added %d VCF variants to %d sequence(s)", + varCount, seqCount); + if (status != null) + { + status.setStatus(msg); + } } catch (Throwable e) { System.err.println("Error processing VCF: " + e.getMessage()); e.printStackTrace(); + } finally + { + if (reader != null) + { + try + { + reader.close(); + } catch (IOException e) + { + // ignore + } + } } } @@ -164,8 +180,7 @@ public class VCFLoader return 0; } - MapList mapping = seqCoords.getMapping(); - List seqChromosomalContigs = mapping.getToRanges(); + List seqChromosomalContigs = seqCoords.mapping.getToRanges(); for (int[] range : seqChromosomalContigs) { count += addVcfVariants(seq, reader, range, isVcfRefGrch37); @@ -193,9 +208,9 @@ public class VCFLoader { GeneLoci seqCoords = ((Sequence) seq).getGeneLoci(); - String chromosome = seqCoords.getChromosome(); - String seqRef = seqCoords.getReference(); - String species = seqCoords.getSpecies(); + String chromosome = seqCoords.chromosome; + String seqRef = seqCoords.assembly; + String species = seqCoords.species; // TODO handle species properly if ("".equals(species)) @@ -228,12 +243,15 @@ public class VCFLoader /* * query the VCF for overlaps + * (convert a reverse strand range to forwards) */ int count = 0; - MapList mapping = seqCoords.getMapping(); + MapList mapping = seqCoords.mapping; + int fromLocus = Math.min(range[0], range[1]); + int toLocus = Math.max(range[0], range[1]); CloseableIterator variants = reader.query(chromosome, - range[0], range[1]); + fromLocus, toLocus); while (variants.hasNext()) { /* @@ -368,8 +386,7 @@ public class VCFLoader /* * save mapping for possible future re-use */ - String key = species + EXCL + chromosome + EXCL + fromRef + EXCL - + toRef; + String key = makeRangesKey(chromosome, species, fromRef, toRef); if (!assemblyMappings.containsKey(key)) { assemblyMappings.put(key, new HashMap()); @@ -404,8 +421,7 @@ public class VCFLoader protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome, String species, String fromRef, String toRef) { - String key = species + EXCL + chromosome + EXCL + fromRef + EXCL - + toRef; + String key = makeRangesKey(chromosome, species, fromRef, toRef); if (assemblyMappings.containsKey(key)) { Map mappedRanges = assemblyMappings.get(key); @@ -418,7 +434,7 @@ public class VCFLoader /* * mapping is 1:1 in length, so we trust it to have no discontinuities */ - if (fromRange[0] <= queryRange[0] && fromRange[1] >= queryRange[1]) + if (rangeContains(fromRange, queryRange)) { /* * fromRange subsumes our query range @@ -433,4 +449,37 @@ public class VCFLoader } return null; } + + /** + * Answers true if range's start-end positions include those of queryRange, + * where either range might be in reverse direction, else false + * + * @param range + * @param queryRange + * @return + */ + protected static boolean rangeContains(int[] range, int[] queryRange) + { + int min = Math.min(range[0], range[1]); + int max = Math.max(range[0], range[1]); + + return (min <= queryRange[0] && max >= queryRange[0] + && min <= queryRange[1] && max >= queryRange[1]); + } + + /** + * Formats a ranges map lookup key + * + * @param chromosome + * @param species + * @param fromRef + * @param toRef + * @return + */ + protected static String makeRangesKey(String chromosome, String species, + String fromRef, String toRef) + { + return species + EXCL + chromosome + EXCL + fromRef + EXCL + + toRef; + } } -- 1.7.10.2