From dc91411856157cb32a4eb3cb8b3e61de2754475e Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 29 Mar 2022 23:20:21 +0100 Subject: [PATCH] JAL-3982 JAL-3860 encapsulate genomic coordinates mapping/liftover logic --- src/jalview/datamodel/GenomicAssemblies.java | 64 +++++++++++++++++++++++--- src/jalview/io/vcf/VCFLoader.java | 36 +-------------- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/src/jalview/datamodel/GenomicAssemblies.java b/src/jalview/datamodel/GenomicAssemblies.java index 9754b90..13d9a61 100644 --- a/src/jalview/datamodel/GenomicAssemblies.java +++ b/src/jalview/datamodel/GenomicAssemblies.java @@ -1,13 +1,24 @@ package jalview.datamodel; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Map.Entry; +import jalview.bin.Console; import jalview.ext.ensembl.EnsemblMap; import jalview.io.vcf.VCFLoader; +import jalview.util.MapList; import jalview.util.MappingUtils; +/** + * Holds mappings between gemomic assemblies Lazily populated as required from + * Ensembl Liftover and other datasources + * + * @author gmungoc + * + */ public class GenomicAssemblies { @@ -33,7 +44,7 @@ public class GenomicAssemblies * @param toRef * @return */ - public static String makeRangesKey(String chromosome, String species, + private static String makeRangesKey(String chromosome, String species, String fromRef, String toRef) { return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef; @@ -60,8 +71,8 @@ public class GenomicAssemblies * assembly reference we wish to translate to * @return the start-end range in 'toRef' coordinates */ - public static int[] mapReferenceRange(int[] queryRange, String chromosome, - String species, String fromRef, String toRef) + private static int[] mapReferenceRange(int[] queryRange, + String chromosome, String species, String fromRef, String toRef) { /* * first try shorcut of computing the mapping as a subregion of one @@ -90,7 +101,8 @@ public class GenomicAssemblies /* * save mapping for possible future re-use */ - String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef); + String key = GenomicAssemblies.makeRangesKey(chromosome, species, + fromRef, toRef); if (!assemblyMappings.containsKey(key)) { assemblyMappings.put(key, new HashMap()); @@ -122,10 +134,11 @@ public class GenomicAssemblies * @param toRef * @return */ - protected static int[] findSubsumedRangeMapping(int[] queryRange, + private static int[] findSubsumedRangeMapping(int[] queryRange, String chromosome, String species, String fromRef, String toRef) { - String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef); + String key = GenomicAssemblies.makeRangesKey(chromosome, species, + fromRef, toRef); if (assemblyMappings.containsKey(key)) { Map mappedRanges = assemblyMappings.get(key); @@ -155,4 +168,43 @@ public class GenomicAssemblies return null; } + /** + * query Ensembl to map chromosomal coordinates between different + * assemblies
+ * will most likely fail for species other than human + */ + public static MapList mapAssemblyFor(String seqRef, String species, + MapList map, String chromosome, String vcfAssembly) + { + List toVcfRanges = new ArrayList<>(); + List fromSequenceRanges = new ArrayList<>(); + + for (int[] range : map.getToRanges()) + { + int[] fromRange = map.locateInFrom(range[0], range[1]); + if (fromRange == null) + { + // corrupted map?!? + continue; + } + + int[] newRange = mapReferenceRange(range, chromosome, species, seqRef, + vcfAssembly); + if (newRange == null) + { + Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s", + species, chromosome, seqRef, range[0], range[1], + vcfAssembly)); + continue; + } + else + { + toVcfRanges.add(newRange); + fromSequenceRanges.add(fromRange); + } + } + + return new MapList(fromSequenceRanges, toVcfRanges, 1, 1); + } + } diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index bd03448..f76c2ef 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -796,42 +796,8 @@ public class VCFLoader return new VCFMap(chromosome, map); } - /* - * VCF data has a different reference assembly to the sequence: - * query Ensembl to map chromosomal coordinates from sequence to VCF - */ - List toVcfRanges = new ArrayList<>(); - List fromSequenceRanges = new ArrayList<>(); - - for (int[] range : map.getToRanges()) - { - int[] fromRange = map.locateInFrom(range[0], range[1]); - if (fromRange == null) - { - // corrupted map?!? - continue; - } - - int[] newRange = GenomicAssemblies.mapReferenceRange(range, chromosome, "human", seqRef, - vcfAssembly); - if (newRange == null) - { - Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s", - species, chromosome, seqRef, range[0], range[1], - vcfAssembly)); - continue; - } - else - { - toVcfRanges.add(newRange); - fromSequenceRanges.add(fromRange); - } - } - - return new VCFMap(chromosome, - new MapList(fromSequenceRanges, toVcfRanges, 1, 1)); + return new VCFMap(chromosome,GenomicAssemblies.mapAssemblyFor(seqRef,"human",map,chromosome,vcfAssembly)); } - /** * If the sequence id matches a contig declared in the VCF file, and the * sequence length matches the contig length, then returns a 1:1 map of the -- 1.7.10.2