1 package jalview.datamodel;
3 import java.util.HashMap;
5 import java.util.Map.Entry;
7 import jalview.ext.ensembl.EnsemblMap;
8 import jalview.io.vcf.VCFLoader;
9 import jalview.util.MappingUtils;
11 public class GenomicAssemblies
15 * mappings between VCF and sequence reference assembly regions, as
16 * key = "species!chromosome!fromAssembly!toAssembly"
17 * value = Map{fromRange, toRange}
19 private static Map<String, Map<int[], int[]>> assemblyMappings = new HashMap<>();
22 * internal delimiter used to build keys for assemblyMappings
25 private static final String EXCL = "!";
28 * Formats a ranges map lookup key
36 public static String makeRangesKey(String chromosome, String species,
37 String fromRef, String toRef)
39 return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef;
43 * Determines the location of the query range (chromosome positions) in a
44 * different reference assembly.
46 * If the range is just a subregion of one for which we already have a mapping
47 * (for example, an exon sub-region of a gene), then the mapping is just
48 * computed arithmetically.
50 * Otherwise, calls the Ensembl REST service that maps from one assembly
51 * reference's coordinates to another's
54 * start-end chromosomal range in 'fromRef' coordinates
58 * assembly reference for the query coordinates
60 * assembly reference we wish to translate to
61 * @return the start-end range in 'toRef' coordinates
63 public static int[] mapReferenceRange(int[] queryRange, String chromosome,
64 String species, String fromRef, String toRef)
67 * first try shortcut of computing the mapping as a subregion of one
68 * we already have (e.g. for an exon, if we have the gene mapping)
70 int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
71 species, fromRef, toRef);
72 if (mappedRange != null)
78 * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
80 EnsemblMap mapper = new EnsemblMap();
81 int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef,
86 // mapping service failure
91 * save mapping for possible future re-use
93 String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef);
94 if (!assemblyMappings.containsKey(key))
96 assemblyMappings.put(key, new HashMap<int[], int[]>());
99 assemblyMappings.get(key).put(queryRange, mapping);
105 * If we already have a 1:1 contiguous mapping which subsumes the given query
106 * range, this method just calculates and returns the subset of that mapping,
107 * else it returns null. In practical terms, if a gene has a contiguous
108 * mapping between (for example) GRCh37 and GRCh38, then we assume that its
109 * subsidiary exons occupy unchanged relative positions, and just compute
110 * these as offsets, rather than do another lookup of the mapping.
112 * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
113 * simply remove this method or let it always return null.
115 * Warning: many rapid calls to the /map service may result in a 429 overload
125 protected static int[] findSubsumedRangeMapping(int[] queryRange,
126 String chromosome, String species, String fromRef, String toRef)
128 String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef);
129 if (assemblyMappings.containsKey(key))
131 Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
132 for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
134 int[] fromRange = mappedRange.getKey();
135 int[] toRange = mappedRange.getValue();
136 if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
139 * mapping is 1:1 in length, so we trust it to have no discontinuities
141 if (MappingUtils.rangeContains(fromRange, queryRange))
144 * fromRange subsumes our query range
146 int offset = queryRange[0] - fromRange[0];
147 int mappedRangeFrom = toRange[0] + offset;
148 int mappedRangeTo = mappedRangeFrom
149 + (queryRange[1] - queryRange[0]);
150 return new int[] { mappedRangeFrom, mappedRangeTo };