1 package jalview.datamodel;
3 import java.util.ArrayList;
4 import java.util.HashMap;
7 import java.util.Map.Entry;
9 import jalview.bin.Console;
10 import jalview.ext.ensembl.EnsemblMap;
11 import jalview.io.vcf.VCFLoader;
12 import jalview.util.MapList;
13 import jalview.util.MappingUtils;
16 * Holds mappings between gemomic assemblies Lazily populated as required from
17 * Ensembl Liftover and other datasources
22 public class GenomicAssemblies
26 * mappings between VCF and sequence reference assembly regions, as
27 * key = "species!chromosome!fromAssembly!toAssembly
28 * value = Map{fromRange, toRange}
30 private static Map<String, Map<int[], int[]>> assemblyMappings = new HashMap<>();
33 * internal delimiter used to build keys for assemblyMappings
36 private static final String EXCL = "!";
39 * Formats a ranges map lookup key
47 private static String makeRangesKey(String chromosome, String species,
48 String fromRef, String toRef)
50 return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef;
54 * Determines the location of the query range (chromosome positions) in a
55 * different reference assembly.
57 * If the range is just a subregion of one for which we already have a mapping
58 * (for example, an exon sub-region of a gene), then the mapping is just
59 * computed arithmetically.
61 * Otherwise, calls the Ensembl REST service that maps from one assembly
62 * reference's coordinates to another's
65 * start-end chromosomal range in 'fromRef' coordinates
69 * assembly reference for the query coordinates
71 * assembly reference we wish to translate to
72 * @return the start-end range in 'toRef' coordinates
74 private static int[] mapReferenceRange(int[] queryRange,
75 String chromosome, String species, String fromRef, String toRef)
78 * first try shorcut of computing the mapping as a subregion of one
79 * we already have (e.g. for an exon, if we have the gene mapping)
81 int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
82 species, fromRef, toRef);
83 if (mappedRange != null)
89 * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
91 EnsemblMap mapper = new EnsemblMap();
92 int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef,
97 // mapping service failure
102 * save mapping for possible future re-use
104 String key = GenomicAssemblies.makeRangesKey(chromosome, species,
106 if (!assemblyMappings.containsKey(key))
108 assemblyMappings.put(key, new HashMap<int[], int[]>());
111 assemblyMappings.get(key).put(queryRange, mapping);
117 * If we already have a 1:1 contiguous mapping which subsumes the given query
118 * range, this method just calculates and returns the subset of that mapping,
119 * else it returns null. In practical terms, if a gene has a contiguous
120 * mapping between (for example) GRCh37 and GRCh38, then we assume that its
121 * subsidiary exons occupy unchanged relative positions, and just compute
122 * these as offsets, rather than do another lookup of the mapping.
124 * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
125 * simply remove this method or let it always return null.
127 * Warning: many rapid calls to the /map service map result in a 429 overload
137 private static int[] findSubsumedRangeMapping(int[] queryRange,
138 String chromosome, String species, String fromRef, String toRef)
140 String key = GenomicAssemblies.makeRangesKey(chromosome, species,
142 if (assemblyMappings.containsKey(key))
144 Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
145 for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
147 int[] fromRange = mappedRange.getKey();
148 int[] toRange = mappedRange.getValue();
149 if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
152 * mapping is 1:1 in length, so we trust it to have no discontinuities
154 if (MappingUtils.rangeContains(fromRange, queryRange))
157 * fromRange subsumes our query range
159 int offset = queryRange[0] - fromRange[0];
160 int mappedRangeFrom = toRange[0] + offset;
161 int mappedRangeTo = mappedRangeFrom
162 + (queryRange[1] - queryRange[0]);
163 return new int[] { mappedRangeFrom, mappedRangeTo };
172 * query Ensembl to map chromosomal coordinates between different
174 * <em>will most likely fail for species other than human</em>
176 public static MapList mapAssemblyFor(String seqRef, String species,
177 MapList map, String chromosome, String vcfAssembly)
179 List<int[]> toVcfRanges = new ArrayList<>();
180 List<int[]> fromSequenceRanges = new ArrayList<>();
182 for (int[] range : map.getToRanges())
184 int[] fromRange = map.locateInFrom(range[0], range[1]);
185 if (fromRange == null)
191 int[] newRange = mapReferenceRange(range, chromosome, species, seqRef,
193 if (newRange == null)
195 Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s",
196 species, chromosome, seqRef, range[0], range[1],
202 toVcfRanges.add(newRange);
203 fromSequenceRanges.add(fromRange);
207 return new MapList(fromSequenceRanges, toVcfRanges, 1, 1);