package jalview.datamodel;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import jalview.bin.Console;
import jalview.ext.ensembl.EnsemblMap;
import jalview.io.vcf.VCFLoader;
+import jalview.util.MapList;
import jalview.util.MappingUtils;
+/**
+ * Holds mappings between genomic assemblies. Lazily populated as required
+ * from Ensembl Liftover and other data sources.
+ *
+ * @author gmungoc
+ *
+ */
public class GenomicAssemblies
{
* @param toRef
* @return
*/
- public static String makeRangesKey(String chromosome, String species,
+ private static String makeRangesKey(String chromosome, String species,
String fromRef, String toRef)
{
return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef;
* assembly reference we wish to translate to
* @return the start-end range in 'toRef' coordinates
*/
- public static int[] mapReferenceRange(int[] queryRange, String chromosome,
- String species, String fromRef, String toRef)
+ private static int[] mapReferenceRange(int[] queryRange,
+ String chromosome, String species, String fromRef, String toRef)
{
/*
* first try shortcut of computing the mapping as a subregion of one
/*
* save mapping for possible future re-use
*/
- String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef);
+ String key = GenomicAssemblies.makeRangesKey(chromosome, species,
+ fromRef, toRef);
if (!assemblyMappings.containsKey(key))
{
assemblyMappings.put(key, new HashMap<int[], int[]>());
* @param toRef
* @return
*/
- protected static int[] findSubsumedRangeMapping(int[] queryRange,
+ private static int[] findSubsumedRangeMapping(int[] queryRange,
String chromosome, String species, String fromRef, String toRef)
{
- String key = GenomicAssemblies.makeRangesKey(chromosome, species, fromRef, toRef);
+ String key = GenomicAssemblies.makeRangesKey(chromosome, species,
+ fromRef, toRef);
if (assemblyMappings.containsKey(key))
{
Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
return null;
}
+ /**
+ * query Ensembl to map chromosomal coordinates between different
+ * assemblies<br>
+ * <em>will most likely fail for species other than human</em>
+ * <p>
+ * Each 'to' range of the supplied map is paired with the range that
+ * locateInFrom reports for it, and is then translated from the seqRef
+ * assembly to the vcfAssembly assembly by mapReferenceRange. A range is
+ * left out of the result if either lookup returns null; a failed assembly
+ * translation is also reported through Console.error.
+ *
+ * @param seqRef
+ * assembly reference the ranges are currently expressed in (passed
+ * to mapReferenceRange as the 'from' reference)
+ * @param species
+ * species name passed on to mapReferenceRange and used in the
+ * error message
+ * @param map
+ * mapping whose 'to' ranges are to be translated; must not be null
+ * @param chromosome
+ * chromosome identifier passed on to mapReferenceRange
+ * @param vcfAssembly
+ * assembly reference to translate to (passed to mapReferenceRange
+ * as the 'to' reference)
+ * @return a new MapList built from the retained 'from' ranges and their
+ * translated ranges, constructed with ratio arguments 1 and 1
+ * (presumably a one-to-one mapping - confirm against MapList); it
+ * holds no ranges if nothing could be translated
+ */
+ public static MapList mapAssemblyFor(String seqRef, String species,
+ MapList map, String chromosome, String vcfAssembly)
+ {
+ List<int[]> toVcfRanges = new ArrayList<>();
+ List<int[]> fromSequenceRanges = new ArrayList<>();
+
+ for (int[] range : map.getToRanges())
+ {
+ int[] fromRange = map.locateInFrom(range[0], range[1]);
+ if (fromRange == null)
+ {
+ // corrupted map?!? - no 'from' range found for this 'to' range, skip it
+ continue;
+ }
+
+ int[] newRange = mapReferenceRange(range, chromosome, species, seqRef,
+ vcfAssembly);
+ if (newRange == null)
+ {
+ Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s",
+ species, chromosome, seqRef, range[0], range[1],
+ vcfAssembly));
+ continue;
+ }
+ else
+ {
+ toVcfRanges.add(newRange);
+ fromSequenceRanges.add(fromRange);
+ }
+ }
+
+ return new MapList(fromSequenceRanges, toVcfRanges, 1, 1);
+ }
+
}
return new VCFMap(chromosome, map);
}
- /*
- * VCF data has a different reference assembly to the sequence:
- * query Ensembl to map chromosomal coordinates from sequence to VCF
- */
- List<int[]> toVcfRanges = new ArrayList<>();
- List<int[]> fromSequenceRanges = new ArrayList<>();
-
- for (int[] range : map.getToRanges())
- {
- int[] fromRange = map.locateInFrom(range[0], range[1]);
- if (fromRange == null)
- {
- // corrupted map?!?
- continue;
- }
-
- int[] newRange = GenomicAssemblies.mapReferenceRange(range, chromosome, "human", seqRef,
- vcfAssembly);
- if (newRange == null)
- {
- Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s",
- species, chromosome, seqRef, range[0], range[1],
- vcfAssembly));
- continue;
- }
- else
- {
- toVcfRanges.add(newRange);
- fromSequenceRanges.add(fromRange);
- }
- }
-
- return new VCFMap(chromosome,
- new MapList(fromSequenceRanges, toVcfRanges, 1, 1));
+ return new VCFMap(chromosome,GenomicAssemblies.mapAssemblyFor(seqRef,"human",map,chromosome,vcfAssembly));
}
-
/**
* If the sequence id matches a contig declared in the VCF file, and the
* sequence length matches the contig length, then returns a 1:1 map of the