--- /dev/null
+package jalview.datamodel;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import jalview.bin.Console;
+import jalview.ext.ensembl.EnsemblMap;
+import jalview.io.vcf.VCFLoader;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+
+/**
+ * Holds mappings between gemomic assemblies Lazily populated as required from
+ * Ensembl Liftover and other datasources
+ *
+ * @author gmungoc
+ *
+ */
+public class GenomicAssemblies
+{
+
+ /*
+ * mappings between VCF and sequence reference assembly regions, as
+ * key = "species!chromosome!fromAssembly!toAssembly
+ * value = Map{fromRange, toRange}
+ */
+ private static Map<String, Map<int[], int[]>> assemblyMappings = new HashMap<>();
+
+ /*
+ * internal delimiter used to build keys for assemblyMappings
+ *
+ */
+ private static final String EXCL = "!";
+
+ /**
+ * Formats a ranges map lookup key
+ *
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ private static String makeRangesKey(String chromosome, String species,
+ String fromRef, String toRef)
+ {
+ return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef;
+ }
+
+ /**
+ * Determines the location of the query range (chromosome positions) in a
+ * different reference assembly.
+ * <p>
+ * If the range is just a subregion of one for which we already have a mapping
+ * (for example, an exon sub-region of a gene), then the mapping is just
+ * computed arithmetically.
+ * <p>
+ * Otherwise, calls the Ensembl REST service that maps from one assembly
+ * reference's coordinates to another's
+ *
+ * @param queryRange
+ * start-end chromosomal range in 'fromRef' coordinates
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * assembly reference for the query coordinates
+ * @param toRef
+ * assembly reference we wish to translate to
+ * @return the start-end range in 'toRef' coordinates
+ */
+ private static int[] mapReferenceRange(int[] queryRange,
+ String chromosome, String species, String fromRef, String toRef)
+ {
+ /*
+ * first try shorcut of computing the mapping as a subregion of one
+ * we already have (e.g. for an exon, if we have the gene mapping)
+ */
+ int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
+ species, fromRef, toRef);
+ if (mappedRange != null)
+ {
+ return mappedRange;
+ }
+
+ /*
+ * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
+ */
+ EnsemblMap mapper = new EnsemblMap();
+ int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef,
+ toRef, queryRange);
+
+ if (mapping == null)
+ {
+ // mapping service failure
+ return null;
+ }
+
+ /*
+ * save mapping for possible future re-use
+ */
+ String key = GenomicAssemblies.makeRangesKey(chromosome, species,
+ fromRef, toRef);
+ if (!assemblyMappings.containsKey(key))
+ {
+ assemblyMappings.put(key, new HashMap<int[], int[]>());
+ }
+
+ assemblyMappings.get(key).put(queryRange, mapping);
+
+ return mapping;
+ }
+
+ /**
+ * If we already have a 1:1 contiguous mapping which subsumes the given query
+ * range, this method just calculates and returns the subset of that mapping,
+ * else it returns null. In practical terms, if a gene has a contiguous
+ * mapping between (for example) GRCh37 and GRCh38, then we assume that its
+ * subsidiary exons occupy unchanged relative positions, and just compute
+ * these as offsets, rather than do another lookup of the mapping.
+ * <p>
+ * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
+ * simply remove this method or let it always return null.
+ * <p>
+ * Warning: many rapid calls to the /map service map result in a 429 overload
+ * error response
+ *
+ * @param queryRange
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ private static int[] findSubsumedRangeMapping(int[] queryRange,
+ String chromosome, String species, String fromRef, String toRef)
+ {
+ String key = GenomicAssemblies.makeRangesKey(chromosome, species,
+ fromRef, toRef);
+ if (assemblyMappings.containsKey(key))
+ {
+ Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
+ for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
+ {
+ int[] fromRange = mappedRange.getKey();
+ int[] toRange = mappedRange.getValue();
+ if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
+ {
+ /*
+ * mapping is 1:1 in length, so we trust it to have no discontinuities
+ */
+ if (MappingUtils.rangeContains(fromRange, queryRange))
+ {
+ /*
+ * fromRange subsumes our query range
+ */
+ int offset = queryRange[0] - fromRange[0];
+ int mappedRangeFrom = toRange[0] + offset;
+ int mappedRangeTo = mappedRangeFrom
+ + (queryRange[1] - queryRange[0]);
+ return new int[] { mappedRangeFrom, mappedRangeTo };
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * query Ensembl to map chromosomal coordinates between different
+ * assemblies<br>
+ * <em>will most likely fail for species other than human</em>
+ */
+ public static MapList mapAssemblyFor(String localAssembly, String species,
+ MapList map, String chromosome, String vcfAssembly)
+ {
+ List<int[]> toVcfRanges = new ArrayList<>();
+ List<int[]> fromSequenceRanges = new ArrayList<>();
+
+ for (int[] range : map.getToRanges())
+ {
+ int[] fromRange = map.locateInFrom(range[0], range[1]);
+ if (fromRange == null)
+ {
+ // corrupted map?!?
+ continue;
+ }
+
+ int[] newRange = mapReferenceRange(range, chromosome, species, localAssembly,
+ vcfAssembly);
+ if (newRange == null)
+ {
+ Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s",
+ species, chromosome, localAssembly, range[0], range[1],
+ vcfAssembly));
+ continue;
+ }
+ else
+ {
+ toVcfRanges.add(newRange);
+ fromSequenceRanges.add(fromRange);
+ }
+ }
+
+ return new MapList(fromSequenceRanges, toVcfRanges, 1, 1);
+ }
+
+ /**
+ * get a maplist for positions in the given assembly for the given locus
+ * @param locus
+ * @param assembly
+ * @return
+ */
+ public static MapList mapAssemblyFor(GeneLociI locus, String assembly)
+ {
+ return mapAssemblyFor(assembly,locus.getSpeciesId(),locus.getMapping(),locus.getChromosomeId(),locus.getAssemblyId());
+ }
+
+}
import jalview.bin.Console;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.GeneLociI;
+import jalview.datamodel.GenomicAssemblies;
import jalview.datamodel.Mapping;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
private static final String FEATURE_GROUP_VCF = "VCF";
/*
- * internal delimiter used to build keys for assemblyMappings
- *
- */
- private static final String EXCL = "!";
-
- /*
* the VCF file we are processing
*/
protected String vcfFilePath;
- /*
- * mappings between VCF and sequence reference assembly regions, as
- * key = "species!chromosome!fromAssembly!toAssembly
- * value = Map{fromRange, toRange}
- */
- private Map<String, Map<int[], int[]>> assemblyMappings;
-
private VCFReader reader;
/*
{
System.err.println("Error opening VCF file: " + e.getMessage());
}
-
- // map of species!chromosome!fromAssembly!toAssembly to {fromRange, toRange}
- assemblyMappings = new HashMap<>();
}
/**
return new VCFMap(chromosome, map);
}
- /*
- * VCF data has a different reference assembly to the sequence:
- * query Ensembl to map chromosomal coordinates from sequence to VCF
- */
- List<int[]> toVcfRanges = new ArrayList<>();
- List<int[]> fromSequenceRanges = new ArrayList<>();
-
- for (int[] range : map.getToRanges())
- {
- int[] fromRange = map.locateInFrom(range[0], range[1]);
- if (fromRange == null)
- {
- // corrupted map?!?
- continue;
- }
-
- int[] newRange = mapReferenceRange(range, chromosome, "human", seqRef,
- vcfAssembly);
- if (newRange == null)
- {
- Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s",
- species, chromosome, seqRef, range[0], range[1],
- vcfAssembly));
- continue;
- }
- else
- {
- toVcfRanges.add(newRange);
- fromSequenceRanges.add(fromRange);
- }
- }
-
- return new VCFMap(chromosome,
- new MapList(fromSequenceRanges, toVcfRanges, 1, 1));
+ return new VCFMap(chromosome,GenomicAssemblies.mapAssemblyFor(seqRef,"human",map,chromosome,vcfAssembly));
}
-
/**
* If the sequence id matches a contig declared in the VCF file, and the
* sequence length matches the contig length, then returns a 1:1 map of the
}
/**
- * Determines the location of the query range (chromosome positions) in a
- * different reference assembly.
- * <p>
- * If the range is just a subregion of one for which we already have a mapping
- * (for example, an exon sub-region of a gene), then the mapping is just
- * computed arithmetically.
- * <p>
- * Otherwise, calls the Ensembl REST service that maps from one assembly
- * reference's coordinates to another's
- *
- * @param queryRange
- * start-end chromosomal range in 'fromRef' coordinates
- * @param chromosome
- * @param species
- * @param fromRef
- * assembly reference for the query coordinates
- * @param toRef
- * assembly reference we wish to translate to
- * @return the start-end range in 'toRef' coordinates
- */
- protected int[] mapReferenceRange(int[] queryRange, String chromosome,
- String species, String fromRef, String toRef)
- {
- /*
- * first try shorcut of computing the mapping as a subregion of one
- * we already have (e.g. for an exon, if we have the gene mapping)
- */
- int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome,
- species, fromRef, toRef);
- if (mappedRange != null)
- {
- return mappedRange;
- }
-
- /*
- * call (e.g.) http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37
- */
- EnsemblMap mapper = new EnsemblMap();
- int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef,
- toRef, queryRange);
-
- if (mapping == null)
- {
- // mapping service failure
- return null;
- }
-
- /*
- * save mapping for possible future re-use
- */
- String key = makeRangesKey(chromosome, species, fromRef, toRef);
- if (!assemblyMappings.containsKey(key))
- {
- assemblyMappings.put(key, new HashMap<int[], int[]>());
- }
-
- assemblyMappings.get(key).put(queryRange, mapping);
-
- return mapping;
- }
-
- /**
- * If we already have a 1:1 contiguous mapping which subsumes the given query
- * range, this method just calculates and returns the subset of that mapping,
- * else it returns null. In practical terms, if a gene has a contiguous
- * mapping between (for example) GRCh37 and GRCh38, then we assume that its
- * subsidiary exons occupy unchanged relative positions, and just compute
- * these as offsets, rather than do another lookup of the mapping.
- * <p>
- * If in future these assumptions prove invalid (e.g. for bacterial dna?!),
- * simply remove this method or let it always return null.
- * <p>
- * Warning: many rapid calls to the /map service map result in a 429 overload
- * error response
- *
- * @param queryRange
- * @param chromosome
- * @param species
- * @param fromRef
- * @param toRef
- * @return
- */
- protected int[] findSubsumedRangeMapping(int[] queryRange,
- String chromosome, String species, String fromRef, String toRef)
- {
- String key = makeRangesKey(chromosome, species, fromRef, toRef);
- if (assemblyMappings.containsKey(key))
- {
- Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
- for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet())
- {
- int[] fromRange = mappedRange.getKey();
- int[] toRange = mappedRange.getValue();
- if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0])
- {
- /*
- * mapping is 1:1 in length, so we trust it to have no discontinuities
- */
- if (MappingUtils.rangeContains(fromRange, queryRange))
- {
- /*
- * fromRange subsumes our query range
- */
- int offset = queryRange[0] - fromRange[0];
- int mappedRangeFrom = toRange[0] + offset;
- int mappedRangeTo = mappedRangeFrom
- + (queryRange[1] - queryRange[0]);
- return new int[] { mappedRangeFrom, mappedRangeTo };
- }
- }
- }
- }
- return null;
- }
-
- /**
* Transfers the sequence feature to the target sequence, locating its start
* and end range based on the mapping. Features which do not overlap the
* target sequence are ignored.
targetSequence.addSequenceFeature(copy);
}
}
-
- /**
- * Formats a ranges map lookup key
- *
- * @param chromosome
- * @param species
- * @param fromRef
- * @param toRef
- * @return
- */
- protected static String makeRangesKey(String chromosome, String species,
- String fromRef, String toRef)
- {
- return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef;
- }
}
addEndpoint(new HighlightSequenceEndpoint(this));
addEndpoint(new SelectSequencesEndpoint(this));
addEndpoint(new GetCrossReferencesEndpoint(this));
+ addEndpoint(new HighlightGenomicRangeEndpoint(this));
setPath(MY_PATH);
this.registerHandler();
--- /dev/null
+package jalview.rest;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import jalview.api.AlignmentViewPanel;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
+import jalview.datamodel.GeneLocus;
+import jalview.datamodel.GenomicAssemblies;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.structure.StructureSelectionManager;
+import jalview.util.MapList;
+
+public class HighlightGenomicRangeEndpoint extends AbstractEndpoint
+{
+ public HighlightGenomicRangeEndpoint(API api)
+ {
+ super(api, path, name, parameters, description);
+ }
+
+ protected static final String path = "highlightgenome";
+
+ private static final String name = "Highlight sequence positions for genomic regions";
+
+ private static final String parameters = "species/assembly/chromosome/<position>";
+
+ private static final String description = "Highlight positions in sequences mapped to a particular location in a species' genome. assembly can be just the species name if it is not known.";
+
+ public void processEndpoint(HttpServletRequest request,
+ HttpServletResponse response)
+ {
+ if (!checkParameters(request, response, 4))
+ {
+ return;
+ }
+ String[] parameters = getEndpointPathParameters(request);
+
+ String species=parameters[0], assembly=parameters[1],chromid=parameters[2],posString=parameters[3];
+
+ int pos = -1;
+ try
+ {
+ pos = Integer.parseInt(posString);
+ } catch (NumberFormatException e)
+ {
+ returnError(request, response,
+ "Could not parse postition integer " + posString);
+ }
+
+ AlignFrame[] alignFrames = getAlignFrames(request, true);
+ if (alignFrames == null)
+ {
+ returnError(request, response, "could not find results");
+ return;
+ }
+
+ Map<SequenceI, StructureSelectionManager> ssmMap = new HashMap<>();
+
+ for (int i = 0; i < alignFrames.length; i++)
+ {
+ AlignFrame af = alignFrames[i];
+ List<AlignmentViewPanel> aps = (List<AlignmentViewPanel>) af
+ .getAlignPanels();
+
+ for (AlignmentViewPanel ap : aps)
+ {
+ StructureSelectionManager ssm = ap.getStructureSelectionManager();
+ AlignmentI al = ap.getAlignment();
+ List<SequenceI> seqs = (List<SequenceI>) al.getSequences();
+ for (SequenceI seq : seqs)
+ {
+ GeneLociI locus = seq.getGeneLoci();
+ if (locus!=null && locus.getSpeciesId().equalsIgnoreCase(species))
+ {
+ if (locus.getAssemblyId() == null || assembly.equals(species)
+ || locus.getAssemblyId().equals(assembly))
+ {
+ if (locus.getChromosomeId() != null
+ && locus.getChromosomeId().equalsIgnoreCase(chromid))
+ {
+ MapList mp = locus.getMapping();
+ // TODO: find a better way to determine which is the sequence corresponding to the reference locus (ie the ENSG sequence)
+ if ((mp.getFromHighest()-mp.getFromLowest())==(mp.getToHighest()-mp.getToLowest()))
+ {
+ // genomic locus (we hope)
+ ssmMap.put(seq, ssm);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // Currently:
+ // seq is an ENSG.. sequence - so 'mouseOverVamsasSequence' will highlight that position, but not the mapped position(s) in transcripts/products.
+ // a couple of options:
+ // 1. broaden search above to record mapped regions in transcripts and either highlight directly, or add a 'highlight all regions' call to structure selection manager
+ // 2. create a new structureselectionmanager call highlightLocusPosition(..)
+
+ // highlight
+ for (SequenceI seq : ssmMap.keySet())
+ {
+ StructureSelectionManager ssm = ssmMap.get(seq);
+ if (ssm == null)
+ {
+ continue;
+ }
+ GeneLociI locus = seq.getGeneLoci();
+ MapList assmap=locus.getMapping();
+
+ if (assembly!=null && !assembly.equalsIgnoreCase(species) && locus.getAssemblyId()!=null && !locus.getAssemblyId().equalsIgnoreCase(assembly))
+ {
+ assmap = GenomicAssemblies.mapAssemblyFor(locus, assembly);
+ }
+ // locate local position(s) mapping to species:assembly:chromosome:pos
+ int[] mappedPos = assmap.locateInFrom(pos, pos);
+ if (mappedPos!=null && mappedPos.length>0)
+ {
+ // use vamsas sequence so the position in the reference locus is added to the region being highlighted, as well as the mapped sequence
+ ssm.mouseOverVamsasSequence(seq.getDatasetSequence(), mappedPos[0], null);
+ }
+ }
+
+ }
+}
\ No newline at end of file