*/
locateCsqFields();
- // check if reference is wrt assembly19 (GRCh37)
- // todo may need to allow user to specify reference assembly?
- boolean isRefGrch37 = (ref != null && ref.getValue().contains(
- "assembly19"));
+ String vcfAssembly = ref.getValue();
int varCount = 0;
int seqCount = 0;
*/
for (SequenceI seq : al.getSequences())
{
- int added = loadSequenceVCF(seq, reader, isRefGrch37);
+ int added = loadSequenceVCF(seq, reader, vcfAssembly);
if (added > 0)
{
seqCount++;
/**
* Tries to add overlapping variants read from a VCF file to the given
* sequence, and returns the number of variant features added. Note that this
- * requires the sequence to hold information as to its chromosomal positions
- * and reference, in order to be able to map the VCF variants to the sequence.
+ * requires the sequence to hold information as to its species, chromosomal
+ * positions and reference assembly, in order to be able to map the VCF
+ * variants to the sequence (or not)
*
* @param seq
* @param reader
- * @param isVcfRefGrch37
+ * @param vcfAssembly
* @return
*/
protected int loadSequenceVCF(SequenceI seq, VCFReader reader,
- boolean isVcfRefGrch37)
+ String vcfAssembly)
{
int count = 0;
GeneLociI seqCoords = seq.getGeneLoci();
return 0;
}
+ if (!vcfSpeciesMatchesSequence(vcfAssembly, seqCoords.getSpeciesId()))
+ {
+ return 0;
+ }
+
List<int[]> seqChromosomalContigs = seqCoords.getMap().getToRanges();
for (int[] range : seqChromosomalContigs)
{
- count += addVcfVariants(seq, reader, range, isVcfRefGrch37);
+ count += addVcfVariants(seq, reader, range, vcfAssembly);
}
return count;
}
/**
+ * Answers true if the species inferred from the VCF reference identifier
+ * matches that for the sequence
+ *
+ * @param vcfAssembly
+ * @param speciesId
+ * @return
+ */
+ boolean vcfSpeciesMatchesSequence(String vcfAssembly, String speciesId)
+ {
+ // PROBLEM 1
+ // there are many aliases for species - how to equate one with another?
+ // PROBLEM 2
+ // VCF ##reference header is an unstructured URI - how to extract species?
+ // perhaps check if ref includes any (Ensembl) alias of speciesId??
+ // TODO ask the user to confirm this??
+
+ if (vcfAssembly.contains("Homo_sapiens") // gnomAD exome data example
+ && "HOMO_SAPIENS".equals(speciesId)) // Ensembl species id
+ {
+ return true;
+ }
+
+ if (vcfAssembly.contains("c_elegans") // VEP VCF response example
+ && "CAENORHABDITIS_ELEGANS".equals(speciesId)) // Ensembl
+ {
+ return true;
+ }
+
+ // this is not a sustainable solution...
+
+ return false;
+ }
+
+ /**
* Queries the VCF reader for any variants that overlap the given chromosome
* region of the sequence, and adds as variant features. Returns the number of
* overlapping variants found.
* @param range
* start-end range of a sequence region in its chromosomal
* coordinates
- * @param isVcfRefGrch37
- * true if the VCF is with reference to GRCh37
+ * @param vcfAssembly
+ * the '##reference' identifier for the VCF reference assembly
* @return
*/
protected int addVcfVariants(SequenceI seq, VCFReader reader,
- int[] range, boolean isVcfRefGrch37)
+ int[] range, String vcfAssembly)
{
GeneLociI seqCoords = seq.getGeneLoci();
String species = seqCoords.getSpeciesId();
/*
- * map chromosomal coordinates from GRCh38 (sequence) to
- * GRCh37 (VCF) if necessary
+ * map chromosomal coordinates from sequence to VCF if the VCF
+ * data has a different reference assembly to the sequence
*/
- // TODO generalise for other assemblies and species
+ // TODO generalise for non-human species
+ // - or get the user to choose in a dialog
+
int offset = 0;
- String fromRef = "GRCh38";
- if (fromRef.equalsIgnoreCase(seqRef) && isVcfRefGrch37)
+ if ("GRCh38".equalsIgnoreCase(seqRef) // Ensembl
+ && vcfAssembly.contains("Homo_sapiens_assembly19")) // gnomAD
{
String toRef = "GRCh37";
int[] newRange = mapReferenceRange(range, chromosome, "human",
- fromRef, toRef);
+ seqRef, toRef);
if (newRange == null)
{
System.err.println(String.format(
"Failed to map %s:%s:%s:%d:%d to %s", species, chromosome,
- fromRef, range[0], range[1], toRef));
+ seqRef, range[0], range[1], toRef));
return 0;
}
offset = newRange[0] - range[0];