package jalview.io.vcf;
-import jalview.analysis.AlignmentUtils;
import jalview.analysis.Dna;
import jalview.api.AlignViewControllerGuiI;
import jalview.bin.Cache;
*/
private static final String VCF_ASSEMBLY = "VCF_ASSEMBLY";
- private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh38,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38";
+ private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh37,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38";
private static final String VCF_SPECIES = "VCF_SPECIES"; // default is human
+ private static final String DEFAULT_REFERENCE = "grch37"; // fallback default is human GRCh37
+
/*
* keys to fields of VEP CSQ consequence data
* see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html
*/
public SequenceI loadVCFContig(String contig)
{
- String ref = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY)
- .getValue();
+ VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
+ if (headerLine == null)
+ {
+ Cache.log.error("VCF reference header not found");
+ return null;
+ }
+ String ref = headerLine.getValue();
if (ref.startsWith("file://"))
{
ref = ref.substring(7);
}
else
{
- System.err.println("VCF reference not found: " + ref);
+ Cache.log.error("VCF reference not found: " + ref);
}
return seq;
{
VCFHeaderLine ref = header
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
- String reference = ref.getValue();
+ String reference = ref == null ? null : ref.getValue();
setSpeciesAndAssembly(reference);
*/
protected void setSpeciesAndAssembly(String reference)
{
- vcfSpecies = DEFAULT_SPECIES;
+ if (reference == null)
+ {
+ Cache.log.error("No VCF ##reference found, defaulting to "
+ + DEFAULT_REFERENCE + ":" + DEFAULT_SPECIES);
+ reference = DEFAULT_REFERENCE; // default to GRCh37 if not specified
+ }
+ reference = reference.toLowerCase();
/*
* for a non-human species, or other assembly identifier,
}
}
+ vcfSpecies = DEFAULT_SPECIES;
prop = Cache.getProperty(VCF_SPECIES);
if (prop != null)
{
/*
* dna-to-peptide product mapping
*/
- AlignmentUtils.computeProteinFeatures(seq, mapTo, map);
+ // JAL-3187: render on the fly instead of precomputing protein features here
+ // AlignmentUtils.computeProteinFeatures(seq, mapTo, map);
}
else
{
String species = seqCoords.getSpeciesId();
String chromosome = seqCoords.getChromosomeId();
String seqRef = seqCoords.getAssemblyId();
- MapList map = seqCoords.getMap();
+ MapList map = seqCoords.getMapping();
// note: this requires the configured species to match the one
// returned with the Ensembl sequence; TODO: support species aliases?
}
/**
- * Answers true if the species inferred from the VCF reference identifier
- * matches that for the sequence
- *
- * @param vcfAssembly
- * @param speciesId
- * @return
- */
- boolean vcfSpeciesMatchesSequence(String vcfAssembly, String speciesId)
- {
- // PROBLEM 1
- // there are many aliases for species - how to equate one with another?
- // PROBLEM 2
- // VCF ##reference header is an unstructured URI - how to extract species?
- // perhaps check if ref includes any (Ensembl) alias of speciesId??
- // TODO ask the user to confirm this??
-
- if (vcfAssembly.contains("Homo_sapiens") // gnomAD exome data example
- && "HOMO_SAPIENS".equals(speciesId)) // Ensembl species id
- {
- return true;
- }
-
- if (vcfAssembly.contains("c_elegans") // VEP VCF response example
- && "CAENORHABDITIS_ELEGANS".equals(speciesId)) // Ensembl
- {
- return true;
- }
-
- // this is not a sustainable solution...
-
- return false;
- }
-
- /**
* Queries the VCF reader for any variants that overlap the mapped chromosome
* ranges of the sequence, and adds as variant features. Returns the number of
* overlapping variants found.