import htsjdk.variant.vcf.VCFHeaderLine;
import jalview.analysis.AlignmentUtils;
+import jalview.api.AlignViewControllerGuiI;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.GeneLoci;
import jalview.datamodel.SequenceI;
import jalview.ext.ensembl.EnsemblMap;
import jalview.ext.htsjdk.VCFReader;
+import jalview.io.gff.Gff3Helper;
import jalview.io.gff.SequenceOntologyI;
import jalview.util.MapList;
+import jalview.util.MappingUtils;
+import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
* instances of this class.
*
* @param filePath
+ * @param status
*/
- public void loadVCF(String filePath)
+ public void loadVCF(String filePath, AlignViewControllerGuiI status)
{
VCFReader reader = null;
try
{
- long start = System.currentTimeMillis();
+ // long start = System.currentTimeMillis();
reader = new VCFReader(filePath);
VCFHeader header = reader.getFileHeader();
computePeptideVariants(seq);
}
}
- long elapsed = System.currentTimeMillis() - start;
- System.out.println(String.format(
- "Added %d VCF variants to %d sequence(s) (%dms)", varCount,
- seqCount, elapsed));
-
- reader.close();
+ // long elapsed = System.currentTimeMillis() - start;
+ String msg = String.format("Added %d VCF variants to %d sequence(s)",
+ varCount, seqCount);
+ if (status != null)
+ {
+ status.setStatus(msg);
+ }
} catch (Throwable e)
{
System.err.println("Error processing VCF: " + e.getMessage());
e.printStackTrace();
+ } finally
+ {
+ if (reader != null)
+ {
+ try
+ {
+ reader.close();
+ } catch (IOException e)
+ {
+ // ignore
+ }
+ }
}
}
return 0;
}
- MapList mapping = seqCoords.getMapping();
- List<int[]> seqChromosomalContigs = mapping.getToRanges();
+ List<int[]> seqChromosomalContigs = seqCoords.mapping.getToRanges();
for (int[] range : seqChromosomalContigs)
{
count += addVcfVariants(seq, reader, range, isVcfRefGrch37);
{
GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
- String chromosome = seqCoords.getChromosome();
- String seqRef = seqCoords.getReference();
- String species = seqCoords.getSpecies();
+ String chromosome = seqCoords.chromosome;
+ String seqRef = seqCoords.assembly;
+ String species = seqCoords.species;
// TODO handle species properly
if ("".equals(species))
/*
* query the VCF for overlaps
+ * (convert a reverse strand range to forwards)
*/
int count = 0;
- MapList mapping = seqCoords.getMapping();
+ MapList mapping = seqCoords.mapping;
+ int fromLocus = Math.min(range[0], range[1]);
+ int toLocus = Math.max(range[0], range[1]);
CloseableIterator<VariantContext> variants = reader.query(chromosome,
- range[0], range[1]);
+ fromLocus, toLocus);
while (variants.hasNext())
{
/*
* get variant location in sequence chromosomal coordinates
*/
VariantContext variant = variants.next();
+
+ /*
+ * we can only process SNP variants (which can be reported
+ * as part of a MIXED variant record
+ */
+ if (!variant.isSNP() && !variant.isMixed())
+ {
+ continue;
+ }
+
count++;
int start = variant.getStart() - offset;
int end = variant.getEnd() - offset;
}
/**
- * Inspects the VCF variant record, and adds variant features to the sequence
+ * Inspects the VCF variant record, and adds variant features to the sequence.
+ * Only SNP variants are added, not INDELs.
*
* @param seq
* @param variant
protected void addVariantFeatures(SequenceI seq, VariantContext variant,
int featureStart, int featureEnd)
{
- StringBuilder sb = new StringBuilder();
- sb.append(variant.getReference().getBaseString());
-
- int alleleCount = 0;
- for (Allele allele : variant.getAlleles())
+ String reference = variant.getReference().getBaseString();
+ if (reference.length() != 1)
{
- if (!allele.isReference())
- {
- sb.append(",").append(allele.getBaseString());
- alleleCount++;
- }
+ /*
+ * sorry, we don't handle INDEL variants
+ */
+ return;
}
- String alleles = sb.toString(); // e.g. G,A,C
- String type = SequenceOntologyI.SEQUENCE_VARIANT;
+ /*
+ * for now we extract allele frequency as feature score; note
+ * this attribute is String for a simple SNP, but List<String> if
+ * multiple alleles at the locus; we extract for the simple case only,
+ * since not sure how to match allele order with AF values
+ */
+ Object af = variant.getAttribute("AF");
float score = 0f;
- if (alleleCount == 1)
+ if (af instanceof String)
{
try
{
- score = (float) variant.getAttributeAsDouble("AF", 0d);
+ score = Float.parseFloat((String) af);
} catch (NumberFormatException e)
{
- // leave score as 0
+ // leave as 0
}
}
- SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
- featureEnd, score, "VCF");
+
+ StringBuilder sb = new StringBuilder();
+ sb.append(reference);
+
+ /*
+ * inspect alleles and record SNP variants (as the variant
+ * record could be MIXED and include INDEL and SNP alleles)
+ */
+ int alleleCount = 0;
/*
- * only add 'alleles' property if a SNP, as we can
- * only handle SNPs when computing peptide variants
+ * inspect alleles; warning: getAlleles gives no guarantee
+ * as to the order in which they are returned
*/
- if (variant.isSNP())
+ for (Allele allele : variant.getAlleles())
{
- sf.setValue("alleles", alleles);
+ if (!allele.isReference())
+ {
+ String alleleBase = allele.getBaseString();
+ if (alleleBase.length() == 1)
+ {
+ sb.append(",").append(alleleBase);
+ alleleCount++;
+ }
+ }
}
+ String alleles = sb.toString(); // e.g. G,A,C
+
+ String type = SequenceOntologyI.SEQUENCE_VARIANT;
+
+ SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
+ featureEnd, score, "VCF");
+
+ sf.setValue(Gff3Helper.ALLELES, alleles);
Map<String, Object> atts = variant.getAttributes();
for (Entry<String, Object> att : atts.entrySet())
/*
* save mapping for possible future re-use
*/
- String key = species + EXCL + chromosome + EXCL + fromRef + EXCL
- + toRef;
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
if (!assemblyMappings.containsKey(key))
{
assemblyMappings.put(key, new HashMap<int[], int[]>());
protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome,
String species, String fromRef, String toRef)
{
- String key = species + EXCL + chromosome + EXCL + fromRef + EXCL
- + toRef;
+ String key = makeRangesKey(chromosome, species, fromRef, toRef);
if (assemblyMappings.containsKey(key))
{
Map<int[], int[]> mappedRanges = assemblyMappings.get(key);
/*
* mapping is 1:1 in length, so we trust it to have no discontinuities
*/
- if (fromRange[0] <= queryRange[0] && fromRange[1] >= queryRange[1])
+ if (MappingUtils.rangeContains(fromRange, queryRange))
{
/*
* fromRange subsumes our query range
}
return null;
}
+
+ /**
+ * Formats a ranges map lookup key
+ *
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ protected static String makeRangesKey(String chromosome, String species,
+ String fromRef, String toRef)
+ {
+ return species + EXCL + chromosome + EXCL + fromRef + EXCL
+ + toRef;
+ }
}