import jalview.io.NewickFile;
import jalview.io.ScoreMatrixFile;
import jalview.io.TCoffeeScoreFile;
+import jalview.io.vcf.VCFLoader;
import jalview.jbgui.GAlignFrame;
import jalview.schemes.ColourSchemeI;
import jalview.schemes.ColourSchemes;
AlignmentI al = getViewport().getAlignment();
boolean nucleotide = al.isNucleotide();
+ loadVcf.setVisible(nucleotide);
showTranslation.setVisible(nucleotide);
showReverse.setVisible(nucleotide);
showReverseComplement.setVisible(nucleotide);
new CalculationChooser(AlignFrame.this);
}
}
+
+ @Override
+ protected void loadVcf_actionPerformed()
+ {
+ JalviewFileChooser chooser = new JalviewFileChooser(
+ Cache.getProperty("LAST_DIRECTORY"));
+ chooser.setFileView(new JalviewFileView());
+ chooser.setDialogTitle(MessageManager.getString("label.load_vcf_file"));
+ chooser.setToolTipText(MessageManager.getString("label.load_vcf_file"));
+
+ int value = chooser.showOpenDialog(null);
+
+ if (value == JalviewFileChooser.APPROVE_OPTION)
+ {
+ String choice = chooser.getSelectedFile().getPath();
+ Cache.setProperty("LAST_DIRECTORY", choice);
+ new VCFLoader(viewport.getAlignment()).loadVCF(choice);
+ }
+
+ }
}
class PrintThread extends Thread
--- /dev/null
+package jalview.io.vcf;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.GeneLoci;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.ext.htsjdk.VCFReader;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.util.MapList;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+public class VCFLoader
+{
+ AlignmentI al;
+
+ /**
+ * Constructor given an alignment context
+ *
+ * @param alignment
+ */
+ public VCFLoader(AlignmentI alignment)
+ {
+ al = alignment;
+ }
+
+ /**
+ * Loads VCF on to an alignment - provided it can be related to one or more
+ * sequence's chromosomal coordinates
+ *
+ * @param filePath
+ */
+ public void loadVCF(String filePath)
+ {
+ VCFReader reader = null;
+
+ try
+ {
+ reader = new VCFReader(filePath);
+
+ VCFHeader header = reader.getFileHeader();
+ VCFHeaderLine ref = header
+ .getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
+ // check if reference is wrt assembly19 (GRCh37)
+ // todo may need to allow user to specify reference assembly?
+ boolean isRefGrch37 = (ref != null && ref.getValue().contains(
+ "assembly19"));
+
+ int varCount = 0;
+ int seqCount = 0;
+
+ for (SequenceI seq : al.getSequences())
+ {
+ int added = loadVCF(seq, reader, isRefGrch37);
+ if (added > 0)
+ {
+ seqCount++;
+ varCount += added;
+ }
+ }
+ System.out.println(String.format(
+"Added %d VCF variants to %d sequence(s)", varCount,
+ seqCount));
+
+ reader.close();
+ } catch (Throwable e)
+ {
+ System.err.println("Error processing VCF: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Tries to add overlapping variants read from a VCF file to the given
+ * sequence, and returns the number of overlapping variants found. Note that
+ * this requires the sequence to hold information as to its chromosomal
+ * positions and reference, in order to be able to map the VCF variants to the
+ * sequence.
+ *
+ * @param seq
+ * @param reader
+ * @param isVcfRefGrch37
+ * @return
+ */
+ protected int loadVCF(SequenceI seq, VCFReader reader, boolean isVcfRefGrch37)
+ {
+ int count = 0;
+ GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+ if (seqCoords == null)
+ {
+ return 0;
+ }
+
+ /*
+ * fudge - ensure chromosomal mapping from range is sequence start/end
+ * (in case end == 0 when the mapping is first created)
+ */
+ MapList mapping = seqCoords.getMapping();
+ if (mapping.getFromRanges().get(0)[1] == 0)
+ {
+ mapping.getFromRanges().get(0)[1] = seq.getEnd();
+ }
+
+ List<int[]> seqChromosomalContigs = mapping.getToRanges();
+ for (int[] range : seqChromosomalContigs)
+ {
+ count += addVcfVariants(seq, reader, range, isVcfRefGrch37);
+ }
+
+ return count;
+ }
+
+ /**
+ * Queries the VCF reader for any variants that overlap the given chromosome
+ * region of the sequence, and adds as variant features. Returns the number of
+ * overlapping variants found.
+ *
+ * @param seq
+ * @param reader
+ * @param range
+ * start-end range of a sequence region in its chromosomal
+ * coordinates
+ * @param isVcfRefGrch37
+ * true if the VCF is with reference to GRCh37
+ * @return
+ */
+ protected int addVcfVariants(SequenceI seq, VCFReader reader,
+ int[] range, boolean isVcfRefGrch37)
+ {
+ GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+
+ String chromosome = seqCoords.getChromosome();
+ String seqRef = seqCoords.getReference();
+ String species = seqCoords.getSpecies();
+
+ /*
+ * map chromosomal coordinates from GRCh38 (sequence) to
+ * GRCh37 (VCF) if necessary
+ */
+ int offset = 0;
+ if ("GRCh38".equalsIgnoreCase(seqRef) && isVcfRefGrch37)
+ {
+ int[] newRange = mapReferenceRange(range, chromosome, species,
+ "GRCh38",
+ "GRCh37");
+ offset = newRange[0] - range[0];
+ range = newRange;
+ }
+
+ /*
+ * query the VCF for overlaps
+ */
+ int count = 0;
+ MapList mapping = seqCoords.getMapping();
+
+ CloseableIterator<VariantContext> variants = reader.query(chromosome,
+ range[0], range[1]);
+ while (variants.hasNext())
+ {
+ /*
+ * get variant location in sequence chromosomal coordinates
+ */
+ VariantContext variant = variants.next();
+ count++;
+ int start = variant.getStart() - offset;
+ int end = variant.getEnd() - offset;
+
+ /*
+ * get location in sequence coordinates
+ */
+ int[] seqLocation = mapping.locateInFrom(start, end);
+ int featureStart = seqLocation[0];
+ int featureEnd = seqLocation[1];
+ addVariantFeatures(seq, variant, featureStart, featureEnd);
+ }
+
+ variants.close();
+
+ return count;
+ }
+
+ /**
+ * Inspects the VCF variant record, and adds variant features to the sequence
+ *
+ * @param seq
+ * @param variant
+ * @param featureStart
+ * @param featureEnd
+ */
+ protected void addVariantFeatures(SequenceI seq, VariantContext variant,
+ int featureStart, int featureEnd)
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append(variant.getReference().getBaseString());
+
+ for (Allele allele : variant.getAlleles())
+ {
+ if (!allele.isReference())
+ {
+ sb.append(",").append(allele.getBaseString());
+ }
+ }
+ String alleles = sb.toString(); // e.g. G,A,C
+
+ String type = SequenceOntologyI.SEQUENCE_VARIANT;
+ SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
+ featureEnd, "VCF");
+
+ /*
+ * only add 'alleles' property if a SNP, as we can
+ * only handle SNPs when computing peptide variants
+ */
+ if (variant.isSNP())
+ {
+ sf.setValue("alleles", alleles);
+ }
+
+ Map<String, Object> atts = variant.getAttributes();
+ for (Entry<String, Object> att : atts.entrySet())
+ {
+ sf.setValue(att.getKey(), att.getValue());
+ }
+ seq.addSequenceFeature(sf);
+ }
+
+ /**
+ * Call the Ensembl REST service that maps from one assembly reference's
+ * coordinates to another's
+ *
+ * @param range
+ * @param chromosome
+ * @param species
+ * @param fromRef
+ * @param toRef
+ * @return
+ */
+ protected int[] mapReferenceRange(int[] range, String chromosome,
+ String species, String fromRef, String toRef)
+ {
+ // TODO call
+ // http://rest.ensembl.org/map/species/fromRef/chromosome:range[0]..range[1]:1/toRef?content-type=application/json
+ // and parse the JSON response
+
+ // 1922632 is the difference between 37 and 38 for chromosome 17
+ return new int[] { range[0] - 1922632, range[1] - 1922632 };
+ }
+}
// gnomAD exome variant dataset
private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
+ // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
/**
* A test to exercise some basic functionality of the htsjdk VCF reader
*
pw.close();
return f;
}
-
- // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
/**
* A 'test' that demonstrates querying an indexed VCF file for features in a
CloseableIterator<VariantContext> features = reader.query("17",
43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
- assertEquals(features.next().getStart(), 43138702);
- assertEquals(features.next().getStart(), 43138704);
- assertEquals(features.next().getStart(), 43138707);
- assertEquals(features.next().getStart(), 43138708);
- assertEquals(features.next().getStart(), 43138710);
- assertEquals(features.next().getStart(), 43138711);
+ assertEquals(printNext(features), 43138702);
+ assertEquals(printNext(features), 43138704);
+ assertEquals(printNext(features), 43138707);
+ assertEquals(printNext(features), 43138708);
+ assertEquals(printNext(features), 43138710);
+ assertEquals(printNext(features), 43138711);
assertFalse(features.hasNext());
features.close();
reader.close();
}
+
+ /**
+ * Prints the toString value of the next variant, and returns its start
+ * location
+ *
+ * @param features
+ * @return
+ */
+ protected int printNext(CloseableIterator<VariantContext> features)
+ {
+ VariantContext next = features.next();
+ System.out.println(next.toString());
+ return next.getStart();
+ }
}