From f5ddcb62310a9f32545231d0c5e1d7c364bfbb53 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 25 Sep 2017 16:38:51 +0100 Subject: [PATCH] JAL-2738 use complement base for a VCF variant on reverse strand gene --- src/jalview/ext/ensembl/EnsemblGene.java | 7 +- src/jalview/io/vcf/VCFLoader.java | 44 ++++++------ test/jalview/io/vcf/VCFLoaderTest.java | 109 +++++++++++++++++++++++++++--- 3 files changed, 128 insertions(+), 32 deletions(-) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index f975ac8..45e8e34 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -429,7 +429,7 @@ public class EnsemblGene extends EnsemblSeqProxy protected void mapTranscriptToChromosome(Sequence transcript, SequenceI gene, MapList mapping) { - GeneLoci loci = ((Sequence) gene).getGeneLoci(); + GeneLoci loci = gene.getGeneLoci(); if (loci == null) { return; @@ -448,8 +448,9 @@ public class EnsemblGene extends EnsemblSeqProxy List exons = mapping.getFromRanges(); List transcriptLoci = new ArrayList<>(); - - for (int[] exon : exons) { + + for (int[] exon : exons) + { transcriptLoci.add(geneMapping.locateInTo(exon[0], exon[1])); } diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 13966f4..44e8e16 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -7,12 +7,12 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import jalview.analysis.AlignmentUtils; +import jalview.analysis.Dna; import jalview.api.AlignViewControllerGuiI; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.GeneLoci; import jalview.datamodel.Mapping; -import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.ext.ensembl.EnsemblMap; @@ -176,7 +176,7 @@ public class VCFLoader boolean isVcfRefGrch37) { int count = 0; - GeneLoci seqCoords = ((Sequence) seq).getGeneLoci(); + GeneLoci seqCoords = seq.getGeneLoci(); if (seqCoords == null) { return 0; @@ -208,7 +208,7 @@ public class VCFLoader protected int addVcfVariants(SequenceI seq, VCFReader reader, int[] range, boolean isVcfRefGrch37) { - GeneLoci seqCoords = ((Sequence) seq).getGeneLoci(); + GeneLoci seqCoords = seq.getGeneLoci(); String chromosome = seqCoords.chromosome; String seqRef = seqCoords.assembly; @@ -243,6 +243,8 @@ public class VCFLoader range = newRange; } + boolean forwardStrand = range[0] <= range[1]; + /* * query the VCF for overlaps * (convert a reverse strand range to forwards) @@ -281,7 +283,8 @@ public class VCFLoader int[] seqLocation = mapping.locateInFrom(start, end); if (seqLocation != null) { - addVariantFeatures(seq, variant, seqLocation[0], seqLocation[1]); + addVariantFeatures(seq, variant, seqLocation[0], seqLocation[1], + forwardStrand); } } @@ -293,17 +296,21 @@ public class VCFLoader /** * Inspects the VCF variant record, and adds variant features to the sequence. * Only SNP variants are added, not INDELs. + *

+ * If the sequence maps to the reverse strand of the chromosome, reference and + * variant bases are recorded as their complements (C/G, A/T). * * @param seq * @param variant * @param featureStart * @param featureEnd + * @param forwardStrand */ protected void addVariantFeatures(SequenceI seq, VariantContext variant, - int featureStart, int featureEnd) + int featureStart, int featureEnd, boolean forwardStrand) { - String reference = variant.getReference().getBaseString(); - if (reference.length() != 1) + byte[] reference = variant.getReference().getBases(); + if (reference.length != 1) { /* * sorry, we don't handle INDEL variants @@ -314,8 +321,7 @@ public class VCFLoader /* * for now we extract allele frequency as feature score; note * this attribute is String for a simple SNP, but List if - * multiple alleles at the locus; we extract for the simple case only, - * since not sure how to match allele order with AF values + * multiple alleles at the locus; we extract for the simple case only */ Object af = variant.getAttribute("AF"); float score = 0f; @@ -331,27 +337,25 @@ public class VCFLoader } StringBuilder sb = new StringBuilder(); - sb.append(reference); + sb.append(forwardStrand ? reference : Dna + .getComplement((char) reference[0])); /* * inspect alleles and record SNP variants (as the variant * record could be MIXED and include INDEL and SNP alleles) - */ - int alleleCount = 0; - - /* - * inspect alleles; warning: getAlleles gives no guarantee - * as to the order in which they are returned + * warning: getAlleles gives no guarantee as to the order + * in which they are returned */ for (Allele allele : variant.getAlleles()) { if (!allele.isReference()) { - String alleleBase = allele.getBaseString(); - if (alleleBase.length() == 1) + byte[] alleleBase = allele.getBases(); + if (alleleBase.length == 1) { - sb.append(",").append(alleleBase); - alleleCount++; + sb.append(",").append( + forwardStrand ? alleleBase : Dna + .getComplement((char) alleleBase[0])); } } } diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index 7319123..db72941 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -55,23 +55,35 @@ public class VCFLoaderTest /* * verify variant feature(s) added to gene */ - List geneFeatures = al.getSequenceAt(0).findFeatures( - 2, 2); - assertEquals(geneFeatures.size(), 1); + List geneFeatures = al.getSequenceAt(0) + .getSequenceFeatures(); + assertEquals(geneFeatures.size(), 2); SequenceFeature sf = geneFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES)); + sf = geneFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + /* * verify variant feature(s) added to transcript */ List transcriptFeatures = al.getSequenceAt(1) - .findFeatures(4, 4); + .getSequenceFeatures(); assertEquals(transcriptFeatures.size(), 1); sf = transcriptFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); @@ -82,12 +94,12 @@ public class VCFLoaderTest */ SequenceI peptide = al.getSequenceAt(1) .getDBRefs()[0].getMap().getTo(); - List proteinFeatures = peptide.findFeatures(1, 6); + List proteinFeatures = peptide.getSequenceFeatures(); assertEquals(proteinFeatures.size(), 1); sf = proteinFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 2); - assertEquals(sf.getEnd(), 2); + assertEquals(sf.getBegin(), 1); + assertEquals(sf.getEnd(), 1); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getDescription(), "p.Ser1Thr"); } @@ -155,10 +167,89 @@ public class VCFLoaderTest return alignment; } + /** + * Test with 'gene' and 'transcript' mapped to the reverse strand of the + * chromosome. The VCF variant positions (in forward coordinates) should get + * correctly located on sequence positions. + * + * @throws IOException + */ @Test(groups = "Functional") public void testLoadVCF_reverseStrand() throws IOException { - // TODO a test with reverse strand mapping of - // gene and transcript to chromosome + AlignmentI al = buildAlignment(); + + /* + * invert forward to reverse strand mappings + */ + List to = al.getSequenceAt(0).getGeneLoci().mapping + .getToRanges(); + int temp = to.get(0)[0]; + to.get(0)[0] = to.get(0)[1]; + to.get(0)[1] = temp; + to = al.getSequenceAt(1).getGeneLoci().mapping.getToRanges(); + to.get(0)[0] = to.get(0)[1]; + to.get(0)[1] = temp; + to.get(1)[0] = to.get(1)[1]; + to.get(1)[1] = temp; + int[] tmp2 = to.get(0); + to.set(0, to.get(1)); + to.set(1, tmp2); + + VCFLoader loader = new VCFLoader(al); + + File f = makeVcf(); + + loader.loadVCF(f.getPath(), null); + + /* + * verify variant feature(s) added to gene + */ + List geneFeatures = al.getSequenceAt(0) + .getSequenceFeatures(); + assertEquals(geneFeatures.size(), 2); + SequenceFeature sf = geneFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 5.08130e-03, 0.000001f); + assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES)); + + sf = geneFeatures.get(1); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 4); + assertEquals(sf.getEnd(), 4); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + + /* + * verify variant feature(s) added to transcript + */ + List transcriptFeatures = al.getSequenceAt(1) + .getSequenceFeatures(); + assertEquals(transcriptFeatures.size(), 1); + sf = transcriptFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 2); + assertEquals(sf.getEnd(), 2); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.08130e-03, 0.000001f); + assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES)); + + /* + * verify variant feature(s) computed and added to protein + * first codon AGC varies to ACC giving S/T + */ + SequenceI peptide = al.getSequenceAt(1).getDBRefs()[0].getMap().getTo(); + List proteinFeatures = peptide.getSequenceFeatures(); + assertEquals(proteinFeatures.size(), 1); + sf = proteinFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 1); + assertEquals(sf.getEnd(), 1); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getDescription(), "p.Ser1Thr"); } } -- 1.7.10.2