From b87ae5ac68939a1b964682046e8b07958fae219a Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 4 Dec 2017 11:31:41 +0000 Subject: [PATCH] JAL-2845 locate reverse strand insertion as complement insertion at a preceding position --- src/jalview/io/vcf/VCFLoader.java | 26 ++++++++++- test/jalview/io/vcf/VCFLoaderTest.java | 80 ++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 421cf38..9d98b7e 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -556,7 +556,15 @@ public class VCFLoader return 0; } - return addVcfVariants(seq, reader, vcfMap, vcfAssembly); + /* + * work with the dataset sequence here + */ + SequenceI dss = seq.getDatasetSequence(); + if (dss == null) + { + dss = seq; + } + return addVcfVariants(dss, reader, vcfMap, vcfAssembly); } /** @@ -892,10 +900,24 @@ public class VCFLoader String allele = alt.getBaseString(); /* + * insertion after a genomic base, if on reverse strand, has to be + * converted to insertion of complement after the preceding position + */ + int referenceLength = reference.length(); + if (!forwardStrand && allele.length() > referenceLength + && allele.startsWith(reference)) + { + featureStart -= referenceLength; + featureEnd = featureStart; + char insertAfter = seq.getCharAt(featureStart - seq.getStart()); + reference = Dna.reverseComplement(String.valueOf(insertAfter)); + allele = allele.substring(referenceLength) + reference; + } + + /* * build the ref,alt allele description e.g. "G,A", using the base * complement if the sequence is on the reverse strand */ - // FIXME correctly handle insertions on reverse strand JAL-2845 StringBuilder sb = new StringBuilder(); sb.append(forwardStrand ? reference : Dna.reverseComplement(reference)); sb.append(COMMA); diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index 246337d..29cf9c9 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -321,69 +321,89 @@ public class VCFLoaderTest /* * verify variant feature(s) added to gene2 - * gene/1-25 maps to chromosome 45051634- reverse strand - * variants A/T, A/C at 45051611 and G/GA,G/C at 45051613 map to - * T/A, T/G and C/TC,C/G at gene positions 24 and 22 respectively + * gene2/1-25 maps to chromosome 45051634- reverse strand */ List geneFeatures = al.getSequenceAt(2) .getSequenceFeatures(); SequenceFeatures.sortFeatures(geneFeatures, true); assertEquals(geneFeatures.size(), 4); - SequenceFeature sf = geneFeatures.get(0); - assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 22); - assertEquals(sf.getEnd(), 22); - assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 2.0e-03, DELTA); - assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); - sf = geneFeatures.get(1); + /* + * variant A/T at 45051611 maps to T/A at gene position 24 + */ + SequenceFeature sf = geneFeatures.get(3); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 22); - assertEquals(sf.getEnd(), 22); + assertEquals(sf.getBegin(), 24); + assertEquals(sf.getEnd(), 24); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.0e-03, DELTA); - assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 5.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A"); + /* + * variant A/C at 45051611 maps to T/G at gene position 24 + */ sf = geneFeatures.get(2); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 24); assertEquals(sf.getEnd(), 24); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); assertEquals(sf.getScore(), 4.0e-03, DELTA); - assertEquals("T,G", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G"); - sf = geneFeatures.get(3); + /* + * variant G/C at 45051613 maps to C/G at gene position 22 + */ + sf = geneFeatures.get(1); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 24); - assertEquals(sf.getEnd(), 24); + assertEquals(sf.getBegin(), 22); + assertEquals(sf.getEnd(), 22); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 5.0e-03, DELTA); - assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G"); + + /* + * insertion G/GA at 45051613 maps to an insertion at + * the preceding position (21) on reverse strand gene + * reference: CAAGC -> GCTTG/21-25 + * genomic variant: CAAGAC (G/GA) + * gene variant: GTCTTG (G/GT at 21) + */ + sf = geneFeatures.get(0); + assertEquals(sf.getFeatureGroup(), "VCF"); + assertEquals(sf.getBegin(), 21); + assertEquals(sf.getEnd(), 21); + assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT"); /* - * verify variant feature(s) added to transcript2 - * variants G/GA,G/C at position 22 of gene overlap and map to - * C/TC,C/G at position 17 of transcript + * verify 2 variant features added to transcript2 */ List transcriptFeatures = al.getSequenceAt(3) .getSequenceFeatures(); assertEquals(transcriptFeatures.size(), 2); + + /* + * insertion G/GT at position 21 of gene maps to position 16 of transcript + */ sf = transcriptFeatures.get(0); assertEquals(sf.getFeatureGroup(), "VCF"); - assertEquals(sf.getBegin(), 17); - assertEquals(sf.getEnd(), 17); + assertEquals(sf.getBegin(), 16); + assertEquals(sf.getEnd(), 16); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 2.0e-03, DELTA); - assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 3.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT"); + /* + * SNP C/G at position 22 of gene maps to position 17 of transcript + */ sf = transcriptFeatures.get(1); assertEquals(sf.getFeatureGroup(), "VCF"); assertEquals(sf.getBegin(), 17); assertEquals(sf.getEnd(), 17); assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT); - assertEquals(sf.getScore(), 3.0e-03, DELTA); - assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES)); + assertEquals(sf.getScore(), 2.0e-03, DELTA); + assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G"); /* * verify variant feature(s) computed and added to protein -- 1.7.10.2