From 1ef254a2c95c4226d9dc3a77af6ffcdfed675b7c Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 25 Sep 2017 10:02:03 +0100 Subject: [PATCH] JAL-2738 extract SNP variants from SNP or MIXED variant records --- resources/lang/Messages.properties | 2 +- src/jalview/io/vcf/VCFLoader.java | 80 +++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 42baf30..f5154f1 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -490,7 +490,7 @@ label.settings_for_type = Settings for {0} label.view_full_application = View in Full Application label.load_associated_tree = Load Associated Tree... label.load_features_annotations = Load Features/Annotations... -label.load_vcf = Load plain text or indexed VCF data +label.load_vcf = Load SNP variants from plain text or indexed VCF data label.load_vcf_file = Load VCF File label.export_features = Export Features... label.export_annotations = Export Annotations... diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 4adc97c..e725a22 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -259,6 +259,16 @@ public class VCFLoader * get variant location in sequence chromosomal coordinates */ VariantContext variant = variants.next(); + + /* + * we can only process SNP variants (which can be reported + * as part of a MIXED variant record) + */ + if (!variant.isSNP() && !variant.isMixed()) + { + continue; + } + count++; int start = variant.getStart() - offset; int end = variant.getEnd() - offset; @@ -280,7 +290,8 @@ public class VCFLoader } /** - * Inspects the VCF variant record, and adds variant features to the sequence + * Inspects the VCF variant record, and adds variant features to the sequence. + * Only SNP variants are added, not INDELs. 
* * @param seq * @param variant @@ -290,48 +301,67 @@ public class VCFLoader protected void addVariantFeatures(SequenceI seq, VariantContext variant, int featureStart, int featureEnd) { - StringBuilder sb = new StringBuilder(); - sb.append(variant.getReference().getBaseString()); - - int alleleCount = 0; - for (Allele allele : variant.getAlleles()) + String reference = variant.getReference().getBaseString(); + if (reference.length() != 1) { - if (!allele.isReference()) - { - sb.append(",").append(allele.getBaseString()); - alleleCount++; - } + /* + * sorry, we don't handle INDEL variants + */ + return; } - String alleles = sb.toString(); // e.g. G,A,C - - String type = SequenceOntologyI.SEQUENCE_VARIANT; /* - * extract allele frequency as feature score, but only if - * a simple SNP (not for >1 co-located SNPs as each has a score) + * for now we extract allele frequency as feature score; note + * this attribute is String for a simple SNP, but List if + * multiple alleles at the locus; we extract for the simple case only, + * since not sure how to match allele order with AF values */ + Object af = variant.getAttribute("AF"); float score = 0f; - if (alleleCount == 1) + if (af instanceof String) { try { - score = (float) variant.getAttributeAsDouble("AF", 0d); + score = Float.parseFloat((String) af); } catch (NumberFormatException e) { - // leave score as 0 + // leave as 0 } } - SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, - featureEnd, score, "VCF"); + + StringBuilder sb = new StringBuilder(); + sb.append(reference); + + /* + * inspect alleles and record SNP variants (as the variant + * record could be MIXED and include INDEL and SNP alleles) + */ + int alleleCount = 0; /* - * only add 'alleles' property if a SNP, as we can - * only handle SNPs when computing peptide variants + * inspect alleles; warning: getAlleles gives no guarantee + * as to the order in which they are returned */ - if (variant.isSNP()) + for (Allele allele : 
variant.getAlleles()) { - sf.setValue("alleles", alleles); + if (!allele.isReference()) + { + String alleleBase = allele.getBaseString(); + if (alleleBase.length() == 1) + { + sb.append(",").append(alleleBase); + alleleCount++; + } + } } + String alleles = sb.toString(); // e.g. G,A,C + + String type = SequenceOntologyI.SEQUENCE_VARIANT; + + SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, + featureEnd, score, "VCF"); + + sf.setValue("alleles", alleles); Map atts = variant.getAttributes(); for (Entry att : atts.entrySet()) -- 1.7.10.2