From 0ce82a282e2136977f7d403026840d3eed2e8671 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 4 Dec 2017 09:21:09 +0000 Subject: [PATCH] JAL-2835 small refactor to only extract transcript consequence once --- src/jalview/io/vcf/VCFLoader.java | 63 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 20e3ccd..421cf38 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -902,7 +902,24 @@ public class VCFLoader sb.append(forwardStrand ? allele : Dna.reverseComplement(allele)); String alleles = sb.toString(); // e.g. G,A - String type = getOntologyTerm(seq, variant, altAlleleIndex); + /* + * pick out the consequence data (if any) that is for the current allele + * and feature (transcript) that matches the current sequence + */ + String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, + altAlleleIndex, csqAlleleFieldIndex, + csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), + csqFeatureFieldIndex); + + /* + * pick out the ontology term for the consequence type + */ + String type = SequenceOntologyI.SEQUENCE_VARIANT; + if (consequence != null) + { + type = getOntologyTerm(seq, variant, altAlleleIndex, + consequence); + } float score = getAlleleFrequency(variant, altAlleleIndex); @@ -912,7 +929,7 @@ public class VCFLoader sf.setValue(Gff3Helper.ALLELES, alleles); - addAlleleProperties(variant, seq, sf, altAlleleIndex); + addAlleleProperties(variant, seq, sf, altAlleleIndex, consequence); seq.addSequenceFeature(sf); @@ -931,11 +948,12 @@ public class VCFLoader * @param seq * @param variant * @param altAlleleIndex + * @param consequence * @return * @see http://www.sequenceontology.org/browser/current_svn/term/SO:0001060 */ String getOntologyTerm(SequenceI seq, VariantContext variant, - int altAlleleIndex) + int altAlleleIndex, String consequence) { String type = 
SequenceOntologyI.SEQUENCE_VARIANT; @@ -951,10 +969,6 @@ public class VCFLoader * can we associate Consequence data with this allele and feature (transcript)? * if so, prefer the consequence term from that data */ - String consequence = getConsequenceForAlleleAndFeature(variant, - CSQ_FIELD, - altAlleleIndex, csqAlleleFieldIndex, csqAlleleNumberFieldIndex, - seq.getName().toLowerCase(), csqFeatureFieldIndex); if (consequence != null) { String[] csqFields = consequence.split(PIPE_REGEX); @@ -1089,9 +1103,12 @@ public class VCFLoader * @param sf * @param altAlelleIndex * (0, 1..) + * @param consequence + * if not null, the consequence specific to this sequence (transcript + * feature) and allele */ protected void addAlleleProperties(VariantContext variant, SequenceI seq, - SequenceFeature sf, final int altAlelleIndex) + SequenceFeature sf, final int altAlelleIndex, String consequence) { Map atts = variant.getAttributes(); @@ -1105,7 +1122,7 @@ public class VCFLoader */ if (CSQ_FIELD.equals(key)) { - addConsequences(variant, seq, sf, altAlelleIndex); + addConsequences(variant, seq, sf, consequence); continue; } @@ -1163,36 +1180,22 @@ public class VCFLoader /** * Inspects CSQ data blocks (consequences) and adds attributes on the sequence - * feature for the current allele (and transcript if applicable) - *

- * Allele matching: if field ALLELE_NUM is present, it must match - * altAlleleIndex. If not present, then field Allele value must match the VCF - * Allele. + * feature. *

- * Transcript matching: if sequence name can be identified to at least one of - * the consequences' Feature values, then select only consequences that match - * the value (i.e. consequences for the current transcript sequence). If not, - * take all consequences (this is the case when adding features to the gene - * sequence). + * If myConsequence is not null, then this is the specific + * consequence data (pipe-delimited fields) that is for the current allele and + * transcript (sequence) being processed * * @param variant * @param seq * @param sf - * @param altAlleleIndex - * (0, 1..) + * @param myConsequence */ protected void addConsequences(VariantContext variant, SequenceI seq, - SequenceFeature sf, int altAlleleIndex) + SequenceFeature sf, String myConsequence) { - /* - * first try to identify the matching consequence - */ - String myConsequence = getConsequenceForAlleleAndFeature(variant, - CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, - csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), - csqFeatureFieldIndex); - Object value = variant.getAttribute(CSQ_FIELD); + // TODO if CSQ not present, try ANN (for SnpEff consequence data)? if (value == null || !(value instanceof List)) { -- 1.7.10.2