From: gmungoc
Date: Mon, 13 Nov 2017 16:57:02 +0000 (+0000)
Subject: JAL-2738 stubs for filtering VEP and VCF fields of interest
X-Git-Tag: Release_2_11_0~136
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d8d9f50810cf347315bab5b7267416b3e5a8290d;p=jalview.git

JAL-2738 stubs for filtering VEP and VCF fields of interest
---

diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java
index 5adc55c..e7c5a34 100644
--- a/src/jalview/io/vcf/VCFLoader.java
+++ b/src/jalview/io/vcf/VCFLoader.java
@@ -61,16 +61,14 @@ public class VCFLoader
   /*
    * default VCF INFO key for VEP consequence data
    * NB this can be overridden running VEP with --vcf_info_field
-   * - we don't handle this case (require CSQ identifier)
+   * - we don't handle this case (require identifier to be CSQ)
    */
-  private static final String CSQ = "CSQ";
+  private static final String CSQ_FIELD = "CSQ";
 
   /*
-   * separator for fields in consequence data
+   * separator for fields in consequence data is '|'
    */
-  private static final String PIPE = "|";
-
-  private static final String PIPE_REGEX = "\\" + PIPE;
+  private static final String PIPE_REGEX = "\\|";
 
   /*
    * key for Allele Frequency output by VEP
@@ -126,6 +124,10 @@ public class VCFLoader
    */
   private String sourceId;
 
+  List vcfFieldsOfInterest;
+
+  List vepFieldsOfInterest;
+
   /**
    * Constructor given an alignment context
    *
@@ -253,11 +255,13 @@ public class VCFLoader
    * Reads metadata (such as INFO field descriptions and datatypes) and saves
    * them for future reference
    *
-   * @param sourceId
+   * @param theSourceId
    */
-  void saveMetadata(String sourceId)
+  void saveMetadata(String theSourceId)
   {
-    FeatureSource metadata = new FeatureSource(sourceId);
+    vcfFieldsOfInterest = new ArrayList<>();
+
+    FeatureSource metadata = new FeatureSource(theSourceId);
 
     for (VCFInfoHeaderLine info : header.getInfoHeaderLines())
     {
@@ -285,9 +289,39 @@ public class VCFLoader
       }
       metadata.setAttributeName(attributeId, desc);
       metadata.setAttributeType(attributeId, attType);
+
+      if (isVcfFieldWanted(attributeId))
+      {
+        vcfFieldsOfInterest.add(attributeId);
+      }
     }
 
-    FeatureSources.getInstance().addSource(sourceId, metadata);
+    FeatureSources.getInstance().addSource(theSourceId, metadata);
+  }
+
+  /**
+   * Answers true if the VCF id is one we wish to capture in Jalview, else false
+   *
+   * @param id
+   * @return
+   */
+  private boolean isVcfFieldWanted(String id)
+  {
+    // TODO option to match patterns in a Preferences entry?
+    return true;
+  }
+
+  /**
+   * Answers true if the VEP (CSQ) id is one we wish to capture in Jalview, else
+   * false
+   *
+   * @param id
+   * @return
+   */
+  private boolean isVepFieldWanted(String id)
+  {
+    // TODO option to match patterns in a Preferences entry?
+    return true;
   }
 
   /**
@@ -298,7 +332,9 @@ public class VCFLoader
    */
   protected void locateCsqFields()
   {
-    VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ);
+    vepFieldsOfInterest = new ArrayList<>();
+
+    VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD);
     if (csqInfo == null)
     {
       return;
@@ -332,6 +368,12 @@ public class VCFLoader
         {
           csqFeatureFieldIndex = index;
         }
+
+        if (isVepFieldWanted(field))
+        {
+          vepFieldsOfInterest.add(field);
+        }
+
         index++;
       }
     }
@@ -702,13 +744,21 @@ public class VCFLoader
        * extract Consequence data (if present) that we are able to
        * associated with the allele for this variant feature
        */
-      if (CSQ.equals(key))
+      if (CSQ_FIELD.equals(key))
       {
         addConsequences(variant, seq, sf, altAlelleIndex);
         continue;
       }
 
       /*
+       * filter out fields we don't want to capture
+       */
+      if (!vcfFieldsOfInterest.contains(key))
+      {
+        continue;
+      }
+
+      /*
        * we extract values for other data which are allele-specific;
        * these may be per alternate allele (INFO[key].Number = 'A')
        * or per allele including reference (INFO[key].Number = 'R')
@@ -775,7 +825,7 @@ public class VCFLoader
   protected void addConsequences(VariantContext variant, SequenceI seq,
           SequenceFeature sf, int altAlelleIndex)
   {
-    Object value = variant.getAttribute(CSQ);
+    Object value = variant.getAttribute(CSQ_FIELD);
 
     if (value == null || !(value instanceof ArrayList))
     {
@@ -811,6 +861,8 @@ public class VCFLoader
     StringBuilder sb = new StringBuilder(128);
     boolean found = false;
 
+    // todo check against vepFieldsOfInterest as well somewhere
+
     for (String consequence : consequences)
     {
       String[] csqFields = consequence.split(PIPE_REGEX);
@@ -829,7 +881,7 @@ public class VCFLoader
 
     if (found)
     {
-      sf.setValue(CSQ, sb.toString());
+      sf.setValue(CSQ_FIELD, sb.toString());
     }
   }
 