/*
* default VCF INFO key for VEP consequence data
* NB this can be overridden running VEP with --vcf_info_field
- * - we don't handle this case (require CSQ identifier)
+ * - we don't handle this case (require identifier to be CSQ)
*/
- private static final String CSQ = "CSQ";
+ private static final String CSQ_FIELD = "CSQ";
/*
- * separator for fields in consequence data
+ * separator for fields in consequence data is '|'
*/
- private static final String PIPE = "|";
-
- private static final String PIPE_REGEX = "\\" + PIPE;
+ private static final String PIPE_REGEX = "\\|";
/*
* key for Allele Frequency output by VEP
*/
private String sourceId;
+ List<String> vcfFieldsOfInterest;
+
+ List<String> vepFieldsOfInterest;
+
/**
* Constructor given an alignment context
*
* Reads metadata (such as INFO field descriptions and datatypes) and saves
* them for future reference
*
- * @param sourceId
+ * @param theSourceId
*/
- void saveMetadata(String sourceId)
+ void saveMetadata(String theSourceId)
{
- FeatureSource metadata = new FeatureSource(sourceId);
+ vcfFieldsOfInterest = new ArrayList<>();
+
+ FeatureSource metadata = new FeatureSource(theSourceId);
for (VCFInfoHeaderLine info : header.getInfoHeaderLines())
{
}
metadata.setAttributeName(attributeId, desc);
metadata.setAttributeType(attributeId, attType);
+
+ if (isVcfFieldWanted(attributeId))
+ {
+ vcfFieldsOfInterest.add(attributeId);
+ }
}
- FeatureSources.getInstance().addSource(sourceId, metadata);
+ FeatureSources.getInstance().addSource(theSourceId, metadata);
+ }
+
+ /**
+ * Answers true if the VCF id is one we wish to capture in Jalview, else false.
+ * No filtering is implemented yet: this currently answers true for every id
+ * (see the TODO in the method body).
+ *
+ * @param id
+ *          the VCF field id to test (not inspected at present)
+ * @return true if the field should be captured; always true at present
+ */
+ private boolean isVcfFieldWanted(String id)
+ {
+ // TODO option to match patterns in a Preferences entry?
+ return true;
+ }
+
+ /**
+ * Answers true if the VEP (CSQ) id is one we wish to capture in Jalview, else
+ * false. No filtering is implemented yet: this currently answers true for
+ * every id (see the TODO in the method body).
+ *
+ * @param id
+ *          the VEP (CSQ) field id to test (not inspected at present)
+ * @return true if the field should be captured; always true at present
+ */
+ private boolean isVepFieldWanted(String id)
+ {
+ // TODO option to match patterns in a Preferences entry?
+ return true;
 }
/**
*/
protected void locateCsqFields()
{
- VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ);
+ vepFieldsOfInterest = new ArrayList<>();
+
+ VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD);
if (csqInfo == null)
{
return;
{
csqFeatureFieldIndex = index;
}
+
+ if (isVepFieldWanted(field))
+ {
+ vepFieldsOfInterest.add(field);
+ }
+
index++;
}
}
* extract Consequence data (if present) that we are able to
* associate with the allele for this variant feature
*/
- if (CSQ.equals(key))
+ if (CSQ_FIELD.equals(key))
{
addConsequences(variant, seq, sf, altAlelleIndex);
continue;
}
/*
+ * filter out fields we don't want to capture
+ */
+ if (!vcfFieldsOfInterest.contains(key))
+ {
+ continue;
+ }
+
+ /*
* we extract values for other data which are allele-specific;
* these may be per alternate allele (INFO[key].Number = 'A')
* or per allele including reference (INFO[key].Number = 'R')
protected void addConsequences(VariantContext variant, SequenceI seq,
SequenceFeature sf, int altAlelleIndex)
{
- Object value = variant.getAttribute(CSQ);
+ Object value = variant.getAttribute(CSQ_FIELD);
if (value == null || !(value instanceof ArrayList<?>))
{
StringBuilder sb = new StringBuilder(128);
boolean found = false;
+ // todo check against vepFieldsOfInterest as well somewhere
+
for (String consequence : consequences)
{
String[] csqFields = consequence.split(PIPE_REGEX);
if (found)
{
- sf.setValue(CSQ, sb.toString());
+ sf.setValue(CSQ_FIELD, sb.toString());
}
}