+ SequenceI mapTo = mapping.getTo();
+ MapList map = mapping.getMap();
+ if (map.getFromRatio() == 3)
+ {
+ /*
+ * dna-to-peptide product mapping
+ */
+ // JAL-3187 render on the fly instead
+ // AlignmentUtils.computeProteinFeatures(seq, mapTo, map);
+ }
+ else
+ {
+ /*
+ * nucleotide-to-nucleotide mapping e.g. transcript to CDS
+ */
+ List<SequenceFeature> features = seq.getFeatures()
+ .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT);
+ for (SequenceFeature sf : features)
+ {
+ if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup()))
+ {
+ transferFeature(sf, mapTo, map);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Looks up the mapping from the given sequence to VCF coordinates and, if
+  * there is one, adds overlapping VCF variants as features. Features are added
+  * to the dataset sequence where the sequence has one, otherwise to the
+  * sequence itself.
+  *
+  * @param seq
+  *          the sequence to load variants for
+  * @return the number of variant features added; zero if no mapping to the VCF
+  *         could be found for the sequence
+  */
+ protected int loadSequenceVCF(SequenceI seq)
+ {
+   VCFMap mapToVcf = getVcfMap(seq);
+   if (mapToVcf != null)
+   {
+     /*
+      * prefer the dataset sequence as the holder of the features
+      */
+     SequenceI dataset = seq.getDatasetSequence();
+     return addVcfVariants(dataset == null ? seq : dataset, mapToVcf);
+   }
+   return 0;
+ }
+
+ /**
+  * Answers a map from sequence coordinates to VCF chromosome ranges, or null if
+  * no such map can be determined. Strategies are tried in this order:
+  * <ul>
+  * <li>if a contig dictionary is available, a direct match of the sequence to
+  * a VCF contig (see getContigMap)</li>
+  * <li>the sequence's own chromosomal coordinates, if its species matches the
+  * configured VCF species and its assembly matches the VCF assembly</li>
+  * <li>otherwise each chromosomal range of the sequence is converted to the
+  * VCF assembly using mapReferenceRange; ranges that cannot be converted are
+  * logged and left out of the returned map</li>
+  * </ul>
+  *
+  * @param seq
+  *          the sequence to be mapped
+  * @return the mapping, or null if the sequence has no known chromosomal
+  *         coordinates, its species does not match the VCF species, or its
+  *         assembly or coordinate mapping is missing
+  */
+ private VCFMap getVcfMap(SequenceI seq)
+ {
+   /*
+    * simplest case: sequence has id and length matching a VCF contig
+    */
+   if (dictionary != null)
+   {
+     VCFMap contigMap = getContigMap(seq);
+     if (contigMap != null)
+     {
+       return contigMap;
+     }
+   }
+
+   /*
+    * otherwise, map to VCF from chromosomal coordinates
+    * of the sequence (if known)
+    */
+   GeneLociI seqCoords = seq.getGeneLoci();
+   if (seqCoords == null)
+   {
+     Cache.log.warn(String.format(
+             "Can't query VCF for %s as chromosome coordinates not known",
+             seq.getName()));
+     return null;
+   }
+
+   String species = seqCoords.getSpeciesId();
+   String chromosome = seqCoords.getChromosomeId();
+   String seqRef = seqCoords.getAssemblyId();
+   MapList map = seqCoords.getMapping();
+
+   // note this requires the configured species to match that
+   // returned with the Ensembl sequence; todo: support aliases?
+   // (a null configured species is treated as 'not matched' rather than
+   // being allowed to throw a NullPointerException)
+   if (vcfSpecies == null || !vcfSpecies.equalsIgnoreCase(species))
+   {
+     Cache.log.warn("No VCF loaded to " + seq.getName()
+             + " as species not matched");
+     return null;
+   }
+
+   /*
+    * without an assembly id and a coordinate mapping for the sequence
+    * there is nothing that can be compared or converted
+    */
+   if (seqRef == null || map == null)
+   {
+     Cache.log.warn("No VCF loaded to " + seq.getName()
+             + " as reference assembly or chromosome mapping not known");
+     return null;
+   }
+
+   if (seqRef.equalsIgnoreCase(vcfAssembly))
+   {
+     return new VCFMap(chromosome, map);
+   }
+
+   /*
+    * VCF data has a different reference assembly to the sequence:
+    * convert chromosomal coordinates from sequence assembly to VCF assembly
+    */
+   List<int[]> toVcfRanges = new ArrayList<>();
+   List<int[]> fromSequenceRanges = new ArrayList<>();
+
+   for (int[] range : map.getToRanges())
+   {
+     int[] fromRange = map.locateInFrom(range[0], range[1]);
+     if (fromRange == null)
+     {
+       // corrupted map?!?
+       continue;
+     }
+
+     // NOTE(review): the species argument is hard-coded as "human" although
+     // the sequence's own species is available (and is what gets reported in
+     // the error message below) - confirm whether 'species' should be passed
+     int[] newRange = mapReferenceRange(range, chromosome, "human", seqRef,
+             vcfAssembly);
+     if (newRange == null)
+     {
+       Cache.log.error(
+               String.format("Failed to map %s:%s:%s:%d:%d to %s", species,
+                       chromosome, seqRef, range[0], range[1],
+                       vcfAssembly));
+       continue;
+     }
+
+     /*
+      * keep the two lists in step: one sequence range per VCF range
+      */
+     toVcfRanges.add(newRange);
+     fromSequenceRanges.add(fromRange);
+   }
+
+   return new VCFMap(chromosome,
+           new MapList(fromSequenceRanges, toVcfRanges, 1, 1));
+ }
+
+ /**
+  * Looks up the sequence name in the VCF contig dictionary. If a contig of
+  * that name is found, and its length equals the sequence length, answers a
+  * 1:1 map of positions 1..length of the sequence to the same positions of
+  * the contig. Otherwise answers null.
+  *
+  * @param seq
+  *          the sequence to match to a contig
+  * @return the map to the contig, or null if name or length do not match
+  */
+ private VCFMap getContigMap(SequenceI seq)
+ {
+   String seqName = seq.getName();
+   SAMSequenceRecord contig = dictionary.getSequence(seqName);
+   if (contig == null)
+   {
+     return null;
+   }
+
+   int seqLength = seq.getLength();
+   if (seqLength != contig.getSequenceLength())
+   {
+     return null;
+   }
+
+   /*
+    * same name and length: sequence and contig positions correspond exactly
+    */
+   int[] sequenceRange = new int[] { 1, seqLength };
+   int[] contigRange = new int[] { 1, seqLength };
+   return new VCFMap(seqName, new MapList(sequenceRange, contigRange, 1, 1));
+ }
+
+ /**
+  * Queries the VCF reader for any variants that overlap the mapped chromosome
+  * ranges of the sequence, and adds them as variant features. A failure to
+  * read one range is logged and does not prevent the remaining ranges from
+  * being processed.
+  *
+  * @param seq
+  *          the sequence to which features are added
+  * @param map
+  *          mapping from sequence to VCF coordinates
+  * @return the total number of features added, as reported by
+  *         addAlleleFeatures for each overlapping variant
+  */
+ protected int addVcfVariants(SequenceI seq, VCFMap map)
+ {
+   boolean forwardStrand = map.map.isToForwardStrand();
+
+   /*
+    * query the VCF for overlaps of each contiguous chromosomal region
+    */
+   int count = 0;
+
+   for (int[] range : map.map.getToRanges())
+   {
+     /*
+      * a range may be held as [end, start], so normalise
+      * to ascending order before querying
+      */
+     int vcfStart = Math.min(range[0], range[1]);
+     int vcfEnd = Math.max(range[0], range[1]);
+
+     /*
+      * try-with-resources ensures the iterator is closed
+      * even if an exception is thrown part way through
+      */
+     try (CloseableIterator<VariantContext> variants = reader
+             .query(map.chromosome, vcfStart, vcfEnd))
+     {
+       while (variants.hasNext())
+       {
+         VariantContext variant = variants.next();
+
+         int[] featureRange = map.map.locateInFrom(variant.getStart(),
+                 variant.getEnd());
+
+         /*
+          * variants whose position can't be located
+          * on the sequence are skipped
+          */
+         if (featureRange != null)
+         {
+           int featureStart = Math.min(featureRange[0], featureRange[1]);
+           int featureEnd = Math.max(featureRange[0], featureRange[1]);
+           count += addAlleleFeatures(seq, variant, featureStart,
+                   featureEnd, forwardStrand);
+         }
+       }
+     } catch (TribbleException e)
+     {
+       /*
+        * RuntimeException throwable by htsjdk; the exception's message
+        * supplies the fourth value required by the format string
+        */
+       String msg = String.format("Error reading VCF for %s:%d-%d: %s ",
+               map.chromosome, vcfStart, vcfEnd, e.getLocalizedMessage());
+       Cache.log.error(msg);
+     }
+   }
+
+   return count;
+ }
+
+ /**
+  * A convenience method to get an attribute value for an alternate allele.
+  * <ul>
+  * <li>if the attribute is held as a single String, that value is returned
+  * whatever the allele index</li>
+  * <li>if it is held as a list (of any List type), the entry at the allele
+  * index is returned as a String, or null if the index is outside the
+  * list</li>
+  * <li>otherwise (including attribute not present) null is returned</li>
+  * </ul>
+  *
+  * @param variant
+  *          the VCF record to inspect
+  * @param attributeName
+  *          the key of the attribute wanted
+  * @param alleleIndex
+  *          position of the wanted value within a list-valued attribute
+  * @return the attribute value, or null if none can be found
+  */
+ protected String getAttributeValue(VariantContext variant,
+         String attributeName, int alleleIndex)
+ {
+   Object att = variant.getAttribute(attributeName);
+
+   if (att instanceof String)
+   {
+     return (String) att;
+   }
+   if (att instanceof List<?>)
+   {
+     List<?> values = (List<?>) att;
+     if (alleleIndex < 0 || alleleIndex >= values.size())
+     {
+       /*
+        * fewer values than alleles: no value for this one
+        */
+       return null;
+     }
+     Object value = values.get(alleleIndex);
+     return value == null ? null : value.toString();
+   }
+
+   return null;
+ }
+
+ /**
+ * Adds one variant feature for each allele in the VCF variant record, and
+ * returns the number of features added.
+ *
+ * @param seq
+ * @param variant
+ * @param featureStart
+ * @param featureEnd
+ * @param forwardStrand
+ * @return
+ */
+ protected int addAlleleFeatures(SequenceI seq, VariantContext variant,
+ int featureStart, int featureEnd, boolean forwardStrand)
+ {
+ int added = 0;
+
+ /*
+ * Javadoc says getAlternateAlleles() imposes no order on the list returned
+ * so we proceed defensively to get them in strict order
+ */
+ int altAlleleCount = variant.getAlternateAlleles().size();
+ for (int i = 0; i < altAlleleCount; i++)
+ {
+ added += addAlleleFeature(seq, variant, i, featureStart, featureEnd,
+ forwardStrand);
+ }
+ return added;
+ }
+
+ /**
+ * Inspects one allele and attempts to add a variant feature for it to the
+ * sequence. The additional data associated with this allele is extracted to
+ * store in the feature's key-value map. Answers the number of features added (0
+ * or 1).
+ *
+ * @param seq
+ * @param variant
+ * @param altAlleleIndex
+ * (0, 1..)
+ * @param featureStart
+ * @param featureEnd
+ * @param forwardStrand
+ * @return
+ */
+ protected int addAlleleFeature(SequenceI seq, VariantContext variant,
+ int altAlleleIndex, int featureStart, int featureEnd,
+ boolean forwardStrand)
+ {
+ String reference = variant.getReference().getBaseString();
+ Allele alt = variant.getAlternateAllele(altAlleleIndex);
+ String allele = alt.getBaseString();
+
+ /*
+ * insertion after a genomic base, if on reverse strand, has to be
+ * converted to insertion of complement after the preceding position
+ */
+ int referenceLength = reference.length();
+ if (!forwardStrand && allele.length() > referenceLength
+ && allele.startsWith(reference))
+ {
+ featureStart -= referenceLength;
+ featureEnd = featureStart;
+ char insertAfter = seq.getCharAt(featureStart - seq.getStart());
+ reference = Dna.reverseComplement(String.valueOf(insertAfter));
+ allele = allele.substring(referenceLength) + reference;
+ }
+
+ /*
+ * build the ref,alt allele description e.g. "G,A", using the base
+ * complement if the sequence is on the reverse strand
+ */
+ StringBuilder sb = new StringBuilder();
+ sb.append(forwardStrand ? reference : Dna.reverseComplement(reference));
+ sb.append(COMMA);
+ sb.append(forwardStrand ? allele : Dna.reverseComplement(allele));
+ String alleles = sb.toString(); // e.g. G,A
+
+ /*
+ * pick out the consequence data (if any) that is for the current allele
+ * and feature (transcript) that matches the current sequence
+ */
+ String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD,
+ altAlleleIndex, csqAlleleFieldIndex,
+ csqAlleleNumberFieldIndex, seq.getName().toLowerCase(),
+ csqFeatureFieldIndex);
+
+ /*
+ * pick out the ontology term for the consequence type
+ */
+ String type = SequenceOntologyI.SEQUENCE_VARIANT;
+ if (consequence != null)
+ {
+ type = getOntologyTerm(consequence);
+ }
+
+ SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
+ featureEnd, FEATURE_GROUP_VCF);
+ sf.setSource(sourceId);
+
+ /*
+ * save the derived alleles as a named attribute; this will be
+ * needed when Jalview computes derived peptide variants
+ */
+ addFeatureAttribute(sf, Gff3Helper.ALLELES, alleles);
+
+ /*
+ * add selected VCF fixed column data as feature attributes
+ */
+ addFeatureAttribute(sf, VCF_POS, String.valueOf(variant.getStart()));
+ addFeatureAttribute(sf, VCF_ID, variant.getID());
+ addFeatureAttribute(sf, VCF_QUAL,
+ String.valueOf(variant.getPhredScaledQual()));
+ addFeatureAttribute(sf, VCF_FILTER, getFilter(variant));
+
+ addAlleleProperties(variant, sf, altAlleleIndex, consequence);
+
+ seq.addSequenceFeature(sf);
+
+ return 1;
+ }
+
+ /**
+  * Answers the VCF FILTER value for the variant - or an approximation to it.
+  * If the variant has no filters, the 'no value' placeholder is returned; if
+  * it has one, that filter is returned; otherwise the filters are joined with
+  * semi-colons. As htsjdk holds the filters in a Set, their order in the
+  * result may differ from their order in the VCF file.
+  *
+  * @param variant
+  *          the VCF variant record
+  * @return the filter value(s) as a single string
+  */
+ String getFilter(VariantContext variant)
+ {
+   Set<String> filters = variant.getFilters();
+   if (filters.isEmpty())
+   {
+     return NO_VALUE;
+   }
+
+   Iterator<String> remaining = filters.iterator();
+   String head = remaining.next();
+   if (!remaining.hasNext())
+   {
+     /*
+      * only one filter: return it as is
+      */
+     return head;
+   }
+
+   StringBuilder joined = new StringBuilder(head);
+   do
+   {
+     joined.append(";").append(remaining.next());
+   } while (remaining.hasNext());
+
+   return joined.toString();
+ }
+
+ /**
+  * Stores the value on the feature under the given key, provided the value
+  * carries information: a value that is null, empty, or equal to the 'no
+  * value' placeholder is not stored.
+  *
+  * @param sf
+  *          the feature to update
+  * @param key
+  *          the attribute name
+  * @param value
+  *          the attribute value (may be null)
+  */
+ void addFeatureAttribute(SequenceFeature sf, String key, String value)
+ {
+   boolean nothingToStore = value == null || value.isEmpty()
+           || NO_VALUE.equals(value);
+   if (nothingToStore)
+   {
+     return;
+   }
+   sf.setValue(key, value);
+ }
+
+ /**
+  * Determines the Sequence Ontology term to use for the variant feature type in
+  * Jalview. The default is the general 'sequence variant' term. A more
+  * specific term is answered if the VCF declares a consequence allele field,
+  * and the given consequence data has a value in its consequence type field.
+  * Where that value is a compound such as
+  * missense_variant&amp;splice_region_variant, only the part before the first
+  * '&amp;' is used.
+  *
+  * @param consequence
+  *          pipe-delimited consequence data for the allele and sequence, or
+  *          null if there is none
+  * @return the ontology term for the feature type
+  * @see http://www.sequenceontology.org/browser/current_svn/term/SO:0001060
+  */
+ String getOntologyTerm(String consequence)
+ {
+   /*
+    * no Consequence data declared, so the term can't be refined
+    * (an equivalent check for SnpEff data is not implemented)
+    */
+   if (csqAlleleFieldIndex == -1)
+   {
+     return SequenceOntologyI.SEQUENCE_VARIANT;
+   }
+
+   String term = SequenceOntologyI.SEQUENCE_VARIANT;
+
+   /*
+    * prefer the consequence type field of the consequence data, if present
+    */
+   if (consequence != null)
+   {
+     String[] fields = consequence.split(PIPE_REGEX);
+     if (csqConsequenceFieldIndex < fields.length)
+     {
+       term = fields[csqConsequenceFieldIndex];
+     }
+   }
+
+   /*
+    * for a compound term, keep only the first
+    * ('most severe') consequence
+    */
+   int ampersand = (term == null) ? -1 : term.indexOf('&');
+   return ampersand > 0 ? term.substring(0, ampersand) : term;
+ }
+
+ /**
+  * Returns matched consequence data if it can be found, else null.
+  * <ul>
+  * <li>inspects the VCF data for key 'vcfInfoId'</li>
+  * <li>treats the value as a list of consequences; a value held as a single
+  * String is treated as a list of one</li>
+  * <li>returns the first consequence (if any) where</li>
+  * <ul>
+  * <li>the feature field is more than 4 characters long and, in lower case,
+  * is contained in the sequence name (e.g. transcript id)</li>
+  * <li>the allele matches the altAlleleIndex'th allele of variant</li>
+  * </ul>
+  * </ul>
+  * If matched, the consequence is returned (as pipe-delimited fields).
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param vcfInfoId
+  *          key of the INFO attribute holding consequence data
+  * @param altAlleleIndex
+  *          index (0, 1..) of the alternate allele
+  * @param alleleFieldIndex
+  *          position of the allele field in consequence data, or -1 if none
+  * @param alleleNumberFieldIndex
+  *          position of the allele number field, or -1 if none
+  * @param seqName
+  *          the sequence name; compared as is with the lower-cased feature
+  *          id, so callers should supply it in lower case
+  * @param featureFieldIndex
+  *          position of the feature field in consequence data, or -1 if none
+  * @return the matched consequence, or null if there is no match
+  */
+ private String getConsequenceForAlleleAndFeature(VariantContext variant,
+         String vcfInfoId, int altAlleleIndex, int alleleFieldIndex,
+         int alleleNumberFieldIndex,
+         String seqName, int featureFieldIndex)
+ {
+   if (alleleFieldIndex == -1 || featureFieldIndex == -1)
+   {
+     return null;
+   }
+   Object value = variant.getAttribute(vcfInfoId);
+
+   /*
+    * the attribute may be a list of values, or (as also allowed for
+    * in getAttributeValue) a single String if there is only one value
+    */
+   List<?> consequences;
+   if (value instanceof List<?>)
+   {
+     consequences = (List<?>) value;
+   }
+   else if (value instanceof String)
+   {
+     List<Object> single = new ArrayList<>();
+     single.add(value);
+     consequences = single;
+   }
+   else
+   {
+     return null;
+   }
+
+   /*
+    * inspect each consequence in turn (comma-separated blocks
+    * extracted by htsjdk)
+    */
+   for (Object item : consequences)
+   {
+     if (!(item instanceof String))
+     {
+       continue;
+     }
+     String consequence = (String) item;
+     String[] csqFields = consequence.split(PIPE_REGEX);
+     if (csqFields.length <= featureFieldIndex)
+     {
+       continue;
+     }
+
+     String featureIdentifier = csqFields[featureFieldIndex];
+     boolean featureMatched = featureIdentifier.length() > 4
+             && seqName.indexOf(featureIdentifier.toLowerCase()) > -1;
+
+     /*
+      * if feature (transcript) matched - now check for allele match
+      */
+     if (featureMatched && matchAllele(variant, altAlleleIndex, csqFields,
+             alleleFieldIndex, alleleNumberFieldIndex))
+     {
+       return consequence;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Answers true if the consequence data is for the specified alternate allele
+  * of the variant. If an allele number field is configured, only that field
+  * is tested; otherwise the consequence's allele field is compared with the
+  * bases of the alternate allele.
+  *
+  * @param variant
+  *          the VCF variant record
+  * @param altAlleleIndex
+  *          index (0, 1..) of the alternate allele
+  * @param csqFields
+  *          the fields of one consequence
+  * @param alleleFieldIndex
+  *          position of the allele field in csqFields, or -1 if none
+  * @param alleleNumberFieldIndex
+  *          position of the allele number field in csqFields, or -1 if none
+  * @return true if the consequence matches the allele, else false
+  */
+ private boolean matchAllele(VariantContext variant, int altAlleleIndex,
+         String[] csqFields, int alleleFieldIndex,
+         int alleleNumberFieldIndex)
+ {
+   /*
+    * ALLELE_NUM, when configured, decides the outcome by itself;
+    * it counts alternate alleles from 1, altAlleleIndex counts from 0
+    */
+   if (alleleNumberFieldIndex > -1)
+   {
+     return alleleNumberFieldIndex < csqFields.length
+             && String.valueOf(altAlleleIndex + 1)
+                     .equals(csqFields[alleleNumberFieldIndex]);
+   }
+
+   /*
+    * otherwise compare the allele bases, if the consequence has them
+    */
+   if (alleleFieldIndex < 0 || alleleFieldIndex >= csqFields.length)
+   {
+     return false;
+   }
+   String vcfAllele = variant.getAlternateAllele(altAlleleIndex)
+           .getBaseString();
+   return csqFields[alleleFieldIndex].equals(vcfAllele);
+ }
+
+ /**
+ * Add any allele-specific VCF key-value data to the sequence feature
+ *
+ * @param variant
+ * @param sf
+ * @param altAlelleIndex
+ * (0, 1..)
+ * @param consequence
+ * if not null, the consequence specific to this sequence (transcript
+ * feature) and allele
+ */
+ protected void addAlleleProperties(VariantContext variant,
+ SequenceFeature sf, final int altAlelleIndex, String consequence)
+ {