From: gmungoc Date: Tue, 26 Sep 2017 12:15:30 +0000 (+0100) Subject: JAL-2738 transfer VCF variant features to CDS sequences X-Git-Tag: Release_2_11_0~225 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=086ba94444cd5748b76a4236149af998022ed240;p=jalview.git JAL-2738 transfer VCF variant features to CDS sequences --- diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 6b1eb0d..6c4526f 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -36,6 +36,8 @@ import java.util.Map.Entry; */ public class VCFLoader { + private static final String FEATURE_GROUP_VCF = "VCF"; + private static final String EXCL = "!"; /* @@ -103,7 +105,7 @@ public class VCFLoader { seqCount++; varCount += added; - computePeptideVariants(seq); + transferAddedFeatures(seq); } } // long elapsed = System.currentTimeMillis() - start; @@ -133,16 +135,14 @@ public class VCFLoader } /** - * (Re-)computes peptide variants from dna variants, for any protein sequence - * to which the dna sequence has a mapping. Note that although duplicate - * features may get computed, they will not be added, since duplicate sequence - * features are ignored in Sequence.addSequenceFeature. + * Transfers VCF features to sequences to which this sequence has a mapping. + * If the mapping is 1:3, computes peptide variants from nucleotide variants. * - * @param dnaSeq + * @param seq */ - protected void computePeptideVariants(SequenceI dnaSeq) + protected void transferAddedFeatures(SequenceI seq) { - DBRefEntry[] dbrefs = dnaSeq.getDBRefs(); + DBRefEntry[] dbrefs = seq.getDBRefs(); if (dbrefs == null) { return; @@ -150,13 +150,36 @@ public class VCFLoader for (DBRefEntry dbref : dbrefs) { Mapping mapping = dbref.getMap(); - if (mapping == null || mapping.getTo() == null - || mapping.getMap().getFromRatio() != 3) + if (mapping == null || mapping.getTo() == null) { continue; } - AlignmentUtils.computeProteinFeatures(dnaSeq, mapping.getTo(), - mapping.getMap()); + + SequenceI mapTo = mapping.getTo(); + MapList map = mapping.getMap(); + if (map.getFromRatio() == 3) + { + /* + * dna-to-peptide product mapping + */ + AlignmentUtils.computeProteinFeatures(seq, mapTo, map); + } + else + { + /* + * nucleotide-to-nucleotide mapping e.g. transcript to CDS + */ + // TODO no DBRef to CDS is added to transcripts + List features = seq.getFeatures() + .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); + for (SequenceFeature sf : features) + { + if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) + { + transferFeature(sf, mapTo, map); + } + } + } } } @@ -363,7 +386,7 @@ public class VCFLoader String type = SequenceOntologyI.SEQUENCE_VARIANT; SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, - featureEnd, score, "VCF"); + featureEnd, score, FEATURE_GROUP_VCF); sf.setValue(Gff3Helper.ALLELES, alleles); @@ -503,6 +526,32 @@ public class VCFLoader } /** + * Transfers the sequence feature to the target sequence, locating its start + * and end range based on the mapping. Features which do not overlap the + * target sequence are ignored. + * + * @param sf + * @param targetSequence + * @param mapping + * mapping from the feature's coordinates to the target sequence + */ + protected void transferFeature(SequenceFeature sf, + SequenceI targetSequence, MapList mapping) + { + int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + + if (mappedRange != null) + { + String group = sf.getFeatureGroup(); + int newBegin = Math.min(mappedRange[0], mappedRange[1]); + int newEnd = Math.max(mappedRange[0], mappedRange[1]); + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, + group, sf.getScore()); + targetSequence.addSequenceFeature(copy); + } + } + + /** * Formats a ranges map lookup key * * @param chromosome