X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=78d2ad58ed0f28814bda7ce6cf09063eadccafdd;hb=4b24105fbfd942b6ec58a407b9dc68a331ba13d7;hp=6b1eb0dcfe3bf65dae568da76563972ffdead6c7;hpb=4fa6faf64b0d16caffa71ac38506d473952478c5;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 6b1eb0d..78d2ad5 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -11,7 +11,7 @@ import jalview.analysis.Dna; import jalview.api.AlignViewControllerGuiI; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.GeneLoci; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -36,6 +36,8 @@ import java.util.Map.Entry; */ public class VCFLoader { + private static final String FEATURE_GROUP_VCF = "VCF"; + private static final String EXCL = "!"; /* @@ -103,7 +105,7 @@ public class VCFLoader { seqCount++; varCount += added; - computePeptideVariants(seq); + transferAddedFeatures(seq); } } // long elapsed = System.currentTimeMillis() - start; @@ -133,16 +135,14 @@ public class VCFLoader } /** - * (Re-)computes peptide variants from dna variants, for any protein sequence - * to which the dna sequence has a mapping. Note that although duplicate - * features may get computed, they will not be added, since duplicate sequence - * features are ignored in Sequence.addSequenceFeature. + * Transfers VCF features to sequences to which this sequence has a mapping. + * If the mapping is 1:3, computes peptide variants from nucleotide variants. * - * @param dnaSeq + * @param seq */ - protected void computePeptideVariants(SequenceI dnaSeq) + protected void transferAddedFeatures(SequenceI seq) { - DBRefEntry[] dbrefs = dnaSeq.getDBRefs(); + DBRefEntry[] dbrefs = seq.getDBRefs(); if (dbrefs == null) { return; @@ -150,13 +150,36 @@ public class VCFLoader for (DBRefEntry dbref : dbrefs) { Mapping mapping = dbref.getMap(); - if (mapping == null || mapping.getTo() == null - || mapping.getMap().getFromRatio() != 3) + if (mapping == null || mapping.getTo() == null) { continue; } - AlignmentUtils.computeProteinFeatures(dnaSeq, mapping.getTo(), - mapping.getMap()); + + SequenceI mapTo = mapping.getTo(); + MapList map = mapping.getMap(); + if (map.getFromRatio() == 3) + { + /* + * dna-to-peptide product mapping + */ + AlignmentUtils.computeProteinFeatures(seq, mapTo, map); + } + else + { + /* + * nucleotide-to-nucleotide mapping e.g. transcript to CDS + */ + // TODO no DBRef to CDS is added to transcripts + List features = seq.getFeatures() + .getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); + for (SequenceFeature sf : features) + { + if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) + { + transferFeature(sf, mapTo, map); + } + } + } } } @@ -176,13 +199,13 @@ public class VCFLoader boolean isVcfRefGrch37) { int count = 0; - GeneLoci seqCoords = seq.getGeneLoci(); + GeneLociI seqCoords = seq.getGeneLoci(); if (seqCoords == null) { return 0; } - List seqChromosomalContigs = seqCoords.mapping.getToRanges(); + List seqChromosomalContigs = seqCoords.getMap().getToRanges(); for (int[] range : seqChromosomalContigs) { count += addVcfVariants(seq, reader, range, isVcfRefGrch37); @@ -208,11 +231,11 @@ public class VCFLoader protected int addVcfVariants(SequenceI seq, VCFReader reader, int[] range, boolean isVcfRefGrch37) { - GeneLoci seqCoords = seq.getGeneLoci(); + GeneLociI seqCoords = seq.getGeneLoci(); - String chromosome = seqCoords.chromosome; - String seqRef = seqCoords.assembly; - String species = seqCoords.species; + String chromosome = seqCoords.getChromosomeId(); + String seqRef = seqCoords.getAssemblyId(); + String species = seqCoords.getSpeciesId(); // TODO handle species properly if ("".equals(species)) @@ -250,7 +273,7 @@ public class VCFLoader * (convert a reverse strand range to forwards) */ int count = 0; - MapList mapping = seqCoords.mapping; + MapList mapping = seqCoords.getMap(); int fromLocus = Math.min(range[0], range[1]); int toLocus = Math.max(range[0], range[1]); @@ -363,7 +386,7 @@ public class VCFLoader String type = SequenceOntologyI.SEQUENCE_VARIANT; SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, - featureEnd, score, "VCF"); + featureEnd, score, FEATURE_GROUP_VCF); sf.setValue(Gff3Helper.ALLELES, alleles); @@ -503,6 +526,32 @@ public class VCFLoader } /** + * Transfers the sequence feature to the target sequence, locating its start + * and end range based on the mapping. Features which do not overlap the + * target sequence are ignored. + * + * @param sf + * @param targetSequence + * @param mapping + * mapping from the feature's coordinates to the target sequence + */ + protected void transferFeature(SequenceFeature sf, + SequenceI targetSequence, MapList mapping) + { + int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + + if (mappedRange != null) + { + String group = sf.getFeatureGroup(); + int newBegin = Math.min(mappedRange[0], mappedRange[1]); + int newEnd = Math.max(mappedRange[0], mappedRange[1]); + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, + group, sf.getScore()); + targetSequence.addSequenceFeature(copy); + } + } + + /** * Formats a ranges map lookup key * * @param chromosome