X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=053b52f9c118d725bb1a8135f88fe30ead3ee00f;hb=b53d89acfa678df63d6870176b4c7ec9285f52ee;hp=20e3ccd48192504ad0c39bfcdda69be153a16749;hpb=0a680b4ff1aaad7580d3b10941233307e2190be4;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 20e3ccd..053b52f 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -4,7 +4,6 @@ import jalview.analysis.AlignmentUtils; import jalview.analysis.Dna; import jalview.api.AlignViewControllerGuiI; import jalview.bin.Cache; -import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; @@ -14,6 +13,7 @@ import jalview.datamodel.features.FeatureAttributeType; import jalview.datamodel.features.FeatureSource; import jalview.datamodel.features.FeatureSources; import jalview.ext.ensembl.EnsemblMap; +import jalview.ext.htsjdk.HtsContigDb; import jalview.ext.htsjdk.VCFReader; import jalview.io.gff.Gff3Helper; import jalview.io.gff.SequenceOntologyI; @@ -21,6 +21,7 @@ import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.MessageManager; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -50,6 +51,10 @@ import htsjdk.variant.vcf.VCFInfoHeaderLine; */ public class VCFLoader { + private static final String NO_VALUE = "."; + + private static final String DEFAULT_SPECIES = "homo_sapiens"; + /** * A class to model the mapping from sequence to VCF coordinates. Cases include * * - * @param seq - * @param variant - * @param altAlleleIndex + * @param consequence * @return * @see http://www.sequenceontology.org/browser/current_svn/term/SO:0001060 */ - String getOntologyTerm(SequenceI seq, VariantContext variant, - int altAlleleIndex) + String getOntologyTerm(String consequence) { String type = SequenceOntologyI.SEQUENCE_VARIANT; + /* + * could we associate Consequence data with this allele and feature (transcript)? + * if so, prefer the consequence term from that data + */ if (csqAlleleFieldIndex == -1) // && snpEffAlleleFieldIndex == -1 { /* @@ -947,14 +1026,6 @@ public class VCFLoader return type; } - /* - * can we associate Consequence data with this allele and feature (transcript)? - * if so, prefer the consequence term from that data - */ - String consequence = getConsequenceForAlleleAndFeature(variant, - CSQ_FIELD, - altAlleleIndex, csqAlleleFieldIndex, csqAlleleNumberFieldIndex, - seq.getName().toLowerCase(), csqFeatureFieldIndex); if (consequence != null) { String[] csqFields = consequence.split(PIPE_REGEX); @@ -1085,13 +1156,15 @@ public class VCFLoader * Add any allele-specific VCF key-value data to the sequence feature * * @param variant - * @param seq * @param sf * @param altAlelleIndex * (0, 1..) + * @param consequence + * if not null, the consequence specific to this sequence (transcript + * feature) and allele */ - protected void addAlleleProperties(VariantContext variant, SequenceI seq, - SequenceFeature sf, final int altAlelleIndex) + protected void addAlleleProperties(VariantContext variant, + SequenceFeature sf, final int altAlelleIndex, String consequence) { Map atts = variant.getAttributes(); @@ -1105,7 +1178,15 @@ public class VCFLoader */ if (CSQ_FIELD.equals(key)) { - addConsequences(variant, seq, sf, altAlelleIndex); + addConsequences(variant, sf, consequence); + continue; + } + + /* + * filter out fields we don't want to capture + */ + if (!vcfFieldsOfInterest.contains(key)) + { continue; } @@ -1163,35 +1244,19 @@ public class VCFLoader /** * Inspects CSQ data blocks (consequences) and adds attributes on the sequence - * feature for the current allele (and transcript if applicable) + * feature. *

- * Allele matching: if field ALLELE_NUM is present, it must match - * altAlleleIndex. If not present, then field Allele value must match the VCF - * Allele. - *

- * Transcript matching: if sequence name can be identified to at least one of - * the consequences' Feature values, then select only consequences that match - * the value (i.e. consequences for the current transcript sequence). If not, - * take all consequences (this is the case when adding features to the gene - * sequence). + * If myConsequence is not null, then this is the specific + * consequence data (pipe-delimited fields) that is for the current allele and + * transcript (sequence) being processed) * * @param variant - * @param seq * @param sf - * @param altAlleleIndex - * (0, 1..) + * @param myConsequence */ - protected void addConsequences(VariantContext variant, SequenceI seq, - SequenceFeature sf, int altAlleleIndex) + protected void addConsequences(VariantContext variant, SequenceFeature sf, + String myConsequence) { - /* - * first try to identify the matching consequence - */ - String myConsequence = getConsequenceForAlleleAndFeature(variant, - CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, - csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), - csqFeatureFieldIndex); - Object value = variant.getAttribute(CSQ_FIELD); if (value == null || !(value instanceof List))