X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=5adc55c0764ef1eacac1d74391bb4ad7380f1a74;hb=14193747f3831242bc7dfac12394eb20eb0ba480;hp=e381b264ffde9cecb38055a211d572160e601fdd;hpb=a57976ba40e1abe6d7c1940386e1a25419ef9c9d;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index e381b26..5adc55c 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -6,6 +6,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLineCount; +import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; import jalview.analysis.AlignmentUtils; @@ -17,6 +18,9 @@ import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureSource; +import jalview.datamodel.features.FeatureSources; import jalview.ext.ensembl.EnsemblMap; import jalview.ext.htsjdk.VCFReader; import jalview.io.gff.Gff3Helper; @@ -116,6 +120,12 @@ public class VCFLoader private int csqAlleleNumberFieldIndex = -1; private int csqFeatureFieldIndex = -1; + /* + * a unique identifier under which to save metadata about feature + * attributes (selected INFO field data) + */ + private String sourceId; + /** * Constructor given an alignment context * @@ -175,18 +185,19 @@ public class VCFLoader reader = new VCFReader(filePath); header = reader.getFileHeader(); - VCFHeaderLine ref = header - .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); + + sourceId = filePath; + + saveMetadata(sourceId); /* * get offset of CSQ ALLELE_NUM and Feature if declared */ locateCsqFields(); - // check if reference is wrt assembly19 (GRCh37) - // todo may need to allow user to specify reference assembly? - boolean isRefGrch37 = (ref != null && ref.getValue().contains( - "assembly19")); + VCFHeaderLine ref = header + .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); + String vcfAssembly = ref.getValue(); int varCount = 0; int seqCount = 0; @@ -196,7 +207,7 @@ public class VCFLoader */ for (SequenceI seq : al.getSequences()) { - int added = loadSequenceVCF(seq, reader, isRefGrch37); + int added = loadSequenceVCF(seq, reader, vcfAssembly); if (added > 0) { seqCount++; @@ -239,6 +250,47 @@ public class VCFLoader } /** + * Reads metadata (such as INFO field descriptions and datatypes) and saves + * them for future reference + * + * @param sourceId + */ + void saveMetadata(String sourceId) + { + FeatureSource metadata = new FeatureSource(sourceId); + + for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) + { + String attributeId = info.getID(); + String desc = info.getDescription(); + VCFHeaderLineType type = info.getType(); + FeatureAttributeType attType = null; + switch (type) + { + case Character: + attType = FeatureAttributeType.Character; + break; + case Flag: + attType = FeatureAttributeType.Flag; + break; + case Float: + attType = FeatureAttributeType.Float; + break; + case Integer: + attType = FeatureAttributeType.Integer; + break; + case String: + attType = FeatureAttributeType.String; + break; + } + metadata.setAttributeName(attributeId, desc); + metadata.setAttributeType(attributeId, attType); + } + + FeatureSources.getInstance().addSource(sourceId, metadata); + } + + /** * Records the position of selected fields defined in the CSQ INFO header (if * there is one). CSQ fields are declared in the CSQ INFO Description e.g. *
@@ -336,16 +388,17 @@ public class VCFLoader
/**
* Tries to add overlapping variants read from a VCF file to the given
* sequence, and returns the number of variant features added. Note that this
- * requires the sequence to hold information as to its chromosomal positions
- * and reference, in order to be able to map the VCF variants to the sequence.
+ * requires the sequence to hold information as to its species, chromosomal
+ * positions and reference assembly, in order to be able to map the VCF
+ * variants to the sequence (or not)
*
* @param seq
* @param reader
- * @param isVcfRefGrch37
+ * @param vcfAssembly
* @return
*/
protected int loadSequenceVCF(SequenceI seq, VCFReader reader,
- boolean isVcfRefGrch37)
+ String vcfAssembly)
{
int count = 0;
GeneLociI seqCoords = seq.getGeneLoci();
@@ -357,16 +410,55 @@ public class VCFLoader
return 0;
}
+ if (!vcfSpeciesMatchesSequence(vcfAssembly, seqCoords.getSpeciesId()))
+ {
+ return 0;
+ }
+
List