X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;fp=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=ea5b8e06f723f3ac512a903c780508e2158184f5;hb=3459a8a691cb22508d7067f240b7254e588e77d3;hp=29b300486332c456068cfad098ccfd620f828122;hpb=5b27f1062b2203c4c31702e205f4c78e1992063e;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 29b3004..ea5b8e0 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -96,13 +96,14 @@ public class VCFLoader private static final String DEFAULT_SPECIES = "homo_sapiens"; /** - * A class to model the mapping from sequence to VCF coordinates. Cases include + * A class to model the mapping from sequence to VCF coordinates. Cases + * include * */ class VCFMap @@ -146,15 +147,20 @@ public class VCFLoader private static final String VCF_SPECIES = "VCF_SPECIES"; // default is human - private static final String DEFAULT_REFERENCE = "grch37"; // fallback default is human GRCh37 + private static final String DEFAULT_REFERENCE = "grch37"; // fallback default + // is human GRCh37 /* * keys to fields of VEP CSQ consequence data * see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html */ private static final String CSQ_CONSEQUENCE_KEY = "Consequence"; + private static final String CSQ_ALLELE_KEY = "Allele"; - private static final String CSQ_ALLELE_NUM_KEY = "ALLELE_NUM"; // 0 (ref), 1... + + private static final String CSQ_ALLELE_NUM_KEY = "ALLELE_NUM"; // 0 (ref), + // 1... + private static final String CSQ_FEATURE_KEY = "Feature"; // Ensembl stable id /* @@ -225,8 +231,11 @@ public class VCFLoader * see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html */ private int csqConsequenceFieldIndex = -1; + private int csqAlleleFieldIndex = -1; + private int csqAlleleNumberFieldIndex = -1; + private int csqFeatureFieldIndex = -1; // todo the same fields for SnpEff ANN data if wanted @@ -312,7 +321,8 @@ public class VCFLoader */ public SequenceI loadVCFContig(String contig) { - VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); + VCFHeaderLine headerLine = header + .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); if (headerLine == null) { Console.error("VCF reference header not found"); @@ -413,9 +423,9 @@ public class VCFLoader /** * Attempts to determine and save the species and genome assembly version to - * which the VCF data applies. This may be done by parsing the {@code reference} - * header line, configured in a property file, or (potentially) confirmed - * interactively by the user. + * which the VCF data applies. This may be done by parsing the + * {@code reference} header line, configured in a property file, or + * (potentially) confirmed interactively by the user. *

* The saved values should be identifiers valid for Ensembl's REST service * {@code map} endpoint, so they can be used (if necessary) to retrieve the @@ -665,7 +675,7 @@ public class VCFLoader { try { - patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); + patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); } catch (PatternSyntaxException e) { System.err.println("Invalid pattern ignored: " + token); @@ -723,8 +733,8 @@ public class VCFLoader } /** - * Tries to add overlapping variants read from a VCF file to the given sequence, - * and returns the number of variant features added + * Tries to add overlapping variants read from a VCF file to the given + * sequence, and returns the number of variant features added * * @param seq * @return @@ -821,10 +831,9 @@ public class VCFLoader vcfAssembly); if (newRange == null) { - Console.error( - String.format("Failed to map %s:%s:%s:%d:%d to %s", species, - chromosome, seqRef, range[0], range[1], - vcfAssembly)); + Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s", + species, chromosome, seqRef, range[0], range[1], + vcfAssembly)); continue; } else @@ -920,7 +929,7 @@ public class VCFLoader * RuntimeException throwable by htsjdk */ String msg = String.format("Error reading VCF for %s:%d-%d: %s ", - map.chromosome, vcfStart, vcfEnd,e.getLocalizedMessage()); + map.chromosome, vcfStart, vcfEnd, e.getLocalizedMessage()); Console.error(msg); } } @@ -985,8 +994,8 @@ public class VCFLoader /** * Inspects one allele and attempts to add a variant feature for it to the * sequence. The additional data associated with this allele is extracted to - * store in the feature's key-value map. Answers the number of features added (0 - * or 1). + * store in the feature's key-value map. Answers the number of features added + * (0 or 1). * * @param seq * @param variant @@ -1034,10 +1043,10 @@ public class VCFLoader * pick out the consequence data (if any) that is for the current allele * and feature (transcript) that matches the current sequence */ - String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, - altAlleleIndex, csqAlleleFieldIndex, - csqAlleleNumberFieldIndex, seq.getName().toLowerCase(Locale.ROOT), - csqFeatureFieldIndex); + String consequence = getConsequenceForAlleleAndFeature(variant, + CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, + csqAlleleNumberFieldIndex, + seq.getName().toLowerCase(Locale.ROOT), csqFeatureFieldIndex); /* * pick out the ontology term for the consequence type @@ -1077,8 +1086,8 @@ public class VCFLoader /** * Answers the VCF FILTER value for the variant - or an approximation to it. * This field is either PASS, or a semi-colon separated list of filters not - * passed. htsjdk saves filters as a HashSet, so the order when reassembled into - * a list may be different. + * passed. htsjdk saves filters as a HashSet, so the order when reassembled + * into a list may be different. * * @param variant * @return @@ -1122,9 +1131,9 @@ public class VCFLoader } /** - * Determines the Sequence Ontology term to use for the variant feature type in - * Jalview. The default is 'sequence_variant', but a more specific term is used - * if: + * Determines the Sequence Ontology term to use for the variant feature type + * in Jalview. The default is 'sequence_variant', but a more specific term is + * used if: *