import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import java.io.File;
import java.io.IOException;
private Set<String> badData;
/**
- * Constructor given a VCF file
+ * Constructor given a path to a VCF file
*
- * @param alignment
+ * @param vcfFile
*/
public VCFLoader(String vcfFile)
{
}
/**
- * A convenience method to get an attribute value for an alternate allele
+ * A convenience method to get an attribute value for an alternate allele.
+ * {@code alleleIndex} is the position in the list of values for the allele.
+ * If {@code alleleIndex == -1} then all values are concatenated (comma-separated).
+ * This is the case for fields declared with "Number=." i.e. values are not
+ * related to specific alleles.
*
* @param variant
* @param attributeName
{
Object att = variant.getAttribute(attributeName);
+ String result = null;
if (att instanceof String)
{
- return (String) att;
+ result = (String) att;
}
- else if (att instanceof ArrayList)
+ else if (att instanceof List<?>)
{
- return ((List<String>) att).get(alleleIndex);
+ List<String> theList = (List<String>) att;
+ if (alleleIndex == -1)
+ {
+ result = StringUtils.listToDelimitedString(theList, ",");
+ }
+ else
+ {
+ result = theList.get(alleleIndex);
+ }
}
- return null;
+ return result;
}
/**
*/
index++;
}
+ else if (number == VCFHeaderLineCount.UNBOUNDED) // .
+ {
+ index = -1;
+ }
else if (number != VCFHeaderLineCount.A)
{
/*
+ ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
private static final String[] VCF = { "##fileformat=VCFv4.2",
- // fields other than AF are ignored when parsing as they have no INFO definition
+ // note fields with no INFO definition are ignored when parsing
"##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
"##INFO=<ID=AC_Female,Number=A,Type=Integer,Description=\"Allele count in Female genotypes\"",
"##INFO=<ID=AF_AFR,Number=A,Type=Float,Description=\"Allele Frequency among African/African American genotypes\"",
+ "##INFO=<ID=CLNSIG,Number=.,Type=String,Description=\"Clinical significance for this single variant\"",
"##reference=Homo_sapiens/GRCh38",
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
// A/T,C variants in position 2 of gene sequence (precedes transcript)
// should create 2 variant features with respective AF values
// malformed values for AC_Female and AF_AFR should be ignored
- "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4",
+ "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4;CLNSIG=benign,probably_benign",
// SNP G/C in position 4 of gene sequence, position 2 of transcript
// insertion G/GA is transferred to nucleotide but not to peptide
"17\t45051613\t.\tG\tGA,C\t1666.65\t.\tAC=15;AF=3.0e-03,2.0e-03",
assertEquals(sf.getValue("ID"), "rs384765");
assertEquals(sf.getValue("QUAL"), "1666.64");
assertEquals(sf.getValue("FILTER"), "RF;XYZ");
+ /*
+ * if INFO declares Number=. then all values are attached to each allele
+ */
+ assertEquals(sf.getValue("CLNSIG"), "benign,probably_benign");
// malformed integer for AC_Female is ignored (JAL-3375)
assertNull(sf.getValue("AC_Female"));
// malformed float for AF_AFR is ignored (JAL-3375)
assertNull(sf.getValue("AC_AFR"));
assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
+ assertEquals(sf.getValue("CLNSIG"), "benign,probably_benign");
sf = geneFeatures.get(2);
assertEquals(sf.getFeatureGroup(), "VCF");