import htsjdk.variant.vcf.VCFHeaderLine;
import jalview.analysis.AlignmentUtils;
+import jalview.analysis.Dna;
import jalview.api.AlignViewControllerGuiI;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.GeneLoci;
import jalview.datamodel.Mapping;
-import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.ext.ensembl.EnsemblMap;
boolean isVcfRefGrch37)
{
int count = 0;
- GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+ GeneLoci seqCoords = seq.getGeneLoci();
if (seqCoords == null)
{
return 0;
protected int addVcfVariants(SequenceI seq, VCFReader reader,
int[] range, boolean isVcfRefGrch37)
{
- GeneLoci seqCoords = ((Sequence) seq).getGeneLoci();
+ GeneLoci seqCoords = seq.getGeneLoci();
String chromosome = seqCoords.chromosome;
String seqRef = seqCoords.assembly;
range = newRange;
}
+ boolean forwardStrand = range[0] <= range[1];
+
/*
* query the VCF for overlaps
* (convert a reverse strand range to forwards)
int[] seqLocation = mapping.locateInFrom(start, end);
if (seqLocation != null)
{
- addVariantFeatures(seq, variant, seqLocation[0], seqLocation[1]);
+ addVariantFeatures(seq, variant, seqLocation[0], seqLocation[1],
+ forwardStrand);
}
}
/**
* Inspects the VCF variant record, and adds variant features to the sequence.
* Only SNP variants are added, not INDELs.
+ * <p>
+ * If the sequence maps to the reverse strand of the chromosome, reference and
+ * variant bases are recorded as their complements (C/G, A/T).
*
* @param seq
* @param variant
* @param featureStart
* @param featureEnd
+ * @param forwardStrand
*/
protected void addVariantFeatures(SequenceI seq, VariantContext variant,
- int featureStart, int featureEnd)
+ int featureStart, int featureEnd, boolean forwardStrand)
{
- String reference = variant.getReference().getBaseString();
- if (reference.length() != 1)
+ byte[] reference = variant.getReference().getBases();
+ if (reference.length != 1)
{
/*
* sorry, we don't handle INDEL variants
/*
* for now we extract allele frequency as feature score; note
* this attribute is String for a simple SNP, but List<String> if
- * multiple alleles at the locus; we extract for the simple case only,
- * since not sure how to match allele order with AF values
+ * multiple alleles at the locus; we extract for the simple case only
*/
Object af = variant.getAttribute("AF");
float score = 0f;
}
StringBuilder sb = new StringBuilder();
- sb.append(reference);
+ sb.append(forwardStrand ? reference : Dna
+ .getComplement((char) reference[0]));
/*
* inspect alleles and record SNP variants (as the variant
* record could be MIXED and include INDEL and SNP alleles)
- */
- int alleleCount = 0;
-
- /*
- * inspect alleles; warning: getAlleles gives no guarantee
- * as to the order in which they are returned
+ * warning: getAlleles gives no guarantee as to the order
+ * in which they are returned
*/
for (Allele allele : variant.getAlleles())
{
if (!allele.isReference())
{
- String alleleBase = allele.getBaseString();
- if (alleleBase.length() == 1)
+ byte[] alleleBase = allele.getBases();
+ if (alleleBase.length == 1)
{
- sb.append(",").append(alleleBase);
- alleleCount++;
+ sb.append(",").append(
+ forwardStrand ? alleleBase : Dna
+ .getComplement((char) alleleBase[0]));
}
}
}
/*
* verify variant feature(s) added to gene
*/
- List<SequenceFeature> geneFeatures = al.getSequenceAt(0).findFeatures(
- 2, 2);
- assertEquals(geneFeatures.size(), 1);
+ List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
+ .getSequenceFeatures();
+ assertEquals(geneFeatures.size(), 2);
SequenceFeature sf = geneFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 2);
+ assertEquals(sf.getEnd(), 2);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES));
+ sf = geneFeatures.get(1);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 4);
+ assertEquals(sf.getEnd(), 4);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
+ assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+
/*
* verify variant feature(s) added to transcript
*/
List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
- .findFeatures(4, 4);
+ .getSequenceFeatures();
assertEquals(transcriptFeatures.size(), 1);
sf = transcriptFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 2);
+ assertEquals(sf.getEnd(), 2);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
*/
SequenceI peptide = al.getSequenceAt(1)
.getDBRefs()[0].getMap().getTo();
- List<SequenceFeature> proteinFeatures = peptide.findFeatures(1, 6);
+ List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
assertEquals(proteinFeatures.size(), 1);
sf = proteinFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 2);
- assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getBegin(), 1);
+ assertEquals(sf.getEnd(), 1);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getDescription(), "p.Ser1Thr");
}
return alignment;
}
+ /**
+ * Test with 'gene' and 'transcript' mapped to the reverse strand of the
+ * chromosome. The VCF variant positions (in forward coordinates) should get
+ * correctly located on sequence positions.
+ *
+ * @throws IOException
+ */
@Test(groups = "Functional")
public void testLoadVCF_reverseStrand() throws IOException
{
- // TODO a test with reverse strand mapping of
- // gene and transcript to chromosome
+ AlignmentI al = buildAlignment();
+
+ /*
+ * invert forward to reverse strand mappings
+ */
+ List<int[]> to = al.getSequenceAt(0).getGeneLoci().mapping
+ .getToRanges();
+ int temp = to.get(0)[0];
+ to.get(0)[0] = to.get(0)[1];
+ to.get(0)[1] = temp;
+ to = al.getSequenceAt(1).getGeneLoci().mapping.getToRanges();
+ to.get(0)[0] = to.get(0)[1];
+ to.get(0)[1] = temp;
+ to.get(1)[0] = to.get(1)[1];
+ to.get(1)[1] = temp;
+ int[] tmp2 = to.get(0);
+ to.set(0, to.get(1));
+ to.set(1, tmp2);
+
+ VCFLoader loader = new VCFLoader(al);
+
+ File f = makeVcf();
+
+ loader.loadVCF(f.getPath(), null);
+
+ /*
+ * verify variant feature(s) added to gene
+ */
+ List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
+ .getSequenceFeatures();
+ assertEquals(geneFeatures.size(), 2);
+ SequenceFeature sf = geneFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 2);
+ assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
+ assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES));
+
+ sf = geneFeatures.get(1);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 4);
+ assertEquals(sf.getEnd(), 4);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
+ assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+
+ /*
+ * verify variant feature(s) added to transcript
+ */
+ List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
+ .getSequenceFeatures();
+ assertEquals(transcriptFeatures.size(), 1);
+ sf = transcriptFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 2);
+ assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
+ assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+
+ /*
+ * verify variant feature(s) computed and added to protein
+ * first codon AGC varies to ACC giving S/T
+ */
+ SequenceI peptide = al.getSequenceAt(1).getDBRefs()[0].getMap().getTo();
+ List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
+ assertEquals(proteinFeatures.size(), 1);
+ sf = proteinFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 1);
+ assertEquals(sf.getEnd(), 1);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getDescription(), "p.Ser1Thr");
}
}