return 0;
}
- return addVcfVariants(seq, reader, vcfMap, vcfAssembly);
+ /*
+ * work with the dataset sequence here
+ */
+ SequenceI dss = seq.getDatasetSequence();
+ if (dss == null)
+ {
+ dss = seq;
+ }
+ return addVcfVariants(dss, reader, vcfMap, vcfAssembly);
}
/**
String allele = alt.getBaseString();
/*
+ * insertion after a genomic base, if on reverse strand, has to be
+ * converted to insertion of complement after the preceding position
+ */
+ int referenceLength = reference.length();
+ if (!forwardStrand && allele.length() > referenceLength
+ && allele.startsWith(reference))
+ {
+ featureStart -= referenceLength;
+ featureEnd = featureStart;
+ char insertAfter = seq.getCharAt(featureStart - seq.getStart());
+ reference = Dna.reverseComplement(String.valueOf(insertAfter));
+ allele = allele.substring(referenceLength) + reference;
+ }
+
+ /*
* build the ref,alt allele description e.g. "G,A", using the base
* complement if the sequence is on the reverse strand
*/
- // FIXME correctly handle insertions on reverse strand JAL-2845
StringBuilder sb = new StringBuilder();
sb.append(forwardStrand ? reference : Dna.reverseComplement(reference));
sb.append(COMMA);
/*
* verify variant feature(s) added to gene2
- * gene/1-25 maps to chromosome 45051634- reverse strand
- * variants A/T, A/C at 45051611 and G/GA,G/C at 45051613 map to
- * T/A, T/G and C/TC,C/G at gene positions 24 and 22 respectively
+ * gene2/1-25 maps to chromosome 45051634- reverse strand
*/
List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
.getSequenceFeatures();
SequenceFeatures.sortFeatures(geneFeatures, true);
assertEquals(geneFeatures.size(), 4);
- SequenceFeature sf = geneFeatures.get(0);
- assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 22);
- assertEquals(sf.getEnd(), 22);
- assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 2.0e-03, DELTA);
- assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES));
- sf = geneFeatures.get(1);
+ /*
+ * variant A/T at 45051611 maps to T/A at gene position 24
+ */
+ SequenceFeature sf = geneFeatures.get(3);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 22);
- assertEquals(sf.getEnd(), 22);
+ assertEquals(sf.getBegin(), 24);
+ assertEquals(sf.getEnd(), 24);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 3.0e-03, DELTA);
- assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 5.0e-03, DELTA);
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
+ /*
+ * variant A/C at 45051611 maps to T/G at gene position 24
+ */
sf = geneFeatures.get(2);
assertEquals(sf.getFeatureGroup(), "VCF");
assertEquals(sf.getBegin(), 24);
assertEquals(sf.getEnd(), 24);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 4.0e-03, DELTA);
- assertEquals("T,G", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
- sf = geneFeatures.get(3);
+ /*
+ * variant G/C at 45051613 maps to C/G at gene position 22
+ */
+ sf = geneFeatures.get(1);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 24);
- assertEquals(sf.getEnd(), 24);
+ assertEquals(sf.getBegin(), 22);
+ assertEquals(sf.getEnd(), 22);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 5.0e-03, DELTA);
- assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 2.0e-03, DELTA);
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
+
+ /*
+ * insertion G/GA at 45051613 maps to an insertion at
+ * the preceding position (21) on reverse strand gene
+ * reference: CAAGC -> GCTTG/21-25
+ * genomic variant: CAAGAC (G/GA)
+ * gene variant: GTCTTG (G/GT at 21)
+ */
+ sf = geneFeatures.get(0);
+ assertEquals(sf.getFeatureGroup(), "VCF");
+ assertEquals(sf.getBegin(), 21);
+ assertEquals(sf.getEnd(), 21);
+ assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
+ assertEquals(sf.getScore(), 3.0e-03, DELTA);
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
/*
- * verify variant feature(s) added to transcript2
- * variants G/GA,G/C at position 22 of gene overlap and map to
- * C/TC,C/G at position 17 of transcript
+ * verify 2 variant features added to transcript2
*/
List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
.getSequenceFeatures();
assertEquals(transcriptFeatures.size(), 2);
+
+ /*
+ * insertion G/GT at position 21 of gene maps to position 16 of transcript
+ */
sf = transcriptFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 17);
- assertEquals(sf.getEnd(), 17);
+ assertEquals(sf.getBegin(), 16);
+ assertEquals(sf.getEnd(), 16);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 2.0e-03, DELTA);
- assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 3.0e-03, DELTA);
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
+ /*
+ * SNP C/G at position 22 of gene maps to position 17 of transcript
+ */
sf = transcriptFeatures.get(1);
assertEquals(sf.getFeatureGroup(), "VCF");
assertEquals(sf.getBegin(), 17);
assertEquals(sf.getEnd(), 17);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 3.0e-03, DELTA);
- assertEquals("C,TC", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 2.0e-03, DELTA);
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
/*
* verify variant feature(s) computed and added to protein