import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
-import java.util.Arrays;
import java.util.List;
import org.testng.annotations.Test;
public class VCFLoaderTest
{
// columns 9717- of gene P30419 from Ensembl (modified)
- private static final String FASTA = ">ENSG00000136448/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
+ private static final String FASTA =
+ // forward strand 'gene'
+ ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
+ "CAAGCTGGCGGACGAGAGTGTGACA\n"
// and a 'made up' mini-transcript with two exons
- + ">ENST00000592782/1-18\n--AGCTGGCG----AGAGTGTGAC-\n";
+ + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
+ +
+ // 'reverse strand' gene (reverse complement)
+ ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
+ + "TGTCACACTCTCGTCCGCCAGCTTG\n"
+ // and its 'transcript'
+ + ">transcript2/1-18\n"
+ + "-GTCACACTCT----CGCCAGCT--\n";
private static final String[] VCF = { "##fileformat=VCFv4.2",
"##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
assertEquals(sf.getEnd(), 2);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
- assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
sf = geneFeatures.get(1);
assertEquals(sf.getFeatureGroup(), "VCF");
assertEquals(sf.getEnd(), 4);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
- assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
/*
* verify variant feature(s) added to transcript
assertEquals(sf.getEnd(), 2);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
- assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
/*
* verify variant feature(s) computed and added to protein
DataSourceType.PASTE);
/*
- * map gene sequence to chromosome (normally done when the sequence is fetched
+ * map gene1 sequence to chromosome (normally done when the sequence is fetched
* from Ensembl and transcripts computed)
*/
AlignmentI alignment = af.getViewport().getAlignment();
- int[][] to = new int[][] { new int[] { 45051610, 45051634 } };
- List<int[]> toRanges = Arrays.asList(to);
- SequenceI gene = alignment.getSequenceAt(0);
- List<int[]> fromRanges = Arrays.asList(new int[][] { new int[] {
- gene.getStart(), gene.getEnd() } });
- ((Sequence) gene).setGeneLoci(new GeneLoci("human", "GRCh38", "17",
- new MapList(fromRanges, toRanges, 1, 1)));
+ SequenceI gene1 = alignment.getSequenceAt(0);
+ int[] to = new int[] { 45051610, 45051634 };
+ int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
+ gene1.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList(
+ from, to, 1, 1)));
/*
- * map 'transcript' to chromosome via 'gene'
- * transcript/1-18 is gene/3-10,15-24
+ * map 'transcript1' to chromosome via 'gene1'
+ * transcript1/1-18 is gene1/3-10,15-24
* which is chromosome 45051612-45051619,45051624-45051633
*/
- to = new int[][] { new int[] { 45051612, 45051619 },
- new int[] { 45051624, 45051633 } };
- toRanges = Arrays.asList(to);
- SequenceI transcript = alignment.getSequenceAt(1);
- fromRanges = Arrays.asList(new int[][] { new int[] {
- transcript.getStart(), transcript.getEnd() } });
- ((Sequence) transcript).setGeneLoci(new GeneLoci("human", "GRCh38",
- "17", new MapList(fromRanges, toRanges, 1, 1)));
+ to = new int[] { 45051612, 45051619, 45051624, 45051633 };
+ SequenceI transcript1 = alignment.getSequenceAt(1);
+ from = new int[] { transcript1.getStart(), transcript1.getEnd() };
+ transcript1.setGeneLoci(new GeneLoci("human", "GRCh38", "17",
+ new MapList(from, to, 1, 1)));
/*
- * add a protein product as a DBRef on the transcript
+ * map gene2 to chromosome reverse strand
*/
- SequenceI peptide = new Sequence("ENSP001", "SWRECD");
+ SequenceI gene2 = alignment.getSequenceAt(2);
+ to = new int[] { 45051634, 45051610 };
+ from = new int[] { gene2.getStart(), gene2.getEnd() };
+ gene2.setGeneLoci(new GeneLoci("human", "GRCh38", "17", new MapList(
+ from, to, 1, 1)));
+
+ /*
+ * map 'transcript2' to chromosome via 'gene2'
+ * transcript2/1-18 is gene2/2-11,16-23
+ * which is chromosome 45051633-45051624,45051619-45051612
+ */
+ to = new int[] { 45051633, 45051624, 45051619, 45051612 };
+ SequenceI transcript2 = alignment.getSequenceAt(3);
+ from = new int[] { transcript2.getStart(), transcript2.getEnd() };
+ transcript2.setGeneLoci(new GeneLoci("human", "GRCh38", "17",
+ new MapList(from, to, 1, 1)));
+
+ /*
+ * add a protein product as a DBRef on transcript1
+ */
+ SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
3, 1);
- Mapping map = new Mapping(peptide, mapList);
+ Mapping map = new Mapping(peptide1, mapList);
DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
- transcript.addDBRef(product);
+ transcript1.addDBRef(product);
+
+ /*
+ * add a protein product as a DBRef on transcript2
+ */
+ SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
+ mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
+ map = new Mapping(peptide2, mapList);
+ product = new DBRefEntry("", "", "ENSP002", map);
+ transcript2.addDBRef(product);
return alignment;
}
{
AlignmentI al = buildAlignment();
- /*
- * invert forward to reverse strand mappings
- */
- List<int[]> to = al.getSequenceAt(0).getGeneLoci().mapping
- .getToRanges();
- int temp = to.get(0)[0];
- to.get(0)[0] = to.get(0)[1];
- to.get(0)[1] = temp;
- to = al.getSequenceAt(1).getGeneLoci().mapping.getToRanges();
- to.get(0)[0] = to.get(0)[1];
- to.get(0)[1] = temp;
- to.get(1)[0] = to.get(1)[1];
- to.get(1)[1] = temp;
- int[] tmp2 = to.get(0);
- to.set(0, to.get(1));
- to.set(1, tmp2);
-
VCFLoader loader = new VCFLoader(al);
File f = makeVcf();
loader.loadVCF(f.getPath(), null);
/*
- * verify variant feature(s) added to gene
+ * verify variant feature(s) added to gene2
+ * gene/1-25 maps to chromosome 45051634- reverse strand
+ * variants A/T at 45051611 and G/C at 45051613 map to
+ * T/A and C/G at gene positions 24 and 22 respectively
*/
- List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
+ List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
.getSequenceFeatures();
assertEquals(geneFeatures.size(), 2);
SequenceFeature sf = geneFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 2);
- assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getBegin(), 22);
+ assertEquals(sf.getEnd(), 22);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
- assertEquals("A,T", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
+ assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES));
sf = geneFeatures.get(1);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 4);
- assertEquals(sf.getEnd(), 4);
+ assertEquals(sf.getBegin(), 24);
+ assertEquals(sf.getEnd(), 24);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
- assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals(sf.getScore(), 5.08130e-03, 0.000001f);
+ assertEquals("T,A", sf.getValue(Gff3Helper.ALLELES));
/*
- * verify variant feature(s) added to transcript
+ * verify variant feature(s) added to transcript2
+ * variant C/G at position 22 of gene overlaps and maps to
+ * position 17 of transcript
*/
- List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
+ List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
.getSequenceFeatures();
assertEquals(transcriptFeatures.size(), 1);
sf = transcriptFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 2);
- assertEquals(sf.getEnd(), 2);
+ assertEquals(sf.getBegin(), 17);
+ assertEquals(sf.getEnd(), 17);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
assertEquals(sf.getScore(), 3.08130e-03, 0.000001f);
- assertEquals("G,C", sf.getValue(Gff3Helper.ALLELES));
+ assertEquals("C,G", sf.getValue(Gff3Helper.ALLELES));
/*
* verify variant feature(s) computed and added to protein
- * first codon AGC varies to ACC giving S/T
+ * last codon GCT varies to GGT giving A/G in the last peptide position
*/
- SequenceI peptide = al.getSequenceAt(1).getDBRefs()[0].getMap().getTo();
+ SequenceI peptide = al.getSequenceAt(3).getDBRefs()[0].getMap().getTo();
List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
assertEquals(proteinFeatures.size(), 1);
sf = proteinFeatures.get(0);
assertEquals(sf.getFeatureGroup(), "VCF");
- assertEquals(sf.getBegin(), 1);
- assertEquals(sf.getEnd(), 1);
+ assertEquals(sf.getBegin(), 6);
+ assertEquals(sf.getEnd(), 6);
assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
- assertEquals(sf.getDescription(), "p.Ser1Thr");
+ assertEquals(sf.getDescription(), "p.Ala6Gly");
}
}