2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.io.vcf;
23 import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertNull;
26 import static org.testng.Assert.assertTrue;
28 import jalview.bin.Cache;
29 import jalview.bin.Console;
30 import jalview.datamodel.AlignmentI;
31 import jalview.datamodel.DBRefEntry;
32 import jalview.datamodel.Mapping;
33 import jalview.datamodel.Sequence;
34 import jalview.datamodel.SequenceFeature;
35 import jalview.datamodel.SequenceI;
36 import jalview.datamodel.features.FeatureAttributes;
37 import jalview.datamodel.features.SequenceFeatures;
38 import jalview.gui.AlignFrame;
39 import jalview.io.DataSourceType;
40 import jalview.io.FileLoader;
41 import jalview.io.gff.Gff3Helper;
42 import jalview.util.MapList;
45 import java.io.IOException;
46 import java.io.PrintWriter;
47 import java.util.List;
50 import org.testng.annotations.BeforeClass;
51 import org.testng.annotations.BeforeTest;
52 import org.testng.annotations.Test;
54 public class VCFLoaderTest
56 private static final float DELTA = 0.00001f;
58 // columns 9717- of gene P30419 from Ensembl (much modified)
59 private static final String FASTA = "" +
61 * forward strand 'gene' and 'transcript' with two exons
63 ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
64 + "CAAGCTGGCGGACGAGAGTGTGACA\n"
65 + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
68 * reverse strand gene and transcript (reverse complement alleles!)
70 + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
71 + "TGTCACACTCTCGTCCGCCAGCTTG\n" + ">transcript2/1-18\n"
72 + "-GTCACACTCT----CGCCAGCT--\n"
75 * 'gene' on chromosome 5 with two transcripts
77 + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
78 + "CAAGCTGGCGGACGAGAGTGTGACA\n"
79 + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
80 + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
82 private static final String[] VCF = { "##fileformat=VCFv4.2",
83 // fields other than AF are ignored when parsing as they have no INFO
85 "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
86 "##INFO=<ID=AC_Female,Number=A,Type=Integer,Description=\"Allele count in Female genotypes\"",
87 "##INFO=<ID=AF_AFR,Number=A,Type=Float,Description=\"Allele Frequency among African/African American genotypes\"",
88 "##reference=Homo_sapiens/GRCh38",
89 "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
90 // A/T,C variants in position 2 of gene sequence (precedes transcript)
91 // should create 2 variant features with respective AF values
92 // malformed values for AC_Female and AF_AFR should be ignored
93 "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4",
94 // SNP G/C in position 4 of gene sequence, position 2 of transcript
95 // insertion G/GA is transferred to nucleotide but not to peptide
96 "17\t45051613\t.\tG\tGA,C\t1666.65\t.\tAC=15;AF=3.0e-03,2.0e-03",
97 // '.' in INFO field should be ignored
98 "17\t45051615\t.\tG\tC\t1666.66\tRF\tAC=16;AF=." };
100 @BeforeClass(alwaysRun = true)
104 * configure to capture all available VCF and VEP (CSQ) fields
106 Cache.loadProperties("test/jalview/io/testProps.jvprops");
107 Cache.setProperty("VCF_FIELDS", ".*");
108 Cache.setProperty("VEP_FIELDS", ".*");
109 Cache.setProperty("VCF_ASSEMBLY", "GRCh38=GRCh38");
110 Console.initLogger();
113 @BeforeTest(alwaysRun = true)
114 public void setUpBeforeTest()
117 * clear down feature attributes metadata
119 FeatureAttributes.getInstance().clear();
122 @Test(groups = "Functional")
123 public void testDoLoad() throws IOException
125 AlignmentI al = buildAlignment();
127 File f = makeVcfFile();
128 VCFLoader loader = new VCFLoader(f.getPath());
130 loader.doLoad(al.getSequencesArray(), null);
133 * verify variant feature(s) added to gene
134 * NB alleles at a locus may not be processed, and features added,
135 * in the order in which they appear in the VCF record as method
136 * VariantContext.getAlternateAlleles() does not guarantee order
137 * - order of assertions here matches what we find (is not important)
139 List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
140 .getSequenceFeatures();
141 SequenceFeatures.sortFeatures(geneFeatures, true);
142 assertEquals(geneFeatures.size(), 5);
143 SequenceFeature sf = geneFeatures.get(0);
144 assertEquals(sf.getFeatureGroup(), "VCF");
145 assertEquals(sf.getBegin(), 2);
146 assertEquals(sf.getEnd(), 2);
147 assertEquals(sf.getScore(), 0f);
148 assertEquals(sf.getValue("AF"), "4.0e-03");
149 assertEquals(sf.getValue("AF_AFR"), "2.3e-4");
150 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
151 assertEquals(sf.getType(), SEQUENCE_VARIANT);
152 assertEquals(sf.getValue("POS"), "45051611");
153 assertEquals(sf.getValue("ID"), "rs384765");
154 assertEquals(sf.getValue("QUAL"), "1666.64");
155 assertEquals(sf.getValue("FILTER"), "RF;XYZ");
156 // malformed integer for AC_Female is ignored (JAL-3375)
157 assertNull(sf.getValue("AC_Female"));
159 sf = geneFeatures.get(1);
160 assertEquals(sf.getFeatureGroup(), "VCF");
161 assertEquals(sf.getBegin(), 2);
162 assertEquals(sf.getEnd(), 2);
163 assertEquals(sf.getType(), SEQUENCE_VARIANT);
164 assertEquals(sf.getScore(), 0f);
165 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
167 assertEquals(sf.getValue("AC_Female"), "12");
168 // malformed float for AF_AFR is ignored (JAL-3375)
169 assertNull(sf.getValue("AC_AFR"));
170 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
172 sf = geneFeatures.get(2);
173 assertEquals(sf.getFeatureGroup(), "VCF");
174 assertEquals(sf.getBegin(), 4);
175 assertEquals(sf.getEnd(), 4);
176 assertEquals(sf.getType(), SEQUENCE_VARIANT);
177 assertEquals(sf.getScore(), 0f);
178 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
180 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
182 sf = geneFeatures.get(3);
183 assertEquals(sf.getFeatureGroup(), "VCF");
184 assertEquals(sf.getBegin(), 4);
185 assertEquals(sf.getEnd(), 4);
186 assertEquals(sf.getType(), SEQUENCE_VARIANT);
187 assertEquals(sf.getScore(), 0f);
188 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
190 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
191 assertNull(sf.getValue("ID")); // '.' is ignored
192 assertNull(sf.getValue("FILTER")); // '.' is ignored
194 sf = geneFeatures.get(4);
195 assertEquals(sf.getFeatureGroup(), "VCF");
196 assertEquals(sf.getBegin(), 6);
197 assertEquals(sf.getEnd(), 6);
198 assertEquals(sf.getType(), SEQUENCE_VARIANT);
199 assertEquals(sf.getScore(), 0f);
200 // AF=. should not have been captured
201 assertNull(sf.getValue("AF"));
202 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
205 * verify variant feature(s) added to transcript
207 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
208 .getSequenceFeatures();
209 assertEquals(transcriptFeatures.size(), 3);
210 sf = transcriptFeatures.get(0);
211 assertEquals(sf.getFeatureGroup(), "VCF");
212 assertEquals(sf.getBegin(), 2);
213 assertEquals(sf.getEnd(), 2);
214 assertEquals(sf.getType(), SEQUENCE_VARIANT);
215 assertEquals(sf.getScore(), 0f);
216 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
218 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
219 sf = transcriptFeatures.get(1);
220 assertEquals(sf.getFeatureGroup(), "VCF");
221 assertEquals(sf.getBegin(), 2);
222 assertEquals(sf.getEnd(), 2);
223 assertEquals(sf.getType(), SEQUENCE_VARIANT);
224 assertEquals(sf.getScore(), 0f);
225 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
227 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
230 * verify SNP variant feature(s) computed and added to protein
231 * first codon AGC varies to ACC giving S/T
233 List<DBRefEntry> dbRefs = al.getSequenceAt(1).getDBRefs();
234 SequenceI peptide = null;
235 for (DBRefEntry dbref : dbRefs)
237 if (dbref.getMap().getMap().getFromRatio() == 3)
239 peptide = dbref.getMap().getTo();
242 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
245 * JAL-3187 don't precompute protein features, do dynamically instead
247 assertTrue(proteinFeatures.isEmpty());
250 private File makeVcfFile() throws IOException
252 File f = File.createTempFile("Test", ".vcf");
254 PrintWriter pw = new PrintWriter(f);
255 for (String vcfLine : VCF)
264 * Make a simple alignment with one 'gene' and one 'transcript'
268 private AlignmentI buildAlignment()
270 AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
271 DataSourceType.PASTE);
274 * map gene1 sequence to chromosome (normally done when the sequence is fetched
275 * from Ensembl and transcripts computed)
277 AlignmentI alignment = af.getViewport().getAlignment();
278 SequenceI gene1 = alignment.findName("gene1");
279 int[] to = new int[] { 45051610, 45051634 };
280 int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
281 gene1.setGeneLoci("homo_sapiens", "GRCh38", "17",
282 new MapList(from, to, 1, 1));
285 * map 'transcript1' to chromosome via 'gene1'
286 * transcript1/1-18 is gene1/3-10,15-24
287 * which is chromosome 45051612-45051619,45051624-45051633
289 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
290 SequenceI transcript1 = alignment.findName("transcript1");
291 from = new int[] { transcript1.getStart(), transcript1.getEnd() };
292 transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17",
293 new MapList(from, to, 1, 1));
296 * map gene2 to chromosome reverse strand
298 SequenceI gene2 = alignment.findName("gene2");
299 to = new int[] { 45051634, 45051610 };
300 from = new int[] { gene2.getStart(), gene2.getEnd() };
301 gene2.setGeneLoci("homo_sapiens", "GRCh38", "17",
302 new MapList(from, to, 1, 1));
305 * map 'transcript2' to chromosome via 'gene2'
306 * transcript2/1-18 is gene2/2-11,16-23
307 * which is chromosome 45051633-45051624,45051619-45051612
309 to = new int[] { 45051633, 45051624, 45051619, 45051612 };
310 SequenceI transcript2 = alignment.findName("transcript2");
311 from = new int[] { transcript2.getStart(), transcript2.getEnd() };
312 transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17",
313 new MapList(from, to, 1, 1));
316 * add a protein product as a DBRef on transcript1
318 SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
319 MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
321 Mapping map = new Mapping(peptide1, mapList);
322 DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
323 transcript1.addDBRef(product);
326 * add a protein product as a DBRef on transcript2
328 SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
329 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
330 map = new Mapping(peptide2, mapList);
331 product = new DBRefEntry("", "", "ENSP002", map);
332 transcript2.addDBRef(product);
335 * map gene3 to chromosome
337 SequenceI gene3 = alignment.findName("gene3");
338 to = new int[] { 45051610, 45051634 };
339 from = new int[] { gene3.getStart(), gene3.getEnd() };
340 gene3.setGeneLoci("homo_sapiens", "GRCh38", "5",
341 new MapList(from, to, 1, 1));
344 * map 'transcript3' to chromosome
346 SequenceI transcript3 = alignment.findName("transcript3");
347 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
348 from = new int[] { transcript3.getStart(), transcript3.getEnd() };
349 transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5",
350 new MapList(from, to, 1, 1));
353 * map 'transcript4' to chromosome
355 SequenceI transcript4 = alignment.findName("transcript4");
356 to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634,
358 from = new int[] { transcript4.getStart(), transcript4.getEnd() };
359 transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5",
360 new MapList(from, to, 1, 1));
363 * add a protein product as a DBRef on transcript3
365 SequenceI peptide3 = new Sequence("ENSP003", "SWRECD");
366 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
367 map = new Mapping(peptide3, mapList);
368 product = new DBRefEntry("", "", "ENSP003", map);
369 transcript3.addDBRef(product);
375 * Test with 'gene' and 'transcript' mapped to the reverse strand of the
376 * chromosome. The VCF variant positions (in forward coordinates) should get
377 * correctly located on sequence positions.
379 * @throws IOException
381 @Test(groups = "Functional")
382 public void testDoLoad_reverseStrand() throws IOException
384 AlignmentI al = buildAlignment();
386 File f = makeVcfFile();
388 VCFLoader loader = new VCFLoader(f.getPath());
390 loader.doLoad(al.getSequencesArray(), null);
393 * verify variant feature(s) added to gene2
394 * gene2/1-25 maps to chromosome 45051634- reverse strand
396 List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
397 .getSequenceFeatures();
398 SequenceFeatures.sortFeatures(geneFeatures, true);
399 assertEquals(geneFeatures.size(), 5);
403 * insertion G/GA at 45051613 maps to an insertion at
404 * the preceding position (21) on reverse strand gene
405 * reference: CAAGC -> GCTTG/21-25
406 * genomic variant: CAAGAC (G/GA)
407 * gene variant: GTCTTG (G/GT at 21)
409 sf = geneFeatures.get(1);
410 assertEquals(sf.getFeatureGroup(), "VCF");
411 assertEquals(sf.getBegin(), 21);
412 assertEquals(sf.getEnd(), 21);
413 assertEquals(sf.getType(), SEQUENCE_VARIANT);
414 assertEquals(sf.getScore(), 0f);
415 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
416 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
420 * variant G/C at 45051613 maps to C/G at gene position 22
422 sf = geneFeatures.get(2);
423 assertEquals(sf.getFeatureGroup(), "VCF");
424 assertEquals(sf.getBegin(), 22);
425 assertEquals(sf.getEnd(), 22);
426 assertEquals(sf.getType(), SEQUENCE_VARIANT);
427 assertEquals(sf.getScore(), 0f);
428 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
429 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
433 * variant A/C at 45051611 maps to T/G at gene position 24
435 sf = geneFeatures.get(3);
436 assertEquals(sf.getFeatureGroup(), "VCF");
437 assertEquals(sf.getBegin(), 24);
438 assertEquals(sf.getEnd(), 24);
439 assertEquals(sf.getType(), SEQUENCE_VARIANT);
440 assertEquals(sf.getScore(), 0f);
441 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
442 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 4.0e-03,
446 * variant A/T at 45051611 maps to T/A at gene position 24
448 sf = geneFeatures.get(4);
449 assertEquals(sf.getFeatureGroup(), "VCF");
450 assertEquals(sf.getBegin(), 24);
451 assertEquals(sf.getEnd(), 24);
452 assertEquals(sf.getType(), SEQUENCE_VARIANT);
453 assertEquals(sf.getScore(), 0f);
454 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
455 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
459 * verify 3 variant features added to transcript2
461 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
462 .getSequenceFeatures();
463 assertEquals(transcriptFeatures.size(), 3);
466 * insertion G/GT at position 21 of gene maps to position 16 of transcript
468 sf = transcriptFeatures.get(1);
469 assertEquals(sf.getFeatureGroup(), "VCF");
470 assertEquals(sf.getBegin(), 16);
471 assertEquals(sf.getEnd(), 16);
472 assertEquals(sf.getType(), SEQUENCE_VARIANT);
473 assertEquals(sf.getScore(), 0f);
474 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
475 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
479 * SNP C/G at position 22 of gene maps to position 17 of transcript
481 sf = transcriptFeatures.get(2);
482 assertEquals(sf.getFeatureGroup(), "VCF");
483 assertEquals(sf.getBegin(), 17);
484 assertEquals(sf.getEnd(), 17);
485 assertEquals(sf.getType(), SEQUENCE_VARIANT);
486 assertEquals(sf.getScore(), 0f);
487 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
488 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
492 * verify variant feature(s) computed and added to protein
493 * last codon GCT varies to GGT giving A/G in the last peptide position
495 List<DBRefEntry> dbRefs = al.getSequenceAt(3).getDBRefs();
496 SequenceI peptide = null;
497 for (DBRefEntry dbref : dbRefs)
499 if (dbref.getMap().getMap().getFromRatio() == 3)
501 peptide = dbref.getMap().getTo();
504 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
507 * JAL-3187 don't precompute protein features, do dynamically instead
509 assertTrue(proteinFeatures.isEmpty());
513 * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
514 * it is added to the variant feature, but restricted where possible to the
515 * consequences for a specific transcript
517 * @throws IOException
519 @Test(groups = "Functional")
520 public void testDoLoad_vepCsq() throws IOException
522 AlignmentI al = buildAlignment();
524 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf.vcf");
527 * VCF data file with variants at gene3 positions
532 * 17 A/AC (insertion), A/G
534 loader.doLoad(al.getSequencesArray(), null);
537 * verify variant feature(s) added to gene3
539 List<SequenceFeature> geneFeatures = al.findName("gene3")
540 .getSequenceFeatures();
541 SequenceFeatures.sortFeatures(geneFeatures, true);
542 assertEquals(geneFeatures.size(), 7);
543 SequenceFeature sf = geneFeatures.get(0);
544 assertEquals(sf.getBegin(), 1);
545 assertEquals(sf.getEnd(), 1);
546 assertEquals(sf.getScore(), 0f);
547 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.1f, DELTA);
548 assertEquals(sf.getValue("alleles"), "C,A");
549 // gene features include Consequence for all transcripts
550 Map map = (Map) sf.getValue("CSQ");
551 assertEquals(map.size(), 9);
552 assertEquals(map.get("PolyPhen"), "Bad");
554 sf = geneFeatures.get(1);
555 assertEquals(sf.getBegin(), 5);
556 assertEquals(sf.getEnd(), 5);
557 assertEquals(sf.getScore(), 0f);
558 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
559 assertEquals(sf.getValue("alleles"), "C,T");
560 map = (Map) sf.getValue("CSQ");
561 assertEquals(map.size(), 9);
562 assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
564 sf = geneFeatures.get(2);
565 assertEquals(sf.getBegin(), 9);
566 assertEquals(sf.getEnd(), 11); // deletion over 3 positions
567 assertEquals(sf.getScore(), 0f);
568 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.3f, DELTA);
569 assertEquals(sf.getValue("alleles"), "CGG,C");
570 map = (Map) sf.getValue("CSQ");
571 assertEquals(map.size(), 9);
573 sf = geneFeatures.get(3);
574 assertEquals(sf.getBegin(), 13);
575 assertEquals(sf.getEnd(), 13);
576 assertEquals(sf.getScore(), 0f);
577 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
578 assertEquals(sf.getValue("alleles"), "C,T");
579 map = (Map) sf.getValue("CSQ");
580 assertEquals(map.size(), 9);
582 sf = geneFeatures.get(4);
583 assertEquals(sf.getBegin(), 13);
584 assertEquals(sf.getEnd(), 13);
585 assertEquals(sf.getScore(), 0f);
586 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
587 assertEquals(sf.getValue("alleles"), "C,G");
588 map = (Map) sf.getValue("CSQ");
589 assertEquals(map.size(), 9);
591 sf = geneFeatures.get(5);
592 assertEquals(sf.getBegin(), 17);
593 assertEquals(sf.getEnd(), 17);
594 assertEquals(sf.getScore(), 0f);
595 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
596 assertEquals(sf.getValue("alleles"), "A,G");
597 map = (Map) sf.getValue("CSQ");
598 assertEquals(map.size(), 9);
600 sf = geneFeatures.get(6);
601 assertEquals(sf.getBegin(), 17);
602 assertEquals(sf.getEnd(), 17); // insertion
603 assertEquals(sf.getScore(), 0f);
604 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
605 assertEquals(sf.getValue("alleles"), "A,AC");
606 map = (Map) sf.getValue("CSQ");
607 assertEquals(map.size(), 9);
610 * verify variant feature(s) added to transcript3
611 * at columns 5 (1), 17 (2), positions 3, 11
612 * note the deletion at columns 9-11 is not transferred since col 11
613 * has no mapping to transcript 3
615 List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
616 .getSequenceFeatures();
617 SequenceFeatures.sortFeatures(transcriptFeatures, true);
618 assertEquals(transcriptFeatures.size(), 3);
619 sf = transcriptFeatures.get(0);
620 assertEquals(sf.getBegin(), 3);
621 assertEquals(sf.getEnd(), 3);
622 assertEquals(sf.getScore(), 0f);
623 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
624 assertEquals(sf.getValue("alleles"), "C,T");
625 // transcript features only have Consequence for that transcripts
626 map = (Map) sf.getValue("CSQ");
627 assertEquals(map.size(), 9);
628 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
630 sf = transcriptFeatures.get(1);
631 assertEquals(sf.getBegin(), 11);
632 assertEquals(sf.getEnd(), 11);
633 assertEquals(sf.getScore(), 0f);
634 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
635 assertEquals(sf.getValue("alleles"), "A,G");
636 assertEquals(map.size(), 9);
637 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
639 sf = transcriptFeatures.get(2);
640 assertEquals(sf.getBegin(), 11);
641 assertEquals(sf.getEnd(), 11);
642 assertEquals(sf.getScore(), 0f);
643 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
644 assertEquals(sf.getValue("alleles"), "A,AC");
645 assertEquals(map.size(), 9);
646 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
649 * verify variants computed on protein product for transcript3
651 * codon variants are AGC/AGT position 1 which is synonymous
652 * and GAG/GGG which is E/G in position 4
653 * the insertion variant is not transferred to the peptide
655 List<DBRefEntry> dbRefs = al.findName("transcript3").getDBRefs();
656 SequenceI peptide = null;
657 for (DBRefEntry dbref : dbRefs)
659 if (dbref.getMap().getMap().getFromRatio() == 3)
661 peptide = dbref.getMap().getTo();
664 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
666 * JAL-3187 don't precompute protein features, do dynamically instead
668 assertTrue(proteinFeatures.isEmpty());
669 // SequenceFeatures.sortFeatures(proteinFeatures, true);
670 // assertEquals(proteinFeatures.size(), 2);
671 // sf = proteinFeatures.get(0);
672 // assertEquals(sf.getFeatureGroup(), "VCF");
673 // assertEquals(sf.getBegin(), 1);
674 // assertEquals(sf.getEnd(), 1);
675 // assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT);
676 // assertEquals(sf.getDescription(), "agC/agT");
677 // sf = proteinFeatures.get(1);
678 // assertEquals(sf.getFeatureGroup(), "VCF");
679 // assertEquals(sf.getBegin(), 4);
680 // assertEquals(sf.getEnd(), 4);
681 // assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
682 // assertEquals(sf.getDescription(), "p.Glu4Gly");
685 * verify variant feature(s) added to transcript4
686 * at columns 13 (2) and 17 (2), positions 7 and 11
688 transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
689 SequenceFeatures.sortFeatures(transcriptFeatures, true);
690 assertEquals(transcriptFeatures.size(), 4);
691 sf = transcriptFeatures.get(0);
692 assertEquals(sf.getBegin(), 7);
693 assertEquals(sf.getEnd(), 7);
694 assertEquals(sf.getScore(), 0f);
695 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
696 assertEquals(sf.getValue("alleles"), "C,T");
697 assertEquals(map.size(), 9);
698 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
700 sf = transcriptFeatures.get(1);
701 assertEquals(sf.getBegin(), 7);
702 assertEquals(sf.getEnd(), 7);
703 assertEquals(sf.getScore(), 0f);
704 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
705 assertEquals(sf.getValue("alleles"), "C,G");
706 assertEquals(map.size(), 9);
707 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
709 sf = transcriptFeatures.get(2);
710 assertEquals(sf.getBegin(), 11);
711 assertEquals(sf.getEnd(), 11);
712 assertEquals(sf.getScore(), 0f);
713 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
714 assertEquals(sf.getValue("alleles"), "A,G");
715 assertEquals(map.size(), 9);
716 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
718 sf = transcriptFeatures.get(3);
719 assertEquals(sf.getBegin(), 11);
720 assertEquals(sf.getEnd(), 11);
721 assertEquals(sf.getScore(), 0f);
722 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
723 assertEquals(sf.getValue("alleles"), "A,AC");
724 assertEquals(map.size(), 9);
725 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
729 * A test that demonstrates loading a contig sequence from an indexed sequence
730 * database which is the reference for a VCF file
732 * @throws IOException
734 @Test(groups = "Functional")
735 public void testLoadVCFContig() throws IOException
737 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf2.vcf");
739 SequenceI seq = loader.loadVCFContig("contig123");
740 assertEquals(seq.getLength(), 15);
741 assertEquals(seq.getSequenceAsString(), "AAAAACCCCCGGGGG");
742 List<SequenceFeature> features = seq.getSequenceFeatures();
743 SequenceFeatures.sortFeatures(features, true);
744 assertEquals(features.size(), 2);
745 SequenceFeature sf = features.get(0);
746 assertEquals(sf.getBegin(), 8);
747 assertEquals(sf.getEnd(), 8);
748 assertEquals(sf.getDescription(), "C,A");
749 sf = features.get(1);
750 assertEquals(sf.getBegin(), 12);
751 assertEquals(sf.getEnd(), 12);
752 assertEquals(sf.getDescription(), "G,T");
754 seq = loader.loadVCFContig("contig789");
755 assertEquals(seq.getLength(), 25);
756 assertEquals(seq.getSequenceAsString(), "GGGGGTTTTTAAAAACCCCCGGGGG");
757 features = seq.getSequenceFeatures();
758 SequenceFeatures.sortFeatures(features, true);
759 assertEquals(features.size(), 2);
760 sf = features.get(0);
761 assertEquals(sf.getBegin(), 2);
762 assertEquals(sf.getEnd(), 2);
763 assertEquals(sf.getDescription(), "G,T");
764 sf = features.get(1);
765 assertEquals(sf.getBegin(), 21);
766 assertEquals(sf.getEnd(), 21);
767 assertEquals(sf.getDescription(), "G,A");
769 seq = loader.loadVCFContig("contig456");
770 assertEquals(seq.getLength(), 20);
771 assertEquals(seq.getSequenceAsString(), "CCCCCGGGGGTTTTTAAAAA");
772 features = seq.getSequenceFeatures();
773 SequenceFeatures.sortFeatures(features, true);
774 assertEquals(features.size(), 1);
775 sf = features.get(0);
776 assertEquals(sf.getBegin(), 15);
777 assertEquals(sf.getEnd(), 15);
778 assertEquals(sf.getDescription(), "T,C");