1 package jalview.ext.htsjdk;
3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertTrue;
6 import htsjdk.samtools.util.CloseableIterator;
7 import htsjdk.variant.variantcontext.Allele;
8 import htsjdk.variant.variantcontext.VariantContext;
11 import java.io.IOException;
12 import java.io.PrintWriter;
13 import java.util.List;
15 import org.testng.annotations.Test;
17 public class VCFReaderTest
19 private static final String[] VCF = new String[] {
20 "##fileformat=VCFv4.2",
21 "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
22 "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G
23 "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA
24 "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A
26 // gnomAD exome variant dataset
27 private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
29 // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
32 * A test to exercise some basic functionality of the htsjdk VCF reader,
33 * reading from a non-index VCF file
37 @Test(groups = "Functional")
38 public void testReadVcf_plain() throws IOException
40 File f = writeVcfFile();
41 VCFReader reader = new VCFReader(f.getAbsolutePath());
42 CloseableIterator<VariantContext> variants = reader.iterator();
47 VariantContext vc = variants.next();
48 assertTrue(vc.isSNP());
49 Allele ref = vc.getReference();
50 assertEquals(ref.getBaseString(), "C");
51 List<Allele> alleles = vc.getAlleles();
52 assertEquals(alleles.size(), 2);
53 assertTrue(alleles.get(0).isReference());
54 assertEquals(alleles.get(0).getBaseString(), "C");
55 assertFalse(alleles.get(1).isReference());
56 assertEquals(alleles.get(1).getBaseString(), "G");
62 assertFalse(vc.isSNP());
63 assertTrue(vc.isSimpleInsertion());
64 ref = vc.getReference();
65 assertEquals(ref.getBaseString(), "G");
66 alleles = vc.getAlleles();
67 assertEquals(alleles.size(), 2);
68 assertTrue(alleles.get(0).isReference());
69 assertEquals(alleles.get(0).getBaseString(), "G");
70 assertFalse(alleles.get(1).isReference());
71 assertEquals(alleles.get(1).getBaseString(), "GA");
77 assertFalse(vc.isSNP());
78 assertTrue(vc.isSimpleDeletion());
79 ref = vc.getReference();
80 assertEquals(ref.getBaseString(), "ACG");
81 alleles = vc.getAlleles();
82 assertEquals(alleles.size(), 2);
83 assertTrue(alleles.get(0).isReference());
84 assertEquals(alleles.get(0).getBaseString(), "ACG");
85 assertFalse(alleles.get(1).isReference());
86 assertEquals(alleles.get(1).getBaseString(), "A");
88 assertFalse(variants.hasNext());
95 * Creates a temporary file to be read by the htsjdk VCF reader
100 protected File writeVcfFile() throws IOException
102 File f = File.createTempFile("Test", "vcf");
104 PrintWriter pw = new PrintWriter(f);
105 for (String vcfLine : VCF) {
113 * A 'test' that demonstrates querying an indexed VCF file for features in a
116 * @throws IOException
119 public void testQuery_indexed() throws IOException
122 * if not specified, assumes index file is filename.tbi
124 VCFReader reader = new VCFReader(VCF_PATH);
127 * gene NMT1 (human) is on chromosome 17
128 * GCHR38 (Ensembl): 45051610-45109016
129 * GCHR37 (gnoMAD): 43128978-43186384
130 * CDS begins at offset 9720, first CDS variant at offset 9724
132 CloseableIterator<VariantContext> features = reader.query("17",
133 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
135 assertEquals(printNext(features), 43138702);
136 assertEquals(printNext(features), 43138704);
137 assertEquals(printNext(features), 43138707);
138 assertEquals(printNext(features), 43138708);
139 assertEquals(printNext(features), 43138710);
140 assertEquals(printNext(features), 43138711);
141 assertFalse(features.hasNext());
148 * Prints the toString value of the next variant, and returns its start
154 protected int printNext(CloseableIterator<VariantContext> features)
156 VariantContext next = features.next();
157 System.out.println(next.toString());
158 return next.getStart();
161 // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
164 * Test the query method that wraps a non-indexed VCF file
166 * @throws IOException
168 @Test(groups = "Functional")
169 public void testQuery_plain() throws IOException
171 File f = writeVcfFile();
172 VCFReader reader = new VCFReader(f.getAbsolutePath());
175 * query for overlap of 5-8 - should find variant at 7
177 CloseableIterator<VariantContext> variants = reader.query("20", 5, 8);
182 VariantContext vc = variants.next();
183 assertTrue(vc.isIndel());
184 assertEquals(vc.getStart(), 7);
185 assertEquals(vc.getEnd(), 7);
186 Allele ref = vc.getReference();
187 assertEquals(ref.getBaseString(), "G");
188 List<Allele> alleles = vc.getAlleles();
189 assertEquals(alleles.size(), 2);
190 assertTrue(alleles.get(0).isReference());
191 assertEquals(alleles.get(0).getBaseString(), "G");
192 assertFalse(alleles.get(1).isReference());
193 assertEquals(alleles.get(1).getBaseString(), "GA");
195 assertFalse(variants.hasNext());