2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.htsjdk;
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertFalse;
25 import static org.testng.Assert.assertTrue;
26 import htsjdk.samtools.util.CloseableIterator;
27 import htsjdk.variant.variantcontext.Allele;
28 import htsjdk.variant.variantcontext.VariantContext;
31 import java.io.IOException;
32 import java.io.PrintWriter;
33 import java.util.List;
35 import org.testng.annotations.Test;
37 public class VCFReaderTest
39 private static final String[] VCF = new String[] { "##fileformat=VCFv4.2",
40 "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
41 "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G
42 "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA
43 "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A
45 // gnomAD exome variant dataset
46 private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
48 // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
51 * A test to exercise some basic functionality of the htsjdk VCF reader,
52 * reading from a non-index VCF file
56 @Test(groups = "Functional")
57 public void testReadVcf_plain() throws IOException
59 File f = writeVcfFile();
60 VCFReader reader = new VCFReader(f.getAbsolutePath());
61 CloseableIterator<VariantContext> variants = reader.iterator();
66 VariantContext vc = variants.next();
67 assertTrue(vc.isSNP());
68 Allele ref = vc.getReference();
69 assertEquals(ref.getBaseString(), "C");
70 List<Allele> alleles = vc.getAlleles();
71 assertEquals(alleles.size(), 2);
72 assertTrue(alleles.get(0).isReference());
73 assertEquals(alleles.get(0).getBaseString(), "C");
74 assertFalse(alleles.get(1).isReference());
75 assertEquals(alleles.get(1).getBaseString(), "G");
81 assertFalse(vc.isSNP());
82 assertTrue(vc.isSimpleInsertion());
83 ref = vc.getReference();
84 assertEquals(ref.getBaseString(), "G");
85 alleles = vc.getAlleles();
86 assertEquals(alleles.size(), 2);
87 assertTrue(alleles.get(0).isReference());
88 assertEquals(alleles.get(0).getBaseString(), "G");
89 assertFalse(alleles.get(1).isReference());
90 assertEquals(alleles.get(1).getBaseString(), "GA");
96 assertFalse(vc.isSNP());
97 assertTrue(vc.isSimpleDeletion());
98 ref = vc.getReference();
99 assertEquals(ref.getBaseString(), "ACG");
100 alleles = vc.getAlleles();
101 assertEquals(alleles.size(), 2);
102 assertTrue(alleles.get(0).isReference());
103 assertEquals(alleles.get(0).getBaseString(), "ACG");
104 assertFalse(alleles.get(1).isReference());
105 assertEquals(alleles.get(1).getBaseString(), "A");
107 assertFalse(variants.hasNext());
114 * Creates a temporary file to be read by the htsjdk VCF reader
117 * @throws IOException
119 protected File writeVcfFile() throws IOException
121 File f = File.createTempFile("Test", "vcf");
123 PrintWriter pw = new PrintWriter(f);
124 for (String vcfLine : VCF)
133 * A 'test' that demonstrates querying an indexed VCF file for features in a
136 * @throws IOException
139 public void testQuery_indexed() throws IOException
142 * if not specified, assumes index file is filename.tbi
144 VCFReader reader = new VCFReader(VCF_PATH);
147 * gene NMT1 (human) is on chromosome 17
148 * GCHR38 (Ensembl): 45051610-45109016
149 * GCHR37 (gnoMAD): 43128978-43186384
150 * CDS begins at offset 9720, first CDS variant at offset 9724
152 CloseableIterator<VariantContext> features = reader.query("17",
153 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
155 assertEquals(printNext(features), 43138702);
156 assertEquals(printNext(features), 43138704);
157 assertEquals(printNext(features), 43138707);
158 assertEquals(printNext(features), 43138708);
159 assertEquals(printNext(features), 43138710);
160 assertEquals(printNext(features), 43138711);
161 assertFalse(features.hasNext());
168 * Prints the toString value of the next variant, and returns its start
174 protected int printNext(CloseableIterator<VariantContext> features)
176 VariantContext next = features.next();
177 System.out.println(next.toString());
178 return next.getStart();
181 // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
184 * Test the query method that wraps a non-indexed VCF file
186 * @throws IOException
188 @Test(groups = "Functional")
189 public void testQuery_plain() throws IOException
191 File f = writeVcfFile();
192 VCFReader reader = new VCFReader(f.getAbsolutePath());
195 * query for overlap of 5-8 - should find variant at 7
197 CloseableIterator<VariantContext> variants = reader.query("20", 5, 8);
202 VariantContext vc = variants.next();
203 assertTrue(vc.isIndel());
204 assertEquals(vc.getStart(), 7);
205 assertEquals(vc.getEnd(), 7);
206 Allele ref = vc.getReference();
207 assertEquals(ref.getBaseString(), "G");
208 List<Allele> alleles = vc.getAlleles();
209 assertEquals(alleles.size(), 2);
210 assertTrue(alleles.get(0).isReference());
211 assertEquals(alleles.get(0).getBaseString(), "G");
212 assertFalse(alleles.get(1).isReference());
213 assertEquals(alleles.get(1).getBaseString(), "GA");
215 assertFalse(variants.hasNext());