From 9cfc8fa0bb0118066ce0406f701b533ca8e84ff6 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 19 Sep 2017 14:52:58 +0100 Subject: [PATCH] JAL-1793 simple tests of reading raw and indexed VCF files with htsjdk --- .../jalview/ext/htsjdk/TabixFeatureReaderTest.java | 51 ++++++++++ test/jalview/ext/htsjdk/VCFFileReaderTest.java | 106 ++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 test/jalview/ext/htsjdk/TabixFeatureReaderTest.java create mode 100644 test/jalview/ext/htsjdk/VCFFileReaderTest.java diff --git a/test/jalview/ext/htsjdk/TabixFeatureReaderTest.java b/test/jalview/ext/htsjdk/TabixFeatureReaderTest.java new file mode 100644 index 0000000..48a932e --- /dev/null +++ b/test/jalview/ext/htsjdk/TabixFeatureReaderTest.java @@ -0,0 +1,51 @@ +package jalview.ext.htsjdk; + +import htsjdk.tribble.CloseableTribbleIterator; +import htsjdk.tribble.TabixFeatureReader; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFCodec; + +import java.io.IOException; + +import org.testng.annotations.Test; + +public class TabixFeatureReaderTest +{ + // gnomAD exome variant dataset + private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz"; + + // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; + + /** + * A 'test' that demonstrates querying an indexed VCF file for features in a + * specified interval + * + * @throws IOException + */ + @Test + public void testQuery() throws IOException + { + /* + * if not specified, assumes index file is filename.tbi + */ + TabixFeatureReader reader = new TabixFeatureReader<>( + VCF_PATH, VCF_PATH, new VCFCodec()); + + /* + * gene NMT1 (human) is on chromosome 17 + * GCHR38 (Ensembl): 45051610-45109016 + * GCHR37 (gnoMAD): 43128978-43186384 + * CDS begins at offset 9720, first CDS variant at offset 9724 + */ + CloseableTribbleIterator features = reader.query("17", + 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions + while (features.hasNext()) + { + System.out.println(features.next().toString()); + } + + features.close(); + reader.close(); + } + +} diff --git a/test/jalview/ext/htsjdk/VCFFileReaderTest.java b/test/jalview/ext/htsjdk/VCFFileReaderTest.java new file mode 100644 index 0000000..50147e7 --- /dev/null +++ b/test/jalview/ext/htsjdk/VCFFileReaderTest.java @@ -0,0 +1,106 @@ +package jalview.ext.htsjdk; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.List; + +import org.testng.annotations.Test; + +public class VCFFileReaderTest +{ + private static final String[] VCF = new String[] { + "##fileformat=VCFv4.2", + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", + "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G + "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA + "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A + + /** + * A test to exercise some basic functionality of the htsjdk VCF reader + * + * @throws IOException + */ + @Test(groups = "Functional") + public void testReadVcf() throws IOException + { + File f = writeVcfFile(); + VCFFileReader reader = new VCFFileReader(f, false); + CloseableIterator variants = reader.iterator(); + + /* + * SNP C/G variant + */ + VariantContext vc = variants.next(); + assertTrue(vc.isSNP()); + Allele ref = vc.getReference(); + assertEquals(ref.getBaseString(), "C"); + List alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "C"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "G"); + + /* + * Insertion G -> GA + */ + vc = variants.next(); + assertFalse(vc.isSNP()); + assertTrue(vc.isSimpleInsertion()); + ref = vc.getReference(); + assertEquals(ref.getBaseString(), "G"); + alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "G"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "GA"); + + /* + * Deletion ACG -> A + */ + vc = variants.next(); + assertFalse(vc.isSNP()); + assertTrue(vc.isSimpleDeletion()); + ref = vc.getReference(); + assertEquals(ref.getBaseString(), "ACG"); + alleles = vc.getAlleles(); + assertEquals(alleles.size(), 2); + assertTrue(alleles.get(0).isReference()); + assertEquals(alleles.get(0).getBaseString(), "ACG"); + assertFalse(alleles.get(1).isReference()); + assertEquals(alleles.get(1).getBaseString(), "A"); + + assertFalse(variants.hasNext()); + + variants.close(); + reader.close(); + } + + /** + * Creates a temporary file to be read by the htsjdk VCF reader + * + * @return + * @throws IOException + */ + protected File writeVcfFile() throws IOException + { + File f = File.createTempFile("Test", "vcf"); + f.deleteOnExit(); + PrintWriter pw = new PrintWriter(f); + for (String vcfLine : VCF) { + pw.println(vcfLine); + } + pw.close(); + return f; + } +} -- 1.7.10.2