--- /dev/null
+package jalview.ext.htsjdk;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * A thin wrapper for htsjdk classes to read either plain, or compressed, or
+ * compressed and indexed VCF files
+ */
+public class VCFReader implements Closeable, Iterable<VariantContext>
+{
+ private static final String GZ = "gz";
+
+ private static final String TBI_EXTENSION = ".tbi";
+
+ private boolean indexed;
+
+ private VCFFileReader reader;
+
+ /**
+ * Constructor given a raw or compressed VCF file or a (tabix) index file
+ * <p>
+ * For now, file type is inferred from its suffix: .gz or .bgz for compressed
+ * data, .tbi for an index file, anything else is assumed to be plain text
+ * VCF.
+ *
+ * @param f
+ * @throws IOException
+ */
+ public VCFReader(String filePath) throws IOException
+ {
+ if (filePath.endsWith(GZ))
+ {
+ if (new File(filePath + TBI_EXTENSION).exists())
+ {
+ indexed = true;
+ }
+ }
+ else if (filePath.endsWith(TBI_EXTENSION))
+ {
+ indexed = true;
+ filePath = filePath.substring(0, filePath.length() - 4);
+ }
+
+ reader = new VCFFileReader(new File(filePath), indexed);
+ }
+
+ @Override
+ public void close() throws IOException
+ {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ }
+
+ /**
+ * Returns an iterator over VCF variants in the file. The client should call
+ * close() on the iterator when finished with it.
+ */
+ @Override
+ public CloseableIterator<VariantContext> iterator()
+ {
+ return reader == null ? null : reader.iterator();
+ }
+
+ /**
+ * Queries for records overlapping the region specified. Note that this method
+ * requires a VCF file with an associated index. If no index exists a
+ * TribbleException will be thrown.
+ *
+ * @param chrom
+ * the chromosome to query
+ * @param start
+ * query interval start
+ * @param end
+ * query interval end
+ * @return
+ */
+ public CloseableIterator<VariantContext> query(final String chrom,
+ final int start, final int end)
+ {
+ return reader == null ? null : reader.query(chrom, start, end);
+ }
+}
+++ /dev/null
-package jalview.ext.htsjdk;
-
-import htsjdk.tribble.CloseableTribbleIterator;
-import htsjdk.tribble.TabixFeatureReader;
-import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFCodec;
-
-import java.io.IOException;
-
-import org.testng.annotations.Test;
-
-public class TabixFeatureReaderTest
-{
- // gnomAD exome variant dataset
- private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
-
- // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
-
- /**
- * A 'test' that demonstrates querying an indexed VCF file for features in a
- * specified interval
- *
- * @throws IOException
- */
- @Test
- public void testQuery() throws IOException
- {
- /*
- * if not specified, assumes index file is filename.tbi
- */
- TabixFeatureReader<VariantContext, VCFCodec> reader = new TabixFeatureReader<>(
- VCF_PATH, VCF_PATH, new VCFCodec());
-
- /*
- * gene NMT1 (human) is on chromosome 17
- * GCHR38 (Ensembl): 45051610-45109016
- * GCHR37 (gnoMAD): 43128978-43186384
- * CDS begins at offset 9720, first CDS variant at offset 9724
- */
- CloseableTribbleIterator<VariantContext> features = reader.query("17",
- 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
- while (features.hasNext())
- {
- System.out.println(features.next().toString());
- }
-
- features.close();
- reader.close();
- }
-
-}
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;
import java.io.IOException;
import org.testng.annotations.Test;
-public class VCFFileReaderTest
+public class VCFReaderTest
{
private static final String[] VCF = new String[] {
"##fileformat=VCFv4.2",
"20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA
"18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A
+ // gnomAD exome variant dataset
+ private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
/**
* A test to exercise some basic functionality of the htsjdk VCF reader
*
* @throws IOException
*/
@Test(groups = "Functional")
- public void testReadVcf() throws IOException
+ public void testReadVcf_plain() throws IOException
{
File f = writeVcfFile();
- VCFFileReader reader = new VCFFileReader(f, false);
+ VCFReader reader = new VCFReader(f.getAbsolutePath());
CloseableIterator<VariantContext> variants = reader.iterator();
/*
pw.close();
return f;
}
+
+ // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+ /**
+ * A 'test' that demonstrates querying an indexed VCF file for features in a
+ * specified interval
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testQuery_indexed() throws IOException
+ {
+ /*
+ * if not specified, assumes index file is filename.tbi
+ */
+ VCFReader reader = new VCFReader(VCF_PATH);
+
+ /*
+ * gene NMT1 (human) is on chromosome 17
+ * GCHR38 (Ensembl): 45051610-45109016
+ * GCHR37 (gnoMAD): 43128978-43186384
+ * CDS begins at offset 9720, first CDS variant at offset 9724
+ */
+ CloseableIterator<VariantContext> features = reader.query("17",
+ 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
+
+ assertEquals(features.next().getStart(), 43138702);
+ assertEquals(features.next().getStart(), 43138704);
+ assertEquals(features.next().getStart(), 43138707);
+ assertEquals(features.next().getStart(), 43138708);
+ assertEquals(features.next().getStart(), 43138710);
+ assertEquals(features.next().getStart(), 43138711);
+ assertFalse(features.hasNext());
+
+ features.close();
+ reader.close();
+ }
}