JAL-1793 simple tests of reading raw and indexed VCF files with htsjdk
[jalview.git] / test / jalview / ext / htsjdk / TabixFeatureReaderTest.java
diff --git a/test/jalview/ext/htsjdk/TabixFeatureReaderTest.java b/test/jalview/ext/htsjdk/TabixFeatureReaderTest.java
new file mode 100644 (file)
index 0000000..48a932e
--- /dev/null
@@ -0,0 +1,51 @@
+package jalview.ext.htsjdk;
+
+import htsjdk.tribble.CloseableTribbleIterator;
+import htsjdk.tribble.TabixFeatureReader;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFCodec;
+
+import java.io.IOException;
+
+import org.testng.annotations.Test;
+
+public class TabixFeatureReaderTest
+{
+  // gnomAD exome variant dataset
+  private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+  // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz";
+
+  /**
+   * A 'test' that demonstrates querying an indexed VCF file for features in a
+   * specified interval
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testQuery() throws IOException
+  {
+    /*
+     * if not specified, assumes index file is filename.tbi
+     */
+    TabixFeatureReader<VariantContext, VCFCodec> reader = new TabixFeatureReader<>(
+            VCF_PATH, VCF_PATH, new VCFCodec());
+
+    /*
+     * gene NMT1 (human) is on chromosome 17
+     * GCHR38 (Ensembl): 45051610-45109016
+     * GCHR37 (gnoMAD): 43128978-43186384
+     * CDS begins at offset 9720, first CDS variant at offset 9724
+     */
+    CloseableTribbleIterator<VariantContext> features = reader.query("17",
+            43128978 + 9724, 43128978 + 9734); // first 11 CDS positions
+    while (features.hasNext())
+    {
+      System.out.println(features.next().toString());
+    }
+
+    features.close();
+    reader.close();
+  }
+
+}