1 package jalview.ext.htsjdk;
3 import jalview.bin.Cache;
5 import java.io.Closeable;
7 import java.io.IOException;
9 import htsjdk.samtools.util.CloseableIterator;
10 import htsjdk.variant.variantcontext.VariantContext;
11 import htsjdk.variant.vcf.VCFFileReader;
12 import htsjdk.variant.vcf.VCFHeader;
/**
 * A thin wrapper for htsjdk classes to read either plain, or compressed, or
 * compressed and indexed VCF files
 */
18 public class VCFReader implements Closeable, Iterable<VariantContext>
20 private static final String GZ = "gz";
22 private static final String TBI_EXTENSION = ".tbi";
24 private static final String CSI_EXTENSION = ".csi";
26 private boolean indexed;
28 private VCFFileReader reader;
/**
 * Constructor given a raw or compressed VCF file or a (csi or tabix) index file
 * <p>
 * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of these
 * extensions gives a valid file path, open as indexed, else as unindexed.
 * <p>
 * If the VCF data file does not exist, an error is logged and no reader is
 * opened.
 * 
 * @param filePath
 *          path of the VCF file, or of its .tbi or .csi index file
 * @throws IOException
 *           declared by the signature (NOTE(review): nothing in this
 *           constructor throws it directly - confirm whether it is needed)
 */
public VCFReader(String filePath) throws IOException
{
  String dataPath = filePath;
  if (filePath.endsWith(TBI_EXTENSION)
          || filePath.endsWith(CSI_EXTENSION))
  {
    /*
     * we were given the index file: the data file has the same
     * path without the index extension
     */
    String extension = filePath.endsWith(TBI_EXTENSION) ? TBI_EXTENSION
            : CSI_EXTENSION;
    indexed = true;
    dataPath = filePath.substring(0,
            filePath.length() - extension.length());
  }
  else
  {
    /*
     * we were given the data file: it is indexed if an index
     * file of either kind is found alongside it
     */
    indexed = new File(filePath + TBI_EXTENSION).exists()
            || new File(filePath + CSI_EXTENSION).exists();
  }

  /*
   * we pass the name of the unindexed file to htsjdk,
   * with a flag to assert whether it is indexed
   */
  File file = new File(dataPath);
  if (file.exists())
  {
    reader = new VCFFileReader(file, indexed);
  }
  else
  {
    Cache.log.error("File not found: " + dataPath);
  }
}
73 public void close() throws IOException
82 * Returns an iterator over VCF variants in the file. The client should call
83 * close() on the iterator when finished with it.
86 public CloseableIterator<VariantContext> iterator()
88 return reader == null ? null : reader.iterator();
92 * Queries for records overlapping the region specified. Note that this method
93 * is performant if the VCF file is indexed, and may be very slow if it is
96 * Client code should call close() on the iterator when finished with it.
99 * the chromosome to query
101 * query interval start
106 public CloseableIterator<VariantContext> query(final String chrom,
107 final int start, final int end)
115 return reader.query(chrom, start, end);
119 return queryUnindexed(chrom, start, end);
124 * Returns an iterator over variant records read from a flat file which
125 * overlap the specified chromosomal positions. Call close() on the iterator
126 * when finished with it!
133 protected CloseableIterator<VariantContext> queryUnindexed(
134 final String chrom, final int start, final int end)
136 final CloseableIterator<VariantContext> it = reader.iterator();
138 return new CloseableIterator<VariantContext>()
140 boolean atEnd = false;
142 // prime look-ahead buffer with next matching record
143 private VariantContext next = findNext();
145 private VariantContext findNext()
151 VariantContext variant = null;
155 int vstart = variant.getStart();
164 int vend = variant.getEnd();
165 // todo what is the undeprecated way to get
166 // the chromosome for the variant?
167 if (chrom.equals(variant.getContig()) && (vstart <= end)
177 public boolean hasNext()
179 boolean hasNext = !atEnd && (next != null);
188 public VariantContext next()
191 * return the next match, and then re-prime
192 * it with the following one (if any)
194 VariantContext temp = next;
214 * Returns an object that models the VCF file headers
218 public VCFHeader getFileHeader()
220 return reader == null ? null : reader.getFileHeader();
/**
 * Answers true if we are processing a tab-indexed VCF file, false if it is a
 * plain text (uncompressed) file.
 */
229 public boolean isIndex()