1 package jalview.ext.htsjdk;
3 import htsjdk.samtools.util.CloseableIterator;
4 import htsjdk.variant.variantcontext.VariantContext;
5 import htsjdk.variant.vcf.VCFFileReader;
6 import htsjdk.variant.vcf.VCFHeader;
8 import java.io.Closeable;
10 import java.io.IOException;
13 * A thin wrapper for htsjdk classes to read plain, compressed, or compressed
14 * and indexed VCF files
16 public class VCFReader implements Closeable, Iterable<VariantContext>
18 private static final String GZ = "gz";
20 private static final String TBI_EXTENSION = ".tbi";
22 private boolean indexed;
24 private VCFFileReader reader;
27 * Constructor given a raw or compressed VCF file or a (tabix) index file
29 * For now, file type is inferred from its suffix: "gz" (so .gz or .bgz) for
30 * compressed data, .tbi for an index file; anything else is assumed plain text
36 public VCFReader(String filePath) throws IOException
38 if (filePath.endsWith(GZ))
40 if (new File(filePath + TBI_EXTENSION).exists())
45 else if (filePath.endsWith(TBI_EXTENSION))
48 filePath = filePath.substring(0, filePath.length() - 4);
51 reader = new VCFFileReader(new File(filePath), indexed);
55 public void close() throws IOException
64 * Returns an iterator over VCF variants in the file, or null if there is no
65 * reader. The client should call close() on the iterator when finished with it.
68 public CloseableIterator<VariantContext> iterator()
70 return reader == null ? null : reader.iterator();
74 * Queries for records overlapping the region specified. Note that this method
75 * is performant if the VCF file is indexed, and may be very slow if it is
78 * Client code should call close() on the iterator when finished with it.
81 * the chromosome to query
83 * query interval start
88 public CloseableIterator<VariantContext> query(final String chrom,
89 final int start, final int end)
96 return reader.query(chrom, start, end);
100 return queryUnindexed(chrom, start, end);
105 * Returns an iterator over variant records read from a flat file which
106 * overlap the specified chromosomal positions. Call close() on the iterator
107 * when finished with it!
114 protected CloseableIterator<VariantContext> queryUnindexed(
115 final String chrom, final int start, final int end)
117 final CloseableIterator<VariantContext> it = reader.iterator();
119 return new CloseableIterator<VariantContext>()
121 boolean atEnd = false;
123 // prime look-ahead buffer with next matching record
124 private VariantContext next = findNext();
126 private VariantContext findNext()
132 VariantContext variant = null;
136 int vstart = variant.getStart();
145 int vend = variant.getEnd();
146 // TODO: getChr() is deprecated in htsjdk; the replacement accessor
147 // for the variant's chromosome is getContig()
148 if (chrom.equals(variant.getChr()) && (vstart <= end)
158 public boolean hasNext()
160 boolean hasNext = !atEnd && (next != null);
169 public VariantContext next()
172 * return the next match, and then re-prime
173 * it with the following one (if any)
175 VariantContext temp = next;
195 * Returns an object that models the VCF file headers, or null if there is no reader
199 public VCFHeader getFileHeader()
201 return reader == null ? null : reader.getFileHeader();
205 * Answers true if we are processing a tabix-indexed VCF file, false
206 * otherwise (for example, a plain text, uncompressed file).
210 public boolean isIndex()