package jalview.ext.htsjdk; import htsjdk.samtools.util.CloseableIterator; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; import java.io.Closeable; import java.io.File; import java.io.IOException; /** * A thin wrapper for htsjdk classes to read either plain, or compressed, or * compressed and indexed VCF files */ public class VCFReader implements Closeable, Iterable { private static final String GZ = "gz"; private static final String TBI_EXTENSION = ".tbi"; private boolean indexed; private VCFFileReader reader; /** * Constructor given a raw or compressed VCF file or a (tabix) index file *

* For now, file type is inferred from its suffix: .gz or .bgz for compressed * data, .tbi for an index file, anything else is assumed to be plain text * VCF. * * @param f * @throws IOException */ public VCFReader(String filePath) throws IOException { if (filePath.endsWith(GZ)) { if (new File(filePath + TBI_EXTENSION).exists()) { indexed = true; } } else if (filePath.endsWith(TBI_EXTENSION)) { indexed = true; filePath = filePath.substring(0, filePath.length() - 4); } reader = new VCFFileReader(new File(filePath), indexed); } @Override public void close() throws IOException { if (reader != null) { reader.close(); } } /** * Returns an iterator over VCF variants in the file. The client should call * close() on the iterator when finished with it. */ @Override public CloseableIterator iterator() { return reader == null ? null : reader.iterator(); } /** * Queries for records overlapping the region specified. Note that this method * is performant if the VCF file is indexed, and may be very slow if it is * not. *

* Client code should call close() on the iterator when finished with it. * * @param chrom * the chromosome to query * @param start * query interval start * @param end * query interval end * @return */ public CloseableIterator query(final String chrom, final int start, final int end) { if (reader == null) { return null; } if (indexed) { return reader.query(chrom, start, end); } else { return queryUnindexed(chrom, start, end); } } /** * Returns an iterator over variant records read from a flat file which * overlap the specified chromosomal positions. Call close() on the iterator * when finished with it! * * @param chrom * @param start * @param end * @return */ protected CloseableIterator queryUnindexed( final String chrom, final int start, final int end) { final CloseableIterator it = reader.iterator(); return new CloseableIterator() { boolean atEnd = false; // prime look-ahead buffer with next matching record private VariantContext next = findNext(); private VariantContext findNext() { if (atEnd) { return null; } VariantContext variant = null; while (it.hasNext()) { variant = it.next(); int vstart = variant.getStart(); if (vstart > end) { atEnd = true; close(); return null; } int vend = variant.getEnd(); // todo what is the undeprecated way to get // the chromosome for the variant? if (chrom.equals(variant.getChr()) && (vstart <= end) && (vend >= start)) { return variant; } } return null; } @Override public boolean hasNext() { boolean hasNext = !atEnd && (next != null); if (!hasNext) { close(); } return hasNext; } @Override public VariantContext next() { /* * return the next match, and then re-prime * it with the following one (if any) */ VariantContext temp = next; next = findNext(); return temp; } @Override public void remove() { // not implemented } @Override public void close() { it.close(); } }; } /** * Returns an object that models the VCF file headers * * @return */ public VCFHeader getFileHeader() { return reader == null ? null : reader.getFileHeader(); } /** * Answers true if we are processing a tab-indexed VCF file, false if it is a * plain text (uncompressed) file. * * @return */ public boolean isIndex() { return indexed; } }