X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fext%2Fhtsjdk%2FVCFReader.java;fp=src%2Fjalview%2Fext%2Fhtsjdk%2FVCFReader.java;h=14c057f7e56a23b195889473fe11d38cd04465de;hb=f4766a7bbcfae845fc95923b01fa14ff83d589ff;hp=0000000000000000000000000000000000000000;hpb=163ed5b997bbda48e4cdd950e87a8fe01baae7fb;p=jalview.git diff --git a/src/jalview/ext/htsjdk/VCFReader.java b/src/jalview/ext/htsjdk/VCFReader.java new file mode 100644 index 0000000..14c057f --- /dev/null +++ b/src/jalview/ext/htsjdk/VCFReader.java @@ -0,0 +1,214 @@ +package jalview.ext.htsjdk; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + +/** + * A thin wrapper for htsjdk classes to read either plain, or compressed, or + * compressed and indexed VCF files + */ +public class VCFReader implements Closeable, Iterable +{ + private static final String GZ = "gz"; + + private static final String TBI_EXTENSION = ".tbi"; + + private boolean indexed; + + private VCFFileReader reader; + + /** + * Constructor given a raw or compressed VCF file or a (tabix) index file + *

+ * For now, file type is inferred from its suffix: .gz or .bgz for compressed + * data, .tbi for an index file, anything else is assumed to be plain text + * VCF. + * + * @param f + * @throws IOException + */ + public VCFReader(String filePath) throws IOException + { + if (filePath.endsWith(GZ)) + { + if (new File(filePath + TBI_EXTENSION).exists()) + { + indexed = true; + } + } + else if (filePath.endsWith(TBI_EXTENSION)) + { + indexed = true; + filePath = filePath.substring(0, filePath.length() - 4); + } + + reader = new VCFFileReader(new File(filePath), indexed); + } + + @Override + public void close() throws IOException + { + if (reader != null) + { + reader.close(); + } + } + + /** + * Returns an iterator over VCF variants in the file. The client should call + * close() on the iterator when finished with it. + */ + @Override + public CloseableIterator iterator() + { + return reader == null ? null : reader.iterator(); + } + + /** + * Queries for records overlapping the region specified. Note that this method + * is performant if the VCF file is indexed, and may be very slow if it is + * not. + *

+ * Client code should call close() on the iterator when finished with it. + * + * @param chrom + * the chromosome to query + * @param start + * query interval start + * @param end + * query interval end + * @return + */ + public CloseableIterator query(final String chrom, + final int start, final int end) + { + if (reader == null) { + return null; + } + if (indexed) + { + return reader.query(chrom, start, end); + } + else + { + return queryUnindexed(chrom, start, end); + } + } + + /** + * Returns an iterator over variant records read from a flat file which + * overlap the specified chromosomal positions. Call close() on the iterator + * when finished with it! + * + * @param chrom + * @param start + * @param end + * @return + */ + protected CloseableIterator queryUnindexed( + final String chrom, final int start, final int end) + { + final CloseableIterator it = reader.iterator(); + + return new CloseableIterator() + { + boolean atEnd = false; + + // prime look-ahead buffer with next matching record + private VariantContext next = findNext(); + + private VariantContext findNext() + { + if (atEnd) + { + return null; + } + VariantContext variant = null; + while (it.hasNext()) + { + variant = it.next(); + int vstart = variant.getStart(); + + if (vstart > end) + { + atEnd = true; + close(); + return null; + } + + int vend = variant.getEnd(); + // todo what is the undeprecated way to get + // the chromosome for the variant? + if (chrom.equals(variant.getChr()) && (vstart <= end) + && (vend >= start)) + { + return variant; + } + } + return null; + } + + @Override + public boolean hasNext() + { + boolean hasNext = !atEnd && (next != null); + if (!hasNext) + { + close(); + } + return hasNext; + } + + @Override + public VariantContext next() + { + /* + * return the next match, and then re-prime + * it with the following one (if any) + */ + VariantContext temp = next; + next = findNext(); + return temp; + } + + @Override + public void remove() + { + // not implemented + } + + @Override + public void close() + { + it.close(); + } + }; + } + + /** + * Returns an object that models the VCF file headers + * + * @return + */ + public VCFHeader getFileHeader() + { + return reader == null ? null : reader.getFileHeader(); + } + + /** + * Answers true if we are processing a tab-indexed VCF file, false if it is a + * plain text (uncompressed) file. + * + * @return + */ + public boolean isIndex() + { + return indexed; + } +}