X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fjalview%2Fext%2Fhtsjdk%2FVCFReader.java;h=14c057f7e56a23b195889473fe11d38cd04465de;hb=296593216c47a835f462d1d74a40b41e4818f737;hp=8dfd7e265b5e1ce5a49af7b4791724e10d9cadfb;hpb=f943e5b7a7a5ce2b819495f83dbad28028a9a956;p=jalview.git

diff --git a/src/jalview/ext/htsjdk/VCFReader.java b/src/jalview/ext/htsjdk/VCFReader.java
index 8dfd7e2..14c057f 100644
--- a/src/jalview/ext/htsjdk/VCFReader.java
+++ b/src/jalview/ext/htsjdk/VCFReader.java
@@ -3,6 +3,7 @@ package jalview.ext.htsjdk;
 import htsjdk.samtools.util.CloseableIterator;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
 
 import java.io.Closeable;
 import java.io.File;
@@ -71,8 +72,10 @@ public class VCFReader implements Closeable, Iterable<VariantContext>
 
   /**
    * Queries for records overlapping the region specified. Note that this method
-   * requires a VCF file with an associated index. If no index exists a
-   * TribbleException will be thrown.
+   * is performant if the VCF file is indexed, and may be very slow if it is
+   * not.
+   * <p>
+   * Client code should call close() on the iterator when finished with it.
    * 
    * @param chrom
    *          the chromosome to query
@@ -85,6 +88,142 @@ public class VCFReader implements Closeable, Iterable<VariantContext>
   public CloseableIterator<VariantContext> query(final String chrom,
           final int start, final int end)
   {
-    return reader == null ? null : reader.query(chrom, start, end);
+    if (reader == null)
+    {
+      return null;
+    }
+    if (indexed)
+    {
+      return reader.query(chrom, start, end);
+    }
+    return queryUnindexed(chrom, start, end);
+  }
+
+  /**
+   * Returns an iterator over the variant records of a non-indexed file that lie
+   * on the given chromosome and overlap the given range. The file is scanned
+   * sequentially from its first record; the scan stops at the first record on
+   * {@code chrom} that starts after {@code end}, which assumes that records are
+   * sorted by position within each chromosome. Call close() on the iterator
+   * when finished with it!
+   * 
+   * @param chrom
+   *          the chromosome to query
+   * @param start
+   *          first position of the range; a record matches if it ends at or
+   *          after this position
+   * @param end
+   *          last position of the range; a record matches if it starts at or
+   *          before this position
+   * @return an iterator over the matching records, in file order
+   */
+  protected CloseableIterator<VariantContext> queryUnindexed(
+          final String chrom, final int start, final int end)
+  {
+    final CloseableIterator<VariantContext> it = reader.iterator();
+
+    return new CloseableIterator<VariantContext>()
+    {
+      boolean atEnd = false;
+
+      // prime look-ahead buffer with next matching record
+      private VariantContext next = findNext();
+
+      private VariantContext findNext()
+      {
+        if (atEnd)
+        {
+          return null;
+        }
+        while (it.hasNext())
+        {
+          VariantContext variant = it.next();
+          if (!chrom.equals(variant.getContig()))
+          {
+            /*
+             * a record on another chromosome must not end the scan,
+             * whatever its position - just skip it
+             */
+            continue;
+          }
+
+          if (variant.getStart() > end)
+          {
+            // past the range on this chromosome (assumes sorted records)
+            atEnd = true;
+            close();
+            return null;
+          }
+
+          if (variant.getEnd() >= start)
+          {
+            return variant;
+          }
+        }
+        // source exhausted: never poll it again once it has been closed
+        atEnd = true;
+        return null;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        boolean more = next != null;
+        if (!more)
+        {
+          close();
+        }
+        return more;
+      }
+
+      @Override
+      public VariantContext next()
+      {
+        if (next == null)
+        {
+          throw new java.util.NoSuchElementException();
+        }
+        /*
+         * return the next match, and then re-prime
+         * it with the following one (if any)
+         */
+        VariantContext temp = next;
+        next = findNext();
+        return temp;
+      }
+
+      @Override
+      public void remove()
+      {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void close()
+      {
+        it.close();
+      }
+    };
+  }
+
+  /**
+   * Returns an object that models the VCF file headers
+   * 
+   * @return the header, or null if there is no reader
+   */
+  public VCFHeader getFileHeader()
+  {
+    return reader == null ? null : reader.getFileHeader();
+  }
+
+  /**
+   * Answers true if we are processing a tab-indexed VCF file, false if it is a
+   * plain text (uncompressed) file.
+   * 
+   * @return
+   */
+  public boolean isIndex()
+  {
+    return indexed;
   }
 }