2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.htsjdk;
23 import jalview.bin.Cache;
25 import java.io.Closeable;
27 import java.io.IOException;
29 import htsjdk.samtools.util.CloseableIterator;
30 import htsjdk.variant.variantcontext.VariantContext;
31 import htsjdk.variant.vcf.VCFFileReader;
32 import htsjdk.variant.vcf.VCFHeader;
35 * A thin wrapper for htsjdk classes to read either plain, or compressed, or
36 * compressed and indexed VCF files
38 public class VCFReader implements Closeable, Iterable<VariantContext>
// NOTE(review): presumably a gzip file-name suffix; no line visible in this
// chunk references it - confirm it is still needed
40 private static final String GZ = "gz";
// suffix of a tabix index file; the constructor tests the supplied path
// against it, both as-is and appended to the path
42 private static final String TBI_EXTENSION = ".tbi";
// suffix of a csi index file; tested by the constructor in the same way
44 private static final String CSI_EXTENSION = ".csi";
// the flag handed to the VCFFileReader constructor together with the file
46 private boolean indexed;
// the wrapped htsjdk reader; iterator() and getFileHeader() return null
// while this is null
48 private VCFFileReader reader;
/**
 * Constructor given a raw or compressed VCF file, or a (csi or tabix) index
 * file.
 * <p>
 * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of
 * these extensions gives a valid file path, the file is opened as indexed,
 * else as unindexed. When the path of an index file is supplied, its
 * extension is removed so that the path of the VCF data file is what gets
 * passed to htsjdk.
 * <p>
 * If the VCF data file does not exist, an error is reported through
 * {@code Cache.error} and no reader is created.
 *
 * @param filePath
 *          path of a VCF file, or of its .tbi or .csi index file
 * @throws IOException
 *           declared by the signature; NOTE(review): no statement in this
 *           constructor is known to raise it - confirm whether callers rely
 *           on the declaration
 */
public VCFReader(String filePath) throws IOException
{
  String vcfPath = filePath;
  if (filePath.endsWith(TBI_EXTENSION))
  {
    // an index file was supplied: drop exactly the matched extension
    indexed = true;
    vcfPath = filePath.substring(0,
            filePath.length() - TBI_EXTENSION.length());
  }
  else if (filePath.endsWith(CSI_EXTENSION))
  {
    indexed = true;
    vcfPath = filePath.substring(0,
            filePath.length() - CSI_EXTENSION.length());
  }
  else
  {
    // a data file was supplied: indexed only if a sibling index file exists
    indexed = new File(filePath + TBI_EXTENSION).exists()
            || new File(filePath + CSI_EXTENSION).exists();
  }

  /*
   * we pass the name of the unindexed file to htsjdk,
   * with a flag to assert whether it is indexed
   */
  File file = new File(vcfPath);
  if (file.exists())
  {
    reader = new VCFFileReader(file, indexed);
  }
  else
  {
    Cache.error("File not found: " + vcfPath);
  }
}
// Closeable method of this wrapper.
// NOTE(review): the body of this method is not visible in this chunk;
// presumably it closes the wrapped reader when one is open - confirm
93 public void close() throws IOException
102 * Returns an iterator over VCF variants in the file. The client should call
103 * close() on the iterator when finished with it.
106 public CloseableIterator<VariantContext> iterator()
108 return reader == null ? null : reader.iterator();
112 * Queries for records overlapping the region specified. Note that this method
113 * is performant if the VCF file is indexed, and may be very slow if it is
116 * Client code should call close() on the iterator when finished with it.
119 * the chromosome to query
121 * query interval start
126 public CloseableIterator<VariantContext> query(final String chrom,
127 final int start, final int end)
135 return reader.query(chrom, start, end);
139 return queryUnindexed(chrom, start, end);
// Wraps reader.iterator() in an anonymous CloseableIterator that keeps a
// one-record look-ahead ('next').
// NOTE(review): several lines of this method (braces, the scanning loop,
// return statements and any further overrides) are not visible in this
// chunk; the notes below cover only the statements that can be seen.
144 * Returns an iterator over variant records read from a flat file which
145 * overlap the specified chromosomal positions. Call close() on the iterator
146 * when finished with it!
153 protected CloseableIterator<VariantContext> queryUnindexed(
154 final String chrom, final int start, final int end)
// NOTE(review): no null check on 'reader' is visible here, unlike iterator()
// and getFileHeader(); a null reader would raise NullPointerException on this
// line - confirm that callers guard against it
156 final CloseableIterator<VariantContext> it = reader.iterator();
158 return new CloseableIterator<VariantContext>()
// declared before 'next', so it is already false when the initialiser of
// 'next' calls findNext(); NOTE(review): no other assignment to atEnd is
// visible in this chunk
160 boolean atEnd = false;
162 // prime look-ahead buffer with next matching record
// evaluated when the anonymous iterator is constructed, i.e. before the
// first call to hasNext()
163 private VariantContext next = findNext();
// looks for the next record that matches the query; NOTE(review): the loop
// and the return statements are not visible in this chunk
165 private VariantContext findNext()
171 VariantContext variant = null;
// start position of the candidate record
175 int vstart = variant.getStart();
// end position of the candidate record
184 int vend = variant.getEnd();
185 // todo what is the undeprecated way to get
186 // the chromosome for the variant?
// match test: record is on the requested contig and starts no later than the
// query end; NOTE(review): the condition continues on a line not visible
// here - presumably it also compares vend with start; confirm
187 if (chrom.equals(variant.getContig()) && (vstart <= end)
197 public boolean hasNext()
// a record is available only while the scan is not flagged as finished and
// the look-ahead buffer holds a record
199 boolean hasNext = !atEnd && (next != null);
208 public VariantContext next()
211 * return the next match, and then re-prime
212 * it with the following one (if any)
// remember the buffered record before the look-ahead is refilled
214 VariantContext temp = next;
234 * Returns an object that models the VCF file headers
238 public VCFHeader getFileHeader()
240 return reader == null ? null : reader.getFileHeader();
244 * Answers true if the VCF file is being read with an index (tabix or csi),
245 * false if it is being read as an unindexed file.
249 public boolean isIndex()