2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.htsjdk;
23 import java.io.Closeable;
25 import java.io.IOException;
27 import htsjdk.samtools.util.CloseableIterator;
28 import htsjdk.variant.variantcontext.VariantContext;
29 import htsjdk.variant.vcf.VCFFileReader;
30 import htsjdk.variant.vcf.VCFHeader;
31 import jalview.bin.Console;
34 * A thin wrapper for htsjdk classes to read either plain, or compressed, or
35 * compressed and indexed VCF files
37 public class VCFReader implements Closeable, Iterable<VariantContext>
// NOTE(review): no use of GZ is visible among the statements of this class - confirm it is still needed
39 private static final String GZ = "gz";
// file name suffix the constructor tests for, and appends when probing for a tabix index file
41 private static final String TBI_EXTENSION = ".tbi";
// file name suffix the constructor tests for, and appends when probing for a csi index file
43 private static final String CSI_EXTENSION = ".csi";
// flag passed to the VCFFileReader constructor together with the file
45 private boolean indexed;
// underlying htsjdk reader; iterator() and getFileHeader() answer null while this is null
47 private VCFFileReader reader;
50 * Constructor given a raw or compressed VCF file or a (csi or tabix) index
53 * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of
54 * these extensions gives a valid file path, open as indexed, else as
// Builds the htsjdk reader for the file named by filePath; filePath may name the VCF file or its index file.
60 public VCFReader(String filePath) throws IOException
// case 1: the caller supplied the path of an index file (".tbi" or ".csi")
63 if (filePath.endsWith(TBI_EXTENSION)
64 || filePath.endsWith(CSI_EXTENSION))
// both suffixes are 4 characters long, so this leaves the path without the index suffix
67 filePath = filePath.substring(0, filePath.length() - 4);
// case 2: a file with the ".tbi" suffix appended exists alongside the given path
69 else if (new File(filePath + TBI_EXTENSION).exists())
// case 3: a file with the ".csi" suffix appended exists alongside the given path
73 else if (new File(filePath + CSI_EXTENSION).exists())
79 * we pass the name of the unindexed file to htsjdk,
80 * with a flag to assert whether it is indexed
82 File file = new File(filePath);
// the second argument is the 'indexed' flag described in the comment above
85 reader = new VCFFileReader(file, indexed);
// NOTE(review): a missing file is reported via Console.error rather than thrown; presumably
// 'reader' then stays null, which iterator() and getFileHeader() test for - confirm
89 Console.error("File not found: " + filePath);
// close() as required by java.io.Closeable, which this class declares that it implements
94 public void close() throws IOException
103 * Returns an iterator over VCF variants in the file. The client should call
104 * close() on the iterator when finished with it.
107 public CloseableIterator<VariantContext> iterator()
109 return reader == null ? null : reader.iterator();
113 * Queries for records overlapping the region specified. Note that this method
114 * is performant if the VCF file is indexed, and may be very slow if it is
117 * Client code should call close() on the iterator when finished with it.
120 * the chromosome to query
122 * query interval start
// Answers an iterator over the records overlapping chrom:start-end, obtained by one of the two routes below.
127 public CloseableIterator<VariantContext> query(final String chrom,
128 final int start, final int end)
// route 1: hand the region query straight to the htsjdk reader
// NOTE(review): presumably the route taken when the file is indexed - confirm against the branch condition
136 return reader.query(chrom, start, end);
// route 2: use queryUnindexed(), which works from reader.iterator()
140 return queryUnindexed(chrom, start, end);
145 * Returns an iterator over variant records read from a flat file which
146 * overlap the specified chromosomal positions. Call close() on the iterator
147 * when finished with it!
// Wraps the reader's own iterator in an anonymous iterator that buffers one look-ahead record in 'next'.
154 protected CloseableIterator<VariantContext> queryUnindexed(
155 final String chrom, final int start, final int end)
// NOTE(review): 'reader' is dereferenced here without the null test used in iterator() and
// getFileHeader() - confirm that callers never reach this method with a null reader
157 final CloseableIterator<VariantContext> it = reader.iterator();
159 return new CloseableIterator<VariantContext>()
// once true, hasNext() answers false regardless of what is buffered
161 boolean atEnd = false;
163 // prime look-ahead buffer with next matching record
164 private VariantContext next = findNext();
// provides the record stored in the 'next' look-ahead field
166 private VariantContext findNext()
172 VariantContext variant = null;
176 int vstart = variant.getStart();
185 int vend = variant.getEnd();
186 // todo what is the undeprecated way to get
187 // the chromosome for the variant?
// NOTE(review): getContig() is presumably already the non-deprecated accessor (htsjdk
// deprecated getChr()) - confirm against the htsjdk javadoc, then remove the todo above
// visible part of the test: record is on the requested contig and starts no later than the query end
188 if (chrom.equals(variant.getContig()) && (vstart <= end)
198 public boolean hasNext()
// true only while the end flag is unset and a look-ahead record is buffered
200 boolean hasNext = !atEnd && (next != null);
209 public VariantContext next()
212 * return the next match, and then re-prime
213 * it with the following one (if any)
// keep hold of the buffered record in a local variable
215 VariantContext temp = next;
235 * Returns an object that models the VCF file headers
239 public VCFHeader getFileHeader()
241 return reader == null ? null : reader.getFileHeader();
245 * Answers true if we are processing an indexed (tabix or csi) VCF file, false
246 * if the file (plain text or compressed) is being read as unindexed.
250 public boolean isIndex()