2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.htsjdk;
23 import java.io.Closeable;
25 import java.io.IOException;
27 import htsjdk.samtools.util.CloseableIterator;
28 import htsjdk.variant.variantcontext.VariantContext;
29 import htsjdk.variant.vcf.VCFFileReader;
30 import htsjdk.variant.vcf.VCFHeader;
31 import jalview.bin.Console;
34 * A thin wrapper for htsjdk classes that reads plain, compressed, or
35 * compressed and indexed VCF files.
// Wraps an htsjdk VCFFileReader; declared both Closeable and Iterable over VariantContext records.
37 public class VCFReader implements Closeable, Iterable<VariantContext>
// "gz" literal; no use of this constant appears in the visible part of this file.
39 private static final String GZ = "gz";
// Suffix of a tabix index file; the constructor tests for it and also appends it when probing for an index.
41 private static final String TBI_EXTENSION = ".tbi";
// Suffix of a csi index file; treated by the constructor the same way as TBI_EXTENSION.
43 private static final String CSI_EXTENSION = ".csi";
// Passed to the VCFFileReader constructor as its second argument.
// NOTE(review): the assignments to this field are not visible here; presumably set in the constructor's index checks - confirm.
45 private boolean indexed;
// The underlying htsjdk reader; iterator() and getFileHeader() treat a null value as "no reader available".
47 private VCFFileReader reader;
50 * Constructor given a raw or compressed VCF file or a (csi or tabix) index file
52 * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of these
53 * extensions gives a valid file path, open as indexed, else as unindexed.
// Creates the reader for the VCF file at filePath, which may instead name a ".tbi" or ".csi" index file.
// NOTE(review): java.io.File is used below but no import for it is visible in this view - confirm it is imported.
58 public VCFReader(String filePath) throws IOException
// Path names an index file: strip the 4-character suffix (".tbi" / ".csi") to recover the data file path.
61 if (filePath.endsWith(TBI_EXTENSION)
62 || filePath.endsWith(CSI_EXTENSION))
65 filePath = filePath.substring(0, filePath.length() - 4);
// Otherwise check whether an index file exists next to the data file (path + ".tbi", then path + ".csi").
// NOTE(review): the bodies of these branches are not visible here; presumably they set `indexed` - confirm.
67 else if (new File(filePath + TBI_EXTENSION).exists())
71 else if (new File(filePath + CSI_EXTENSION).exists())
77 * we pass the name of the unindexed file to htsjdk,
78 * with a flag to assert whether it is indexed
80 File file = new File(filePath);
83 reader = new VCFFileReader(file, indexed);
// NOTE(review): the condition guarding this message is not visible here; presumably it is reached when the
// file does not exist, leaving `reader` null (other methods null-check it) - confirm.
87 Console.error("File not found: " + filePath);
// Closeable implementation for this wrapper.
// NOTE(review): the method body is not visible in this view; presumably it closes `reader` when non-null - confirm.
92 public void close() throws IOException
101 * Returns an iterator over VCF variants in the file. The client should call
102 * close() on the iterator when finished with it.
// Iterable implementation: hands back the htsjdk reader's own iterator over all records.
105 public CloseableIterator<VariantContext> iterator()
// Yields null (not an empty iterator) when `reader` is null, so callers need a null check before iterating.
107 return reader == null ? null : reader.iterator();
111 * Queries for records overlapping the region specified. Note that this method
112 * is performant if the VCF file is indexed, and may be very slow if it is not.
115 * Client code should call close() on the iterator when finished with it.
118 * @param chrom the chromosome to query
120 * @param start query interval start
// Region query over (chrom, start, end). Two return paths are visible: the htsjdk reader's own query(),
// and queryUnindexed(), which filters the reader's full iterator.
125 public CloseableIterator<VariantContext> query(final String chrom,
126 final int start, final int end)
// NOTE(review): the conditions selecting between these two returns are not visible here;
// presumably the `indexed` flag decides - confirm.
134 return reader.query(chrom, start, end);
138 return queryUnindexed(chrom, start, end);
143 * Returns an iterator over variant records read from a flat file which
144 * overlap the specified chromosomal positions. Call close() on the iterator
145 * when finished with it!
// Wraps the reader's full iterator in a filtering iterator that keeps one matching record buffered.
// Note: `reader` is dereferenced here without the null check that iterator() and getFileHeader() apply.
152 protected CloseableIterator<VariantContext> queryUnindexed(
153 final String chrom, final int start, final int end)
// Unfiltered source of records that the wrapper below draws from.
155 final CloseableIterator<VariantContext> it = reader.iterator();
157 return new CloseableIterator<VariantContext>()
// Consulted by hasNext(). NOTE(review): the place where this flag is set is not visible here.
159 boolean atEnd = false;
161 // prime look-ahead buffer with next matching record
162 private VariantContext next = findNext();
// Helper used to fill the look-ahead buffer; only fragments of its body are visible in this view.
164 private VariantContext findNext()
170 VariantContext variant = null;
174 int vstart = variant.getStart();
183 int vend = variant.getEnd();
184 // todo what is the undeprecated way to get
185 // the chromosome for the variant?
// NOTE(review): the check below already calls getContig(); the todo above may be stale - confirm against htsjdk docs.
// Visible part of the match test: same contig, and variant start not beyond the query end
// (the remainder of the condition is not visible here).
186 if (chrom.equals(variant.getContig()) && (vstart <= end)
196 public boolean hasNext()
// True only while the end flag is unset and a record is buffered.
198 boolean hasNext = !atEnd && (next != null);
207 public VariantContext next()
210 * return the next match, and then re-prime
211 * it with the following one (if any)
// Holds the buffered record in a temporary; the rest of this method is not visible in this view.
213 VariantContext temp = next;
233 * Returns an object that models the VCF file headers
// Delegates to the htsjdk reader's getFileHeader(); yields null when `reader` is null.
237 public VCFHeader getFileHeader()
239 return reader == null ? null : reader.getFileHeader();
243 * Answers true if we are processing an indexed (tabix or csi) VCF file, false
244 * if it is an unindexed file.
248 public boolean isIndex()