X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fhtsjdk%2FVCFReader.java;h=e91164a1d4e448de0564c0c8ae9d853387937b5d;hb=cb8e52fbbc5f725e3f7f48c672cdddb0690bd978;hp=c5e09e041376e13ed1691bfe727b963f2b5c3d6b;hpb=9317cd655af4803461acc71581ecbdc0a6677069;p=jalview.git diff --git a/src/jalview/ext/htsjdk/VCFReader.java b/src/jalview/ext/htsjdk/VCFReader.java index c5e09e0..e91164a 100644 --- a/src/jalview/ext/htsjdk/VCFReader.java +++ b/src/jalview/ext/htsjdk/VCFReader.java @@ -1,13 +1,34 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.ext.htsjdk; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + import htsjdk.samtools.util.CloseableIterator; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; +import jalview.bin.Console; /** * A thin wrapper for htsjdk classes to read either plain, or compressed, or @@ -19,36 +40,52 @@ public class VCFReader implements Closeable, Iterable private static final String TBI_EXTENSION = ".tbi"; + private static final String CSI_EXTENSION = ".csi"; + private boolean indexed; private VCFFileReader reader; /** - * Constructor given a raw or compressed VCF file or a (tabix) index file + * Constructor given a raw or compressed VCF file or a (csi or tabix) index file *

- * For now, file type is inferred from its suffix: .gz or .bgz for compressed - * data, .tbi for an index file, anything else is assumed to be plain text - * VCF. + * If the file path ends in ".tbi" or ".csi", or appending one of these + * extensions gives a valid file path, open as indexed, else as unindexed. * * @param f * @throws IOException */ public VCFReader(String filePath) throws IOException { - if (filePath.endsWith(GZ)) + indexed = false; + if (filePath.endsWith(TBI_EXTENSION) + || filePath.endsWith(CSI_EXTENSION)) { - if (new File(filePath + TBI_EXTENSION).exists()) - { - indexed = true; - } + indexed = true; + filePath = filePath.substring(0, filePath.length() - 4); } - else if (filePath.endsWith(TBI_EXTENSION)) + else if (new File(filePath + TBI_EXTENSION).exists()) + { + indexed = true; + } + else if (new File(filePath + CSI_EXTENSION).exists()) { indexed = true; - filePath = filePath.substring(0, filePath.length() - 4); } - reader = new VCFFileReader(new File(filePath), indexed); + /* + * we pass the name of the unindexed file to htsjdk, + * with a flag to assert whether it is indexed + */ + File file = new File(filePath); + if (file.exists()) + { + reader = new VCFFileReader(file, indexed); + } + else + { + Console.error("File not found: " + filePath); + } } @Override @@ -72,9 +109,10 @@ public class VCFReader implements Closeable, Iterable /** * Queries for records overlapping the region specified. Note that this method - * requires a VCF file with an associated index. If no index exists a - * TribbleException will be thrown. Client code should call close() on the - * iterator when finished with it. + * is performant if the VCF file is indexed, and may be very slow if it is + * not. + *

+ * Client code should call close() on the iterator when finished with it. * * @param chrom * the chromosome to query @@ -87,7 +125,108 @@ public class VCFReader implements Closeable, Iterable public CloseableIterator query(final String chrom, final int start, final int end) { - return reader == null ? null : reader.query(chrom, start, end); + if (reader == null) + { + return null; + } + if (indexed) + { + return reader.query(chrom, start, end); + } + else + { + return queryUnindexed(chrom, start, end); + } + } + + /** + * Returns an iterator over variant records read from a flat file which + * overlap the specified chromosomal positions. Call close() on the iterator + * when finished with it! + * + * @param chrom + * @param start + * @param end + * @return + */ + protected CloseableIterator queryUnindexed( + final String chrom, final int start, final int end) + { + final CloseableIterator it = reader.iterator(); + + return new CloseableIterator() + { + boolean atEnd = false; + + // prime look-ahead buffer with next matching record + private VariantContext next = findNext(); + + private VariantContext findNext() + { + if (atEnd) + { + return null; + } + VariantContext variant = null; + while (it.hasNext()) + { + variant = it.next(); + int vstart = variant.getStart(); + + if (vstart > end) + { + atEnd = true; + close(); + return null; + } + + int vend = variant.getEnd(); + // todo what is the undeprecated way to get + // the chromosome for the variant? + if (chrom.equals(variant.getContig()) && (vstart <= end) + && (vend >= start)) + { + return variant; + } + } + return null; + } + + @Override + public boolean hasNext() + { + boolean hasNext = !atEnd && (next != null); + if (!hasNext) + { + close(); + } + return hasNext; + } + + @Override + public VariantContext next() + { + /* + * return the next match, and then re-prime + * it with the following one (if any) + */ + VariantContext temp = next; + next = findNext(); + return temp; + } + + @Override + public void remove() + { + // not implemented + } + + @Override + public void close() + { + it.close(); + } + }; } /** @@ -99,4 +238,15 @@ public class VCFReader implements Closeable, Iterable { return reader == null ? null : reader.getFileHeader(); } + + /** + * Answers true if we are processing a tab-indexed VCF file, false if it is a + * plain text (uncompressed) file. + * + * @return + */ + public boolean isIndex() + { + return indexed; + } }