package jalview.ext.htsjdk;
+import jalview.bin.Cache;
+
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
private static final String TBI_EXTENSION = ".tbi";
+ private static final String CSI_EXTENSION = ".csi";
+
private boolean indexed;
private VCFFileReader reader;
/**
- * Constructor given a raw or compressed VCF file or a (tabix) index file
+ * Constructor given a raw or compressed VCF file or a (csi or tabix) index file
* <p>
- * For now, file type is inferred from its suffix: .gz or .bgz for compressed
- * data, .tbi for an index file, anything else is assumed to be plain text
- * VCF.
+ * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of these
+ * extensions gives the path of an existing file, open as indexed, else as
+ * unindexed.
+ * <p>
+ * When an index file path is supplied, its 4-character extension is removed
+ * to obtain the path of the data file that is handed to htsjdk. If that data
+ * file does not exist, an error is logged and the reader is left unset.
*
- * @param f
+ * @param filePath
+ *          path of a VCF data file, or of its ".tbi" or ".csi" index file
* @throws IOException
*/
public VCFReader(String filePath) throws IOException
{
- if (filePath.endsWith(GZ))
+ indexed = false;
+ if (filePath.endsWith(TBI_EXTENSION)
+ || filePath.endsWith(CSI_EXTENSION))
{
- if (new File(filePath + TBI_EXTENSION).exists())
- {
- indexed = true;
- }
+ indexed = true;
+ filePath = filePath.substring(0, filePath.length() - 4); // both index extensions (".tbi", ".csi") are 4 characters long
}
- else if (filePath.endsWith(TBI_EXTENSION))
+ else if (new File(filePath + TBI_EXTENSION).exists())
+ {
+ indexed = true;
+ }
+ else if (new File(filePath + CSI_EXTENSION).exists())
{
indexed = true;
- filePath = filePath.substring(0, filePath.length() - 4);
}
- reader = new VCFFileReader(new File(filePath), indexed);
+ /*
+ * we pass the name of the data file (never the index file) to htsjdk,
+ * with a flag to assert whether it is indexed
+ */
+ File file = new File(filePath);
+ if (file.exists())
+ {
+ reader = new VCFFileReader(file, indexed);
+ }
+ else
+ {
+ Cache.log.error("File not found: " + filePath);
+ }
}
@Override
public CloseableIterator<VariantContext> query(final String chrom,
final int start, final int end)
{
- if (reader == null) {
- return null;
- }
+ if (reader == null)
+ {
+ return null;
+ }
if (indexed)
{
return reader.query(chrom, start, end);
{
final CloseableIterator<VariantContext> it = reader.iterator();
- return new CloseableIterator()
+ return new CloseableIterator<>()
{
boolean atEnd = false;
*/
private static final String VCF_ASSEMBLY = "VCF_ASSEMBLY";
- private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh38,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38";
+ private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh37,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38";
private static final String VCF_SPECIES = "VCF_SPECIES"; // default is human
+ private static final String DEFAULT_REFERENCE = "grch37"; // fallback default is human GRCh37
+
/*
* keys to fields of VEP CSQ consequence data
* see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html
public SequenceI loadVCFContig(String contig)
{
VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
- String ref = headerLine == null ? null : headerLine.getValue();
+ if (headerLine == null)
+ {
+ Cache.log.error("VCF reference header not found");
+ return null;
+ }
+ String ref = headerLine.getValue();
if (ref.startsWith("file://"))
{
ref = ref.substring(7);
}
else
{
- System.err.println("VCF reference not found: " + ref);
+ Cache.log.error("VCF reference not found: " + ref);
}
return seq;
{
VCFHeaderLine ref = header
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
- String reference = ref.getValue();
+ String reference = ref == null ? null : ref.getValue();
setSpeciesAndAssembly(reference);
*/
protected void setSpeciesAndAssembly(String reference)
{
+ if (reference == null)
+ {
+ reference = DEFAULT_REFERENCE; // default to GRCh37 if not specified
+ }
reference = reference.toLowerCase();
- vcfSpecies = DEFAULT_SPECIES;
/*
* for a non-human species, or other assembly identifier,
}
}
+ vcfSpecies = DEFAULT_SPECIES;
prop = Cache.getProperty(VCF_SPECIES);
if (prop != null)
{