From bb5a8705457b0a597b43e5699d6422770c3b5742 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 29 May 2019 16:13:10 +0100 Subject: [PATCH] JAL-2738 accept .csi, default to GRCh37:homo_sapiens if no ##reference --- src/jalview/ext/htsjdk/VCFReader.java | 51 ++++++++++++++++++++++----------- src/jalview/io/vcf/VCFLoader.java | 21 ++++++++++---- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/src/jalview/ext/htsjdk/VCFReader.java b/src/jalview/ext/htsjdk/VCFReader.java index 04525f0..3e7fbc2 100644 --- a/src/jalview/ext/htsjdk/VCFReader.java +++ b/src/jalview/ext/htsjdk/VCFReader.java @@ -1,5 +1,7 @@ package jalview.ext.htsjdk; +import jalview.bin.Cache; + import java.io.Closeable; import java.io.File; import java.io.IOException; @@ -19,36 +21,52 @@ public class VCFReader implements Closeable, Iterable private static final String TBI_EXTENSION = ".tbi"; + private static final String CSI_EXTENSION = ".csi"; + private boolean indexed; private VCFFileReader reader; /** - * Constructor given a raw or compressed VCF file or a (tabix) index file + * Constructor given a raw or compressed VCF file or a (csi or tabix) index file *

- * For now, file type is inferred from its suffix: .gz or .bgz for compressed - * data, .tbi for an index file, anything else is assumed to be plain text - * VCF. + * If the file path ends in ".tbi" or ".csi", or appending one of these + * extensions gives a valid file path, open as indexed, else as unindexed. * * @param f * @throws IOException */ public VCFReader(String filePath) throws IOException { - if (filePath.endsWith(GZ)) + indexed = false; + if (filePath.endsWith(TBI_EXTENSION) + || filePath.endsWith(CSI_EXTENSION)) { - if (new File(filePath + TBI_EXTENSION).exists()) - { - indexed = true; - } + indexed = true; + filePath = filePath.substring(0, filePath.length() - 4); } - else if (filePath.endsWith(TBI_EXTENSION)) + else if (new File(filePath + TBI_EXTENSION).exists()) + { + indexed = true; + } + else if (new File(filePath + CSI_EXTENSION).exists()) { indexed = true; - filePath = filePath.substring(0, filePath.length() - 4); } - reader = new VCFFileReader(new File(filePath), indexed); + /* + * we pass the name of the unindexed file to htsjdk, + * with a flag to assert whether it is indexed + */ + File file = new File(filePath); + if (file.exists()) + { + reader = new VCFFileReader(file, indexed); + } + else + { + Cache.log.error("File not found: " + filePath); + } } @Override @@ -88,9 +106,10 @@ public class VCFReader implements Closeable, Iterable public CloseableIterator query(final String chrom, final int start, final int end) { - if (reader == null) { - return null; - } + if (reader == null) + { + return null; + } if (indexed) { return reader.query(chrom, start, end); @@ -116,7 +135,7 @@ public class VCFReader implements Closeable, Iterable { final CloseableIterator it = reader.iterator(); - return new CloseableIterator() + return new CloseableIterator<>() { boolean atEnd = false; diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 9831af7..04c73b5 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -100,10 +100,12 @@ public class VCFLoader */ private static final String VCF_ASSEMBLY = "VCF_ASSEMBLY"; - private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh38,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38"; + private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh37,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38"; private static final String VCF_SPECIES = "VCF_SPECIES"; // default is human + private static final String DEFAULT_REFERENCE = "grch37"; // fallback default is human GRCh37 + /* * keys to fields of VEP CSQ consequence data * see https://www.ensembl.org/info/docs/tools/vep/vep_formats.html @@ -263,7 +265,12 @@ public class VCFLoader public SequenceI loadVCFContig(String contig) { VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); - String ref = headerLine == null ? null : headerLine.getValue(); + if (headerLine == null) + { + Cache.log.error("VCF reference header not found"); + return null; + } + String ref = headerLine.getValue(); if (ref.startsWith("file://")) { ref = ref.substring(7); @@ -282,7 +289,7 @@ public class VCFLoader } else { - System.err.println("VCF reference not found: " + ref); + Cache.log.error("VCF reference not found: " + ref); } return seq; @@ -301,7 +308,7 @@ public class VCFLoader { VCFHeaderLine ref = header .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); - String reference = ref.getValue(); + String reference = ref == null ? null : ref.getValue(); setSpeciesAndAssembly(reference); @@ -373,8 +380,11 @@ public class VCFLoader */ protected void setSpeciesAndAssembly(String reference) { + if (reference == null) + { + reference = DEFAULT_REFERENCE; // default to GRCh37 if not specified + } reference = reference.toLowerCase(); - vcfSpecies = DEFAULT_SPECIES; /* * for a non-human species, or other assembly identifier, @@ -397,6 +407,7 @@ public class VCFLoader } } + vcfSpecies = DEFAULT_SPECIES; prop = Cache.getProperty(VCF_SPECIES); if (prop != null) { -- 1.7.10.2