import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFContigHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineCount;
private VCFHeader header;
/*
+ * a Dictionary of contigs (if present) referenced in the VCF file
+ */
+ private SAMSequenceDictionary dictionary;
+
+ /*
* the position (0...) of field in each block of
* CSQ (consequence) data (if declared in the VCF INFO header for CSQ)
* see http://www.ensembl.org/info/docs/tools/vep/vep_formats.html
header = reader.getFileHeader();
+ try
+ {
+ dictionary = header.getSequenceDictionary();
+ } catch (SAMException e)
+ {
+ // ignore - thrown if any contig line lacks length info
+ }
+
sourceId = filePath;
saveMetadata(sourceId);
// ignore
}
}
+ header = null;
+ dictionary = null;
}
}
/*
* simplest case: sequence has id and length matching a VCF contig
*/
+ VCFMap vcfMap = null;
+ if (dictionary != null)
+ {
+ vcfMap = getContigMap(seq);
+ }
+ if (vcfMap != null)
+ {
+ return vcfMap;
+ }
+
+ /*
+ * otherwise, map to VCF from chromosomal coordinates
+ * of the sequence (if known)
+ */
GeneLociI seqCoords = seq.getGeneLoci();
if (seqCoords == null)
{
- VCFMap map = getContigMap(seq);
- if (map == null)
- {
- Cache.log.warn(String.format(
- "Can't query VCF for %s as chromosome coordinates not known",
- seq.getName()));
- }
- return map;
+ Cache.log.warn(String.format(
+ "Can't query VCF for %s as chromosome coordinates not known",
+ seq.getName()));
+ return null;
}
String species = seqCoords.getSpeciesId();
/**
* If the sequence id matches a contig declared in the VCF file, and the
- * sequence matches the contig length, then returns a 1:1 map of the sequence to
- * the contig, else returns null
+ * sequence length matches the contig length, then returns a 1:1 map of the
+ * sequence to the contig, else returns null
*
* @param seq
* @return
private VCFMap getContigMap(SequenceI seq)
{
String id = seq.getName();
- for (VCFContigHeaderLine contig : header.getContigLines())
+ SAMSequenceRecord contig = dictionary.getSequence(id);
+ if (contig != null)
{
- if (contig.getID().equals(id))
+ int len = seq.getLength();
+ if (len == contig.getSequenceLength())
{
- /*
- * have to construct a SAMSequenceRecord to
- * read the contig 'length' field!
- */
- int len = seq.getLength();
- SAMSequenceRecord ssr = contig.getSAMSequenceRecord();
- if (len == ssr.getSequenceLength())
- {
- MapList map = new MapList(new int[] { 1, len },
- new int[]
- { 1, len }, 1, 1);
- return new VCFMap(id, map);
- }
+ MapList map = new MapList(new int[] { 1, len },
+ new int[]
+ { 1, len }, 1, 1);
+ return new VCFMap(id, map);
}
-
}
return null;
}