--- /dev/null
+package jalview.ext.htsjdk;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
+import htsjdk.samtools.util.StringUtil;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.File;
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * a source of sequence data accessed via the HTSJDK
+ *
+ * @author jprocter
+ *
+ */
+public class HtsContigDb
+{
+
+ private String name;
+
+ private File dbLocation;
+
+ private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null;
+
+ public HtsContigDb(String name, File descriptor) throws Exception
+ {
+ if (descriptor.isFile())
+ {
+ this.name = name;
+ dbLocation = descriptor;
+ }
+ initSource();
+ }
+
+ private void initSource() throws Exception
+ {
+ if (refFile != null)
+ {
+ return;
+ }
+
+ refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(
+ dbLocation, true);
+ if (refFile == null || refFile.getSequenceDictionary() == null)
+ {
+ // refFile = initSequenceDictionaryFor(dbLocation);
+ }
+
+ }
+
+
+ SAMSequenceDictionary rrefDict = null;
+ private ReferenceSequenceFile initSequenceDictionaryFor(File dbLocation2) throws Exception
+ {
+ rrefDict = getDictionary(dbLocation2, true);
+ if (rrefDict != null)
+ {
+ ReferenceSequenceFile rrefFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(dbLocation2, true);
+ return rrefFile;
+ }
+ return null;
+ }
+ /**
+ * code below hacked out from picard ----
+ *
+ * picard/src/java/picard/sam/CreateSequenceDictionary.java
+ * https://github.com/
+ * broadinstitute/picard/commit/270580d3e28123496576f0b91b3433179bb5d876
+ */
+
+
+ /*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ /**
+ *
+ * @param f
+ * @param truncate
+ * @return
+ * @throws Exception
+ */
+ SAMSequenceDictionary getDictionary(File f, boolean truncate)
+ throws Exception
+ {
+ if (md5 == null)
+ {
+ initCreateSequenceDictionary();
+ }
+ final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory
+ .getReferenceSequenceFile(f, truncate);
+ ReferenceSequence refSeq;
+ List<SAMSequenceRecord> ret = new ArrayList<SAMSequenceRecord>();
+ Set<String> sequenceNames = new HashSet<String>();
+ for (int numSequences = 0; (refSeq = refSeqFile.nextSequence()) != null; ++numSequences)
+ {
+ if (sequenceNames.contains(refSeq.getName()))
+ {
+ throw new Exception(
+ "Sequence name appears more than once in reference: "
+ + refSeq.getName());
+ }
+ sequenceNames.add(refSeq.getName());
+ ret.add(makeSequenceRecord(refSeq));
+ }
+ return new SAMSequenceDictionary(ret);
+ }
+
+ public boolean isValid()
+ {
+ return dbLocation != null && refFile != null;
+ }
+
+ /**
+ * Create one SAMSequenceRecord from a single fasta sequence
+ */
+ private SAMSequenceRecord makeSequenceRecord(
+ final ReferenceSequence refSeq)
+ {
+
+ final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(),
+ refSeq.length());
+
+ // Compute MD5 of upcased bases
+ final byte[] bases = refSeq.getBases();
+ for (int i = 0; i < bases.length; ++i)
+ {
+ bases[i] = StringUtil.toUpperCase(bases[i]);
+ }
+
+ ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
+ // if (GENOME_ASSEMBLY != null) {
+ // ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
+ // }
+ // ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
+ // if (SPECIES != null) {
+ // ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
+ // }
+ return ret;
+ }
+
+ private MessageDigest md5;
+
+ public void initCreateSequenceDictionary() throws Exception
+ {
+ try
+ {
+ md5 = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException e)
+ {
+ throw new Exception("MD5 algorithm not found", e);
+ }
+ }
+
+ private String md5Hash(final byte[] bytes)
+ {
+ md5.reset();
+ md5.update(bytes);
+ String s = new BigInteger(1, md5.digest()).toString(16);
+ if (s.length() != 32)
+ {
+ final String zeros = "00000000000000000000000000000000";
+ s = zeros.substring(0, 32 - s.length()) + s;
+ }
+ return s;
+ }
+
+ // ///// end of hts bits.
+
+ SequenceI getSequenceProxy(String id)
+ {
+ if (!isValid())
+ {
+ return null;
+ }
+
+ ReferenceSequence sseq = refFile.getSequence(id);
+ return new Sequence(sseq.getName(), new String(sseq.getBases()));
+ }
+}