X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fhtsjdk%2FHtsContigDb.java;h=9a985b63857e3f584e20bc298e23205ac268b7ce;hb=a7634a9f5f5f691a7bff7bb05ca21a340aa2f834;hp=667e5678753a1ebb736e6446757a6d8f70c54d8c;hpb=853624fb32058cccc544ae7d13af6ad4b0800b6c;p=jalview.git diff --git a/src/jalview/ext/htsjdk/HtsContigDb.java b/src/jalview/ext/htsjdk/HtsContigDb.java index 667e567..9a985b6 100644 --- a/src/jalview/ext/htsjdk/HtsContigDb.java +++ b/src/jalview/ext/htsjdk/HtsContigDb.java @@ -1,16 +1,32 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.ext.htsjdk; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.reference.ReferenceSequence; -import htsjdk.samtools.reference.ReferenceSequenceFile; -import htsjdk.samtools.reference.ReferenceSequenceFileFactory; -import htsjdk.samtools.util.StringUtil; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import java.io.File; +import java.io.IOException; import java.math.BigInteger; +import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -18,6 +34,15 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.reference.FastaSequenceIndexCreator; +import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.reference.ReferenceSequenceFile; +import htsjdk.samtools.reference.ReferenceSequenceFileFactory; +import htsjdk.samtools.util.StringUtil; + /** * a source of sequence data accessed via the HTSJDK * @@ -26,14 +51,25 @@ import java.util.Set; */ public class HtsContigDb { - private String name; private File dbLocation; private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null; - public HtsContigDb(String name, File descriptor) throws Exception + public static void createFastaSequenceIndex(Path path, boolean overwrite) + throws IOException + { + try + { + FastaSequenceIndexCreator.create(path, overwrite); + } catch (SAMException e) + { + throw new IOException(e.getMessage()); + } + } + + public HtsContigDb(String name, File descriptor) { if (descriptor.isFile()) { @@ -43,15 +79,29 @@ public class HtsContigDb initSource(); } - private void initSource() throws Exception + public void close() + { + if (refFile != null) + { + try + { + refFile.close(); + } catch (IOException e) + { + // ignore + 
} + } + } + + private void initSource() { if (refFile != null) { return; } - refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile( - dbLocation, true); + refFile = ReferenceSequenceFileFactory + .getReferenceSequenceFile(dbLocation, true); if (refFile == null || refFile.getSequenceDictionary() == null) { // refFile = initSequenceDictionaryFor(dbLocation); @@ -122,9 +172,10 @@ public class HtsContigDb final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory .getReferenceSequenceFile(f, truncate); ReferenceSequence refSeq; - List ret = new ArrayList(); - Set sequenceNames = new HashSet(); - for (int numSequences = 0; (refSeq = refSeqFile.nextSequence()) != null; ++numSequences) + List ret = new ArrayList<>(); + Set sequenceNames = new HashSet<>(); + for (int numSequences = 0; (refSeq = refSeqFile + .nextSequence()) != null; ++numSequences) { if (sequenceNames.contains(refSeq.getName())) { @@ -199,14 +250,29 @@ public class HtsContigDb // ///// end of hts bits. - SequenceI getSequenceProxy(String id) + /** + * Reads the contig with the given id and returns as a Jalview SequenceI + * object. Note the database must be indexed for this operation to succeed. + * + * @param id + * @return + */ + public SequenceI getSequenceProxy(String id) { - if (!isValid()) + if (!isValid() || !refFile.isIndexed()) { + System.err.println( + "Cannot read contig as file is invalid or not indexed"); return null; } ReferenceSequence sseq = refFile.getSequence(id); return new Sequence(sseq.getName(), new String(sseq.getBases())); } + + public boolean isIndexed() + { + return refFile != null && refFile.isIndexed(); + } + }