From: Jim Procter Date: Fri, 1 Oct 2021 17:29:57 +0000 (+0100) Subject: JAL-2909 JAL-3894 hack in support to import regions of an unindexed SAM file. X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ffeatures%2FJAL-2909_bamImport_2_11_2;p=jalview.git JAL-2909 JAL-3894 hack in support to import regions of an unindexed SAM file. --- diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java index a42c2b1..113223d 100644 --- a/src/jalview/datamodel/CigarParser.java +++ b/src/jalview/datamodel/CigarParser.java @@ -9,7 +9,9 @@ import java.util.TreeMap; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMRecord; +import jalview.bin.Cache; public class CigarParser { @@ -291,7 +293,14 @@ public class CigarParser do { // check extent of read - SAMRecord rec = it.next(); + SAMRecord rec = null; + try { + rec = it.next(); + } catch (SAMFormatException ex) + { + Cache.log.info("Bailing on parsing SAM File - see error below",ex); + break; + } if (extent != null) { diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 8d55c53..2d079cf 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -20,6 +20,7 @@ */ package jalview.io; +import jalview.bin.Cache; import jalview.datamodel.CigarParser; import jalview.datamodel.Range; import jalview.datamodel.Sequence; @@ -35,6 +36,7 @@ import java.util.Map; import java.util.PrimitiveIterator.OfInt; import java.util.SortedMap; +import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMSequenceRecord; @@ -60,6 +62,8 @@ public class BamFile extends AlignFile // first position in alignment private int alignmentStart = -1; + private File _bamFile; + /** * Creates a new BamFile object. */ @@ -81,13 +85,28 @@ public class BamFile extends AlignFile throws IOException { super(true, inFile, sourceType); + _bamFile = new File(inFile); + initFileReader(); + } + private void initFileReader() throws IOException + { final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); - fileReader = factory.open(new File(inFile)); + // File-based bam + if (_bamFile!=null) + { + fileReader = factory.open(_bamFile); // will need to be adapted for JalviewJS/etc + } + else + { + // try and locate index ? + String index = getDataName() + ".bai"; + fileReader = factory.open(SamInputResource.of(getDataName()) + .index(new URL(index))); + } } - /** * Creates a new BamFile object * @@ -97,25 +116,16 @@ public class BamFile extends AlignFile */ public BamFile(FileParse source) throws IOException { - super(true, source); + super(false, source); parseSuffix(); - final SamReaderFactory factory = SamReaderFactory.makeDefault() - .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, - SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) - .validationStringency(ValidationStringency.SILENT); - - // File-based bam if (source.getDataSourceType() == DataSourceType.FILE) - { - fileReader = factory.open(source.inFile); - } - else - { - // locate index ? - String index = source.getDataName() + ".bai"; - fileReader = factory.open(SamInputResource.of(source.getDataName()) - .index(new URL(index))); - } + { + _bamFile = source.inFile; + } else { + + } + initFileReader(); + doParse(); } @Override @@ -166,11 +176,20 @@ public class BamFile extends AlignFile @Override public void parse() { + boolean needToReopen=false; // only actually parse if params are set if (chromosome != null && chromosome != "") { - SAMRecordIterator it = fileReader.query(chromosome, start, end, + SAMRecordIterator it; + try { + it = fileReader.query(chromosome, start, end, false); + } catch (UnsupportedOperationException ex) + { + needToReopen=true; + // could be a sam text file, so we just iterate through without query + it = fileReader.iterator(); + } CigarParser parser = new CigarParser('-'); Range[] xtent = new Range[] { new Range(start, end) }; SortedMap insertions[] = parser.getInsertions(it, @@ -193,12 +212,35 @@ public class BamFile extends AlignFile padRef(refSeq, parser); padRef(revRefSeq, parser); - it = fileReader.query(chromosome, start, end, false); + if (needToReopen) + { + try { + initFileReader(); + } catch (IOException x) + { + Cache.log.warn("Couldn't reopen S/BAM file",x); + } + } + try { + it = fileReader.query(chromosome, start, end, + false); + } catch (UnsupportedOperationException ex) + { + // could be a sam text file, so we just iterate through without query + it = fileReader.iterator(); + } ArrayList fwd = new ArrayList(), rev = new ArrayList(); while (it.hasNext()) { - SAMRecord rec = it.next(); + SAMRecord rec=null; + try { + rec = it.next(); + } catch (SAMFormatException q) + { + Cache.log.info("Bailing on bad SAM line again",q); + break; + } // set the alignment start to be start of first read (we assume reads // are sorted) diff --git a/src/jalview/io/FileFormat.java b/src/jalview/io/FileFormat.java index 1731eaf..66bcbde 100644 --- a/src/jalview/io/FileFormat.java +++ b/src/jalview/io/FileFormat.java @@ -379,7 +379,7 @@ public enum FileFormat implements FileFormatI return true; } }, - Bam("bam", "bam", true, false) + Bam("bam", "bam, sam", true, false) { @Override public AlignmentFileReaderI getReader(FileParse source) diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 0c201e1..55f5651 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -207,6 +207,11 @@ public class IdentifyFile break; } } + if (data.startsWith("@HD\t")) + { + reply = FileFormat.Bam; + break; + } if (data.startsWith("H ") && !aaIndexHeaderRead) { aaIndexHeaderRead = true;