X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FBamFile.java;h=8d55c537661b2ac3442cf60e6b82a6af09425d14;hb=429bbdae3d5229ec1b92ec59f4b5a25bcfcf13f8;hp=1aebbe9519e9500ad9d819e731116b8998465620;hpb=9289ad58228571a63fdf54ef720603a590d58adc;p=jalview.git diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 1aebbe9..8d55c53 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -21,18 +21,24 @@ package jalview.io; import jalview.datamodel.CigarParser; +import jalview.datamodel.Range; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import java.io.File; import java.io.IOException; +import java.net.URL; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.PrimitiveIterator.OfInt; import java.util.SortedMap; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; @@ -51,6 +57,9 @@ public class BamFile extends AlignFile // chromosome/contig to read private String chromosome = ""; + // first position in alignment + private int alignmentStart = -1; + /** * Creates a new BamFile object. */ @@ -89,13 +98,24 @@ public class BamFile extends AlignFile public BamFile(FileParse source) throws IOException { super(true, source); + parseSuffix(); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); // File-based bam - fileReader = factory.open(source.inFile); + if (source.getDataSourceType() == DataSourceType.FILE) + { + fileReader = factory.open(source.inFile); + } + else + { + // locate index ? + String index = source.getDataName() + ".bai"; + fileReader = factory.open(SamInputResource.of(source.getDataName()) + .index(new URL(index))); + } } @Override @@ -105,6 +125,44 @@ public class BamFile extends AlignFile return null; } + private StringBuilder insertRefSeq( + SortedMap insertions, Range xtent) + { + StringBuilder refseq = new StringBuilder(); + int inserted = 0; + for (int p = xtent.start; p < xtent.end; p++) + { + refseq.append('N'); + } + for (Map.Entry insert : insertions.entrySet()) + { + int inspos = insert.getKey() - xtent.start + inserted; + if (inspos < 0) + { + System.out.println("Ignoring -ve insert position " + insert.getKey() + + " of " + insert.getValue() + + " (alpos: " + inspos + ")"); + inspos = 0; + // continue; + } + for (int i = 0, j = insert.getValue(); i < j; i++) + { + inserted++; + refseq.insert(inspos, '-'); + } + } + return refseq; + } + + private void padRef(SequenceI ref, CigarParser cig) + { // pad to column + int padding = cig.firstAlColumn - ref.findIndex(cig.firstRposCol); + System.out.println("Padding " + padding + " to move refseq position " + + cig.firstRposCol + " to " + cig.firstAlColumn + "col."); + + ref.insertCharAt(0, padding, '-'); + } + @Override public void parse() { @@ -114,21 +172,60 @@ public class BamFile extends AlignFile SAMRecordIterator it = fileReader.query(chromosome, start, end, false); CigarParser parser = new CigarParser('-'); - SortedMap insertions = parser.getInsertions(it); + Range[] xtent = new Range[] { new Range(start, end) }; + SortedMap insertions[] = parser.getInsertions(it, + xtent); it.close(); + SequenceI refSeq = new Sequence("chr:" + chromosome, + insertRefSeq(insertions[0], xtent[0]) + .toString()); + + refSeq.setStart(xtent[0].start); + refSeq.setEnd(xtent[0].end); + SequenceI revRefSeq = new Sequence("rev:chr:" + chromosome, + insertRefSeq(insertions[1], xtent[0]) + .toString()); + revRefSeq.setStart(xtent[0].start); + revRefSeq.setEnd(xtent[0].end); + + // Hack to move the seqs along + padRef(refSeq, parser); + padRef(revRefSeq, parser); + it = fileReader.query(chromosome, start, end, false); + + ArrayList fwd = new ArrayList(), rev = new ArrayList(); while (it.hasNext()) { SAMRecord rec = it.next(); + // set the alignment start to be start of first read (we assume reads + // are sorted) + if (alignmentStart == -1) + { + alignmentStart = rec.getAlignmentStart(); + } + // make dataset sequence: start at 1, end at read length - SequenceI seq = new Sequence(rec.getReadName(), + SequenceI seq = new Sequence( + "" + (rec.getReadNegativeStrandFlag() ? "rev:" : "") + + rec.getReadName(), rec.getReadString().toLowerCase()); seq.setStart(1); seq.setEnd(rec.getReadLength()); - - String newRead = parser.parseCigarToSequence(rec, insertions); + OfInt q = rec.getBaseQualityString().chars() + .iterator(); + int p = seq.getStart(); + while (q.hasNext()) + { + seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p, + (float) q.next() - ' ', "bamfile")); + p++; + } + String newRead = parser.parseCigarToSequence(rec, + insertions[rec.getReadNegativeStrandFlag() ? 1 : 0], + alignmentStart, seq); // make alignment sequences SequenceI alsq = seq.deriveSequence(); @@ -136,7 +233,28 @@ public class BamFile extends AlignFile // set start relative to soft clip; assume end is set by Sequence code alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); - seqs.add(alsq); + if (rec.getReadNegativeStrandFlag()) + { + rev.add(alsq); + } + else + { + fwd.add(alsq); + } + } + // Add forward + if (fwd.size() > 0) + { + seqs.add(refSeq); // FIXME needs to be optional, and properly padded + seqs.addAll(fwd); + fwd.clear(); + } + // and reverse strand reads. + if (rev.size() > 0) + { + seqs.add(revRefSeq); // FIXME needs to be optional and properly padded + seqs.addAll(rev); + rev.clear(); } } } @@ -166,5 +284,39 @@ public class BamFile extends AlignFile chromosome = chr; start = s; end = e; + suffix = chromosome + ":" + start + "-" + end; + } + + public boolean parseSuffix() + { + if (suffix == null) + { + return false; + } + int csep = suffix.indexOf(":"); + int rsep = suffix.indexOf("-", csep); + if (csep < 0 || rsep < 0 || suffix.length() - rsep <= 1) + { + return false; + } + String chr, p1, p2; + chr = suffix.substring(0, csep); + p1 = suffix.substring(csep + 1, rsep); + p2 = suffix.substring(rsep + 1); + int cstart, cend; + try + { + cstart = Integer.parseInt(p1); + cend = Integer.parseInt(p2); + } catch (Exception e) + { + warningMessage = (warningMessage == null ? "" : warningMessage + "\n") + + "Couldn't parse range from " + suffix; + return false; + } + chromosome = chr; + start = cstart; + end = cend; + return true; } }