/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; import jalview.datamodel.CigarParser; import jalview.datamodel.Range; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.PrimitiveIterator.OfInt; import java.util.SortedMap; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; public class BamFile extends AlignFile { // SAM/BAM file reader private SamReader fileReader; // start position to read from private int start = -1; // end position to read to private int end = -1; // chromosome/contig to read private String chromosome = ""; // first position in alignment private int alignmentStart = -1; /** * Creates a new BamFile object. */ public BamFile() { } /** * Creates a new BamFile object. * * @param inFile * Name of file to read * @param sourceType * Whether data source is file, url or other type of data source * * @throws IOException */ public BamFile(String inFile, DataSourceType sourceType) throws IOException { super(true, inFile, sourceType); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); fileReader = factory.open(new File(inFile)); } /** * Creates a new BamFile object * * @param source * wrapper for datasource * @throws IOException */ public BamFile(FileParse source) throws IOException { super(true, source); parseSuffix(); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); // File-based bam if (source.getDataSourceType() == DataSourceType.FILE) { fileReader = factory.open(source.inFile); } else { // locate index ? String index = source.getDataName() + ".bai"; fileReader = factory.open(SamInputResource.of(source.getDataName()) .index(new URL(index))); } } @Override public String print(SequenceI[] seqs, boolean jvsuffix) { // TODO Auto-generated method stub return null; } private StringBuilder insertRefSeq( SortedMap insertions, Range xtent) { StringBuilder refseq = new StringBuilder(); int inserted = 0; for (int p = xtent.start; p < xtent.end; p++) { refseq.append('N'); } for (Map.Entry insert : insertions.entrySet()) { int inspos = insert.getKey() - xtent.start + inserted; if (inspos < 0) { System.out.println("Ignoring -ve insert position " + insert.getKey() + " of " + insert.getValue() + " (alpos: " + inspos + ")"); inspos = 0; // continue; } for (int i = 0, j = insert.getValue(); i < j; i++) { inserted++; refseq.insert(inspos, '-'); } } return refseq; } private void padRef(SequenceI ref, CigarParser cig) { // pad to column int padding = cig.firstAlColumn - ref.findIndex(cig.firstRposCol); System.out.println("Padding " + padding + " to move refseq position " + cig.firstRposCol + " to " + cig.firstAlColumn + "col."); ref.insertCharAt(0, padding, '-'); } @Override public void parse() { // only actually parse if params are set if (chromosome != null && chromosome != "") { SAMRecordIterator it = fileReader.query(chromosome, start, end, false); CigarParser parser = new CigarParser('-'); Range[] xtent = new Range[] { new Range(start, end) }; SortedMap insertions[] = parser.getInsertions(it, xtent); it.close(); SequenceI refSeq = new Sequence("chr:" + chromosome, insertRefSeq(insertions[0], xtent[0]) .toString()); refSeq.setStart(xtent[0].start); refSeq.setEnd(xtent[0].end); SequenceI revRefSeq = new Sequence("rev:chr:" + chromosome, insertRefSeq(insertions[1], xtent[0]) .toString()); revRefSeq.setStart(xtent[0].start); revRefSeq.setEnd(xtent[0].end); // Hack to move the seqs along padRef(refSeq, parser); padRef(revRefSeq, parser); it = fileReader.query(chromosome, start, end, false); ArrayList fwd = new ArrayList(), rev = new ArrayList(); while (it.hasNext()) { SAMRecord rec = it.next(); // set the alignment start to be start of first read (we assume reads // are sorted) if (alignmentStart == -1) { alignmentStart = rec.getAlignmentStart(); } // make dataset sequence: start at 1, end at read length SequenceI seq = new Sequence( "" + (rec.getReadNegativeStrandFlag() ? "rev:" : "") + rec.getReadName(), rec.getReadString().toLowerCase()); seq.setStart(1); seq.setEnd(rec.getReadLength()); OfInt q = rec.getBaseQualityString().chars() .iterator(); int p = seq.getStart(); while (q.hasNext()) { seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p, (float) q.next() - ' ', "bamfile")); p++; } String newRead = parser.parseCigarToSequence(rec, insertions[rec.getReadNegativeStrandFlag() ? 1 : 0], alignmentStart, seq); // make alignment sequences SequenceI alsq = seq.deriveSequence(); alsq.setSequence(newRead); // set start relative to soft clip; assume end is set by Sequence code alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); if (rec.getReadNegativeStrandFlag()) { rev.add(alsq); } else { fwd.add(alsq); } } // Add forward if (fwd.size() > 0) { seqs.add(refSeq); // FIXME needs to be optional, and properly padded seqs.addAll(fwd); fwd.clear(); } // and reverse strand reads. if (rev.size() > 0) { seqs.add(revRefSeq); // FIXME needs to be optional and properly padded seqs.addAll(rev); rev.clear(); } } } /** * Get the list of chromosomes or contigs from the file (listed in SQ entries * in BAM file header) * * @return array of chromosome/contig strings */ @Override public Object[] preprocess() { List refSeqs = fileReader.getFileHeader() .getSequenceDictionary().getSequences(); List chrs = new ArrayList<>(); for (SAMSequenceRecord ref : refSeqs) { chrs.add(ref.getSequenceName()); } return chrs.toArray(); } public void setOptions(String chr, int s, int e) { chromosome = chr; start = s; end = e; suffix = chromosome + ":" + start + "-" + end; } public boolean parseSuffix() { if (suffix == null) { return false; } int csep = suffix.indexOf(":"); int rsep = suffix.indexOf("-", csep); if (csep < 0 || rsep < 0 || suffix.length() - rsep <= 1) { return false; } String chr, p1, p2; chr = suffix.substring(0, csep); p1 = suffix.substring(csep + 1, rsep); p2 = suffix.substring(rsep + 1); int cstart, cend; try { cstart = Integer.parseInt(p1); cend = Integer.parseInt(p2); } catch (Exception e) { warningMessage = (warningMessage == null ? "" : warningMessage + "\n") + "Couldn't parse range from " + suffix; return false; } chromosome = chr; start = cstart; end = cend; return true; } }