/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; import jalview.datamodel.SequenceI; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Iterator; import htsjdk.samtools.CigarElement; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; public class BamFile extends AlignFile { SamReader fileReader; /** * Creates a new BamFile object. */ public BamFile() { } /** * Creates a new BamFile object. * * @param inFile * DOCUMENT ME! * @param sourceType * DOCUMENT ME! * * @throws IOException * DOCUMENT ME! */ public BamFile(String inFile, DataSourceType sourceType) throws IOException { super(inFile, sourceType); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); fileReader = factory.open(new File(inFile)); } public BamFile(FileParse source) throws IOException { final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); // File-based bam fileReader = factory.open(source.inFile); parse(); } @Override public String print(SequenceI[] seqs, boolean jvsuffix) { // TODO Auto-generated method stub return null; } @Override public void parse() throws IOException { SequenceI seq = null; SAMRecordIterator it = fileReader.iterator(); while (it.hasNext()) { SAMRecord rec = it.next(); String read = rec.getReadString(); int start = rec.getStart(); int end = rec.getEnd(); Iterator cit = rec.getCigar().getCigarElements() .iterator(); seq = parseId(rec.getReadName()); String cigarredRead = parseCigarToSequence(read, cit, '-'); seq.setSequence(cigarredRead); seq.setStart(start); seq.setEnd(end); seqs.addElement(seq); } } /** * Apply the CIGAR string to a read sequence and return the updated read * * @param read * the read to update * @param it * iterator over cigar elements * @param gapChar * gap character to use * @return string representing read with gaps, clipping etc applied */ private String parseCigarToSequence(String read, Iterator it, char gapChar) { StringBuilder newRead = new StringBuilder(); int next = 0; while (it.hasNext()) { CigarElement el = it.next(); int length = el.getLength(); switch (el.getOperator()) { case M: // matched residues newRead.append(read.substring(next, next + length - 1)); next += length; break; case N: // intron in RNA case D: // deletion // add gaps char[] gaps = new char[length]; Arrays.fill(gaps, gapChar); newRead.append(gaps); break; case S: // soft clipping - just skip this bit of the read // do nothing next += length; break; case I: // add gaps to the reference sequence, which we know nothing about just // now // TODO newRead.append(read.substring(next, next + length - 1)); break; case H: // hard clipping - this stretch will not appear in the read break; default: // P, X EQ don't know what to do with these break; } } return newRead.toString(); } }