/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; import jalview.datamodel.CigarParser; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.SortedMap; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; public class BamFile extends AlignFile { // SAM/BAM file reader private SamReader fileReader; // start position to read from private int start = -1; // end position to read to private int end = -1; // chromosome/contig to read private String chromosome = ""; // first position in alignment private int alignmentStart = -1; /** * Creates a new BamFile object. */ public BamFile() { } /** * Creates a new BamFile object. * * @param inFile * Name of file to read * @param sourceType * Whether data source is file, url or other type of data source * * @throws IOException */ public BamFile(String inFile, DataSourceType sourceType) throws IOException { super(true, inFile, sourceType); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); fileReader = factory.open(new File(inFile)); } /** * Creates a new BamFile object * * @param source * wrapper for datasource * @throws IOException */ public BamFile(FileParse source) throws IOException { super(true, source); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); // File-based bam fileReader = factory.open(source.inFile); } @Override public String print(SequenceI[] seqs, boolean jvsuffix) { // TODO Auto-generated method stub return null; } @Override public void parse() { // only actually parse if params are set if (chromosome != null && chromosome != "") { SAMRecordIterator it = fileReader.query(chromosome, start, end, false); CigarParser parser = new CigarParser('-'); SortedMap insertions = parser.getInsertions(it); it.close(); it = fileReader.query(chromosome, start, end, false); while (it.hasNext()) { SAMRecord rec = it.next(); // set the alignment start to be start of first read (we assume reads // are sorted) if (alignmentStart == -1) { alignmentStart = rec.getAlignmentStart(); } // make dataset sequence: start at 1, end at read length SequenceI seq = new Sequence(rec.getReadName(), rec.getReadString().toLowerCase()); seq.setStart(1); seq.setEnd(rec.getReadLength()); String newRead = parser.parseCigarToSequence(rec, insertions, alignmentStart, seq); // make alignment sequences SequenceI alsq = seq.deriveSequence(); alsq.setSequence(newRead); // set start relative to soft clip; assume end is set by Sequence code alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); seqs.add(alsq); } } } /** * Get the list of chromosomes or contigs from the file (listed in SQ entries * in BAM file header) * * @return array of chromosome/contig strings */ @Override public Object[] preprocess() { List refSeqs = fileReader.getFileHeader() .getSequenceDictionary().getSequences(); List chrs = new ArrayList<>(); for (SAMSequenceRecord ref : refSeqs) { chrs.add(ref.getSequenceName()); } return chrs.toArray(); } public void setOptions(String chr, int s, int e) { chromosome = chr; start = s; end = e; } }