/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.datamodel.CigarParser;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
/**
 * Alignment file reader for SAM/BAM files, backed by an htsjdk
 * {@link SamReader}.
 * <p>
 * Reads are only parsed once a chromosome/contig and region have been supplied
 * via {@link #setOptions(String, int, int)}; until then {@link #parse()} does
 * nothing. {@link #preprocess()} lists the contigs named in the file header.
 */
public class BamFile extends AlignFile
{
  // SAM/BAM file reader
  private SamReader fileReader;

  // start position to read from
  private int start = -1;

  // end position to read to
  private int end = -1;

  // chromosome/contig to read
  private String chromosome = "";

  // first position in alignment; -1 until the first read has been seen
  private int alignmentStart = -1;

  /**
   * Creates a new BamFile object. No reader is opened by this constructor, so
   * {@code fileReader} stays null.
   */
  public BamFile()
  {
  }

  /**
   * Creates a new BamFile object.
   *
   * @param inFile
   *          Name of file to read
   * @param sourceType
   *          Whether data source is file, url or other type of data source
   *
   * @throws IOException
   */
  public BamFile(String inFile, DataSourceType sourceType)
          throws IOException
  {
    super(true, inFile, sourceType);
    fileReader = newReaderFactory().open(new File(inFile));
  }

  /**
   * Creates a new BamFile object
   *
   * @param source
   *          wrapper for datasource
   * @throws IOException
   */
  public BamFile(FileParse source) throws IOException
  {
    super(true, source);
    // File-based bam
    fileReader = newReaderFactory().open(source.inFile);
  }

  /**
   * Builds the reader factory shared by both file-opening constructors, so
   * that they always use the same options: source included in records, CRC
   * checksum validation enabled, and silent validation stringency.
   *
   * @return a configured factory
   */
  private static SamReaderFactory newReaderFactory()
  {
    return SamReaderFactory.makeDefault()
            .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
                    SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
            .validationStringency(ValidationStringency.SILENT);
  }

  /**
   * Not implemented: writing BAM output is not supported here.
   *
   * @return always null
   */
  @Override
  public String print(SequenceI[] seqs, boolean jvsuffix)
  {
    // TODO Auto-generated method stub
    return null;
  }

  /**
   * Reads the records for the configured chromosome and region and adds one
   * alignment sequence per record to {@code seqs}. Does nothing if no
   * chromosome has been set.
   * <p>
   * The region is queried twice: a first pass collects insertions across all
   * reads, and a second pass uses them to build each read's aligned sequence.
   */
  @Override
  public void parse()
  {
    // only actually parse if params are set; compare by content, since
    // reference comparison against "" lets non-interned empty strings through
    if (chromosome == null || chromosome.isEmpty())
    {
      return;
    }

    CigarParser parser = new CigarParser('-');

    // NOTE(review): left as a raw type because the declared return type of
    // CigarParser.getInsertions is not visible here - parameterise once
    // confirmed
    SortedMap insertions;
    SAMRecordIterator it = fileReader.query(chromosome, start, end, false);
    try
    {
      insertions = parser.getInsertions(it);
    } finally
    {
      // always release the iterator, even if parsing fails, so that the
      // reader can be queried again
      it.close();
    }

    it = fileReader.query(chromosome, start, end, false);
    try
    {
      while (it.hasNext())
      {
        SAMRecord rec = it.next();

        // set the alignment start to be start of first read (we assume reads
        // are sorted)
        if (alignmentStart == -1)
        {
          alignmentStart = rec.getAlignmentStart();
        }

        // make dataset sequence: start at 1, end at read length
        SequenceI seq = new Sequence(rec.getReadName(),
                rec.getReadString().toLowerCase());
        seq.setStart(1);
        seq.setEnd(rec.getReadLength());

        String newRead = parser.parseCigarToSequence(rec, insertions,
                alignmentStart);

        // make alignment sequences
        SequenceI alsq = seq.deriveSequence();
        alsq.setSequence(newRead);

        // set start relative to soft clip; assume end is set by Sequence code
        alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
        seqs.add(alsq);
      }
    } finally
    {
      it.close();
    }
  }

  /**
   * Get the list of chromosomes or contigs from the file (listed in SQ entries
   * in BAM file header)
   *
   * @return array of chromosome/contig strings
   */
  @Override
  public Object[] preprocess()
  {
    List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
            .getSequenceDictionary().getSequences();
    List<String> chrs = new ArrayList<>(refSeqs.size());
    for (SAMSequenceRecord ref : refSeqs)
    {
      chrs.add(ref.getSequenceName());
    }
    return chrs.toArray();
  }

  /**
   * Sets the region that {@link #parse()} will read.
   *
   * @param chr
   *          chromosome/contig name; parsing is skipped while this is null or
   *          empty
   * @param s
   *          start position to read from
   * @param e
   *          end position to read to
   */
  public void setOptions(String chr, int s, int e)
  {
    chromosome = chr;
    start = s;
    end = e;
  }
}