/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.datamodel.CigarParser;
import jalview.datamodel.Range;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.PrimitiveIterator.OfInt;
import java.util.SortedMap;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
public class BamFile extends AlignFile
{
// SAM/BAM file reader
private SamReader fileReader;
// start position to read from
private int start = -1;
// end position to read to
private int end = -1;
// chromosome/contig to read
private String chromosome = "";
// first position in alignment
private int alignmentStart = -1;
/**
* Creates a new BamFile object.
*/
public BamFile()
{
}
/**
* Creates a new BamFile object.
*
* @param inFile
* Name of file to read
* @param sourceType
* Whether data source is file, url or other type of data source
*
* @throws IOException
*/
public BamFile(String inFile, DataSourceType sourceType)
throws IOException
{
super(true, inFile, sourceType);
final SamReaderFactory factory = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
.validationStringency(ValidationStringency.SILENT);
fileReader = factory.open(new File(inFile));
}
/**
* Creates a new BamFile object
*
* @param source
* wrapper for datasource
* @throws IOException
*/
public BamFile(FileParse source) throws IOException
{
super(true, source);
parseSuffix();
final SamReaderFactory factory = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
.validationStringency(ValidationStringency.SILENT);
// File-based bam
if (source.getDataSourceType() == DataSourceType.FILE)
{
fileReader = factory.open(source.inFile);
}
else
{
// locate index ?
String index = source.getDataName() + ".bai";
fileReader = factory.open(SamInputResource.of(source.getDataName())
.index(new URL(index)));
}
}
@Override
public String print(SequenceI[] seqs, boolean jvsuffix)
{
// TODO Auto-generated method stub
return null;
}
@Override
public void parse()
{
// only actually parse if params are set
if (chromosome != null && chromosome != "")
{
SAMRecordIterator it = fileReader.query(chromosome, start, end,
false);
CigarParser parser = new CigarParser('-');
SortedMap insertions = parser.getInsertions(it);
it.close();
it = fileReader.query(chromosome, start, end, false);
while (it.hasNext())
{
SAMRecord rec = it.next();
// set the alignment start to be start of first read (we assume reads
// are sorted)
if (alignmentStart == -1)
{
alignmentStart = rec.getAlignmentStart();
}
// make dataset sequence: start at 1, end at read length
SequenceI seq = new Sequence(rec.getReadName(),
rec.getReadString().toLowerCase());
seq.setStart(1);
seq.setEnd(rec.getReadLength());
OfInt q = rec.getBaseQualityString().chars()
.iterator();
int p = seq.getStart();
while (q.hasNext())
{
seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p,
(float) q.next() - ' ', "bamfile"));
p++;
}
String newRead = parser.parseCigarToSequence(rec, insertions,
alignmentStart, seq);
// make alignment sequences
SequenceI alsq = seq.deriveSequence();
alsq.setSequence(newRead);
// set start relative to soft clip; assume end is set by Sequence code
alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
seqs.add(alsq);
}
}
}
/**
* Get the list of chromosomes or contigs from the file (listed in SQ entries
* in BAM file header)
*
* @return array of chromosome/contig strings
*/
@Override
public Object[] preprocess()
{
List refSeqs = fileReader.getFileHeader()
.getSequenceDictionary().getSequences();
List chrs = new ArrayList<>();
for (SAMSequenceRecord ref : refSeqs)
{
chrs.add(ref.getSequenceName());
}
return chrs.toArray();
}
public void setOptions(String chr, int s, int e)
{
chromosome = chr;
start = s;
end = e;
suffix = chromosome + ":" + start + "-" + end;
}
public boolean parseSuffix()
{
if (suffix == null)
{
return false;
}
int csep = suffix.indexOf(":");
int rsep = suffix.indexOf("-", csep);
if (csep < 0 || rsep < 0 || suffix.length() - rsep <= 1)
{
return false;
}
String chr, p1, p2;
chr = suffix.substring(0, csep);
p1 = suffix.substring(csep + 1, rsep);
p2 = suffix.substring(rsep + 1);
int cstart, cend;
try
{
cstart = Integer.parseInt(p1);
cend = Integer.parseInt(p2);
} catch (Exception e)
{
warningMessage = (warningMessage == null ? "" : warningMessage + "\n")
+ "Couldn't parse range from " + suffix;
return false;
}
chromosome = chr;
start = cstart;
end = cend;
return true;
}
}