/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.datamodel.SequenceI;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
public class BamFile extends AlignFile
{
SamReader fileReader;
/**
* Creates a new BamFile object.
*/
public BamFile()
{
}
/**
* Creates a new BamFile object.
*
* @param inFile
* DOCUMENT ME!
* @param sourceType
* DOCUMENT ME!
*
* @throws IOException
* DOCUMENT ME!
*/
public BamFile(String inFile, DataSourceType sourceType)
throws IOException
{
super(inFile, sourceType);
final SamReaderFactory factory = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
.validationStringency(ValidationStringency.SILENT);
fileReader = factory.open(new File(inFile));
}
public BamFile(FileParse source) throws IOException
{
final SamReaderFactory factory = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
.validationStringency(ValidationStringency.SILENT);
// File-based bam
fileReader = factory.open(source.inFile);
parse();
}
@Override
public String print(SequenceI[] seqs, boolean jvsuffix)
{
// TODO Auto-generated method stub
return null;
}
@Override
public void parse() throws IOException
{
SequenceI seq = null;
SAMRecordIterator it = fileReader.iterator();
while (it.hasNext())
{
SAMRecord rec = it.next();
String read = rec.getReadString();
int start = rec.getStart();
int end = rec.getEnd();
Iterator cit = rec.getCigar().getCigarElements()
.iterator();
seq = parseId(rec.getReadName());
String cigarredRead = parseCigarToSequence(read, cit, '-');
seq.setSequence(cigarredRead);
seq.setStart(start);
seq.setEnd(end);
seqs.addElement(seq);
}
}
/**
* Apply the CIGAR string to a read sequence and return the updated read
*
* @param read
* the read to update
* @param it
* iterator over cigar elements
* @param gapChar
* gap character to use
* @return string representing read with gaps, clipping etc applied
*/
private String parseCigarToSequence(String read,
Iterator it, char gapChar)
{
StringBuilder newRead = new StringBuilder();
int next = 0;
while (it.hasNext())
{
CigarElement el = it.next();
int length = el.getLength();
switch (el.getOperator())
{
case M:
// matched residues
newRead.append(read.substring(next, next + length - 1));
next += length;
break;
case N: // intron in RNA
case D: // deletion
// add gaps
char[] gaps = new char[length];
Arrays.fill(gaps, gapChar);
newRead.append(gaps);
break;
case S:
// soft clipping - just skip this bit of the read
// do nothing
next += length;
break;
case I:
// add gaps to the reference sequence, which we know nothing about just
// now
// TODO
newRead.append(read.substring(next, next + length - 1));
break;
case H:
// hard clipping - this stretch will not appear in the read
break;
default:
// P, X EQ don't know what to do with these
break;
}
}
return newRead.toString();
}
}