import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.SortedMap;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
public class BamFile extends AlignFile
{
+ // SAM/BAM file reader
+ private SamReader fileReader;
- SamReader fileReader;
+ // start position to read from
+ private int start = -1;
+
+ // end position to read to
+ private int end = -1;
+
+ // chromosome/contig to read
+ private String chromosome = "";
/**
 * Creates a new BamFile object.
 *
 * @param inFile
 *          Name of file to read
 * @param sourceType
 *          Whether data source is file, url or other type of data source
 *
 * @throws IOException
 *           declared for the superclass constructor (presumably raised when
 *           the data source cannot be read - confirm against AlignFile)
 */
public BamFile(String inFile, DataSourceType sourceType)
        throws IOException
{
  super(true, inFile, sourceType);

  // NOTE(review): ValidationStringency is imported by this file but not
  // applied to the factory here - confirm whether a
  // .validationStringency(...) call is intended on this builder chain.
  final SamReaderFactory factory = SamReaderFactory.makeDefault()
          .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
                  SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);

  // inFile is always opened as a local file path here, whatever sourceType
  // says
  fileReader = factory.open(new File(inFile));
}
/**
 * Creates a new BamFile object from an existing data source wrapper. The
 * reader is opened on the wrapper's inFile; no records are read here.
 *
 * @param source
 *          wrapper for datasource
 * @throws IOException
 *           declared for the superclass constructor (presumably raised when
 *           the data source cannot be read - confirm against AlignFile)
 */
public BamFile(FileParse source) throws IOException
{
  super(true, source);

  // NOTE(review): ValidationStringency is imported by this file but not
  // applied to the factory here - confirm whether a
  // .validationStringency(...) call is intended on this builder chain.
  final SamReaderFactory factory = SamReaderFactory.makeDefault()
          .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
                  SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);

  // File-based bam
  fileReader = factory.open(source.inFile);
}
@Override
}
@Override
- public void parse() throws IOException
+ public void parse()
{
- SAMRecordIterator it = fileReader.iterator();
- CigarParser parser = new CigarParser('-');
- SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
- it.close();
-
- it = fileReader.iterator();
- while (it.hasNext())
+ // only actually parse if params are set
+ if (chromosome != null && chromosome != "")
{
- SAMRecord rec = it.next();
+ SAMRecordIterator it = fileReader.query(chromosome, start, end,
+ false);
+ CigarParser parser = new CigarParser('-');
+ SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
+ it.close();
+
+ it = fileReader.query(chromosome, start, end, false);
+ while (it.hasNext())
+ {
+ SAMRecord rec = it.next();
- // make dataset sequence: start at 1, end at read length
- SequenceI seq = new Sequence(rec.getReadName(),
- rec.getReadString().toLowerCase());
- seq.setStart(1);
- seq.setEnd(rec.getReadLength());
+ // make dataset sequence: start at 1, end at read length
+ SequenceI seq = new Sequence(rec.getReadName(),
+ rec.getReadString().toLowerCase());
+ seq.setStart(1);
+ seq.setEnd(rec.getReadLength());
- String newRead = parser.parseCigarToSequence(rec, insertions);
+ String newRead = parser.parseCigarToSequence(rec, insertions);
- // make alignment sequences
- SequenceI alsq = seq.deriveSequence();
- alsq.setSequence(newRead);
+ // make alignment sequences
+ SequenceI alsq = seq.deriveSequence();
+ alsq.setSequence(newRead);
- // set start relative to soft clip; assume end is set by Sequence code
- alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
- seqs.add(alsq);
+ // set start relative to soft clip; assume end is set by Sequence code
+ alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
+ seqs.add(alsq);
+ }
}
}
+
+ /**
+ * Get the list of chromosomes or contigs from the file (listed in SQ entries
+ * in BAM file header)
+ *
+ * @return array of chromosome/contig strings
+ */
+ @Override
+ public Object[] preprocess()
+ {
+ List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
+ .getSequenceDictionary().getSequences();
+ List<String> chrs = new ArrayList<>();
+
+ for (SAMSequenceRecord ref : refSeqs)
+ {
+ chrs.add(ref.getSequenceName());
+ }
+ return chrs.toArray();
+ }
+
+ public void setOptions(String chr, int s, int e)
+ {
+ chromosome = chr;
+ start = s;
+ end = e;
+ }
}