X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2FBamFile.java;h=10e2d0ec7db91f30360e5d5309d81b05e075c896;hb=846629a1a7ab7715d14a18296bc3024c5de4ac5c;hp=26c14a2f3683c57ac7b2a297db6dabd625580075;hpb=0b0b3d3687479204da2d199042c7bc90f3a6fd43;p=jalview.git diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 26c14a2..10e2d0e 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -26,18 +26,33 @@ import jalview.datamodel.SequenceI; import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.SortedMap; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; public class BamFile extends AlignFile { + // SAM/BAM file reader + private SamReader fileReader; - SamReader fileReader; + // start position to read from + private int start = -1; + + // end position to read to + private int end = -1; + + // chromosome/contig to read + private String chromosome = ""; + + // first position in alignment + private int alignmentStart = -1; /** * Creates a new BamFile object. @@ -50,17 +65,16 @@ public class BamFile extends AlignFile * Creates a new BamFile object. * * @param inFile - * DOCUMENT ME! + * Name of file to read * @param sourceType - * DOCUMENT ME! + * Whether data source is file, url or other type of data source * * @throws IOException - * DOCUMENT ME! */ public BamFile(String inFile, DataSourceType sourceType) throws IOException { - super(inFile, sourceType); + super(true, inFile, sourceType); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) @@ -68,8 +82,16 @@ public class BamFile extends AlignFile fileReader = factory.open(new File(inFile)); } + /** + * Creates a new BamFile object + * + * @param source + * wrapper for datasource + * @throws IOException + */ public BamFile(FileParse source) throws IOException { + super(true, source); final SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) @@ -77,7 +99,6 @@ public class BamFile extends AlignFile // File-based bam fileReader = factory.open(source.inFile); - parse(); } @Override @@ -88,33 +109,73 @@ public class BamFile extends AlignFile } @Override - public void parse() throws IOException + public void parse() { - SAMRecordIterator it = fileReader.iterator(); - CigarParser parser = new CigarParser('-'); - SortedMap insertions = parser.getInsertions(it); - it.close(); - - it = fileReader.iterator(); - while (it.hasNext()) + // only actually parse if params are set + if (chromosome != null && chromosome != "") { - SAMRecord rec = it.next(); + SAMRecordIterator it = fileReader.query(chromosome, start, end, + false); + CigarParser parser = new CigarParser('-'); + SortedMap insertions = parser.getInsertions(it); + it.close(); + + it = fileReader.query(chromosome, start, end, false); + while (it.hasNext()) + { + SAMRecord rec = it.next(); - // make dataset sequence: start at 1, end at read length - SequenceI seq = new Sequence(rec.getReadName(), - rec.getReadString().toLowerCase()); - seq.setStart(1); - seq.setEnd(rec.getReadLength()); + // set the alignment start to be start of first read (we assume reads + // are sorted) + if (alignmentStart == -1) + { + alignmentStart = rec.getAlignmentStart(); + } - String newRead = parser.parseCigarToSequence(rec, insertions); + // make dataset sequence: start at 1, end at read length + SequenceI seq = new Sequence(rec.getReadName(), + rec.getReadString().toLowerCase()); + seq.setStart(1); + seq.setEnd(rec.getReadLength()); - // make alignment sequences - SequenceI alsq = seq.deriveSequence(); - alsq.setSequence(newRead); + String newRead = parser.parseCigarToSequence(rec, insertions, + alignmentStart, seq); - // set start relative to soft clip; assume end is set by Sequence code - alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); - seqs.add(alsq); + // make alignment sequences + SequenceI alsq = seq.deriveSequence(); + alsq.setSequence(newRead); + + // set start relative to soft clip; assume end is set by Sequence code + alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); + seqs.add(alsq); + } } } + + /** + * Get the list of chromosomes or contigs from the file (listed in SQ entries + * in BAM file header) + * + * @return array of chromosome/contig strings + */ + @Override + public Object[] preprocess() + { + List refSeqs = fileReader.getFileHeader() + .getSequenceDictionary().getSequences(); + List chrs = new ArrayList<>(); + + for (SAMSequenceRecord ref : refSeqs) + { + chrs.add(ref.getSequenceName()); + } + return chrs.toArray(); + } + + public void setOptions(String chr, int s, int e) + { + chromosome = chr; + start = s; + end = e; + } }