2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.CigarParser;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.List;
31 import java.util.SortedMap;
33 import htsjdk.samtools.SAMRecord;
34 import htsjdk.samtools.SAMRecordIterator;
35 import htsjdk.samtools.SAMSequenceRecord;
36 import htsjdk.samtools.SamReader;
37 import htsjdk.samtools.SamReaderFactory;
38 import htsjdk.samtools.ValidationStringency;
40 public class BamFile extends AlignFile
42 // SAM/BAM file reader
43 private SamReader fileReader;
45 // start position to read from
46 private int start = -1;
48 // end position to read to
51 // chromosome/contig to read
52 private String chromosome = "";
54 // first position in alignment
55 private int alignmentStart = -1;
58 * Creates a new BamFile object.
/**
 * Creates a new BamFile object that reads from the file named by
 * {@code inFile}.
 *
 * @param inFile
 *          Name of file to read
 * @param sourceType
 *          Whether data source is file, url or other type of data source
 * @throws IOException
 *           NOTE(review): the throws clause is restored from context (the
 *           sibling FileParse constructor declares it); the original
 *           signature continuation is not visible here - confirm
 */
public BamFile(String inFile, DataSourceType sourceType)
        throws IOException
{
  super(true, inFile, sourceType);

  // Build the reader factory step by step: keep the source in each record,
  // turn on CRC checksum validation, and use SILENT validation stringency.
  SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
  readerFactory = readerFactory.enable(
          SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
          SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);
  readerFactory = readerFactory
          .validationStringency(ValidationStringency.SILENT);

  // inFile is interpreted as a file system path
  File bamSource = new File(inFile);
  fileReader = readerFactory.open(bamSource);
}
/**
 * Creates a new BamFile object that reads from the file held by an existing
 * data source wrapper.
 *
 * @param source
 *          wrapper for datasource
 * @throws IOException
 */
public BamFile(FileParse source) throws IOException
{
  // NOTE(review): lines of the original body before the factory set-up and
  // before the open() call are not visible in this excerpt (for example an
  // explicit super(...) call may belong here) - confirm against the full
  // file before relying on this constructor.

  // Same reader configuration as the file-name constructor: keep the source
  // in each record, validate CRC checksums, SILENT validation stringency.
  SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
  readerFactory = readerFactory.enable(
          SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
          SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);
  readerFactory = readerFactory
          .validationStringency(ValidationStringency.SILENT);

  fileReader = readerFactory.open(source.inFile);
}
105 public String print(SequenceI[] seqs, boolean jvsuffix)
107 // TODO Auto-generated method stub
// only actually parse if params are set. The emptiness test must compare
// content: != on Strings compares references, so an empty string that is
// not the interned "" literal would wrongly pass the guard.
if (chromosome != null && !chromosome.isEmpty())
117 SAMRecordIterator it = fileReader.query(chromosome, start, end,
119 CigarParser parser = new CigarParser('-');
120 SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
123 it = fileReader.query(chromosome, start, end, false);
126 SAMRecord rec = it.next();
128 // set the alignment start to be start of first read (we assume reads
130 if (alignmentStart == -1)
132 alignmentStart = rec.getAlignmentStart();
135 // make dataset sequence: start at 1, end at read length
136 SequenceI seq = new Sequence(rec.getReadName(),
137 rec.getReadString().toLowerCase());
139 seq.setEnd(rec.getReadLength());
141 String newRead = parser.parseCigarToSequence(rec, insertions,
144 // make alignment sequences
145 SequenceI alsq = seq.deriveSequence();
146 alsq.setSequence(newRead);
148 // set start relative to soft clip; assume end is set by Sequence code
149 alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
156 * Get the list of chromosomes or contigs from the file (listed in SQ entries
157 * in BAM file header)
159 * @return array of chromosome/contig strings
162 public Object[] preprocess()
164 List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
165 .getSequenceDictionary().getSequences();
166 List<String> chrs = new ArrayList<>();
168 for (SAMSequenceRecord ref : refSeqs)
170 chrs.add(ref.getSequenceName());
172 return chrs.toArray();
175 public void setOptions(String chr, int s, int e)