2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.CigarParser;
24 import jalview.datamodel.Range;
25 import jalview.datamodel.Sequence;
26 import jalview.datamodel.SequenceFeature;
27 import jalview.datamodel.SequenceI;
30 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.List;
35 import java.util.PrimitiveIterator.OfInt;
36 import java.util.SortedMap;
38 import htsjdk.samtools.SAMRecord;
39 import htsjdk.samtools.SAMRecordIterator;
40 import htsjdk.samtools.SAMSequenceRecord;
41 import htsjdk.samtools.SamInputResource;
42 import htsjdk.samtools.SamReader;
43 import htsjdk.samtools.SamReaderFactory;
44 import htsjdk.samtools.ValidationStringency;
46 public class BamFile extends AlignFile
48 // SAM/BAM file reader
49 private SamReader fileReader;
51 // start position to read from
52 private int start = -1;
54 // end position to read to
57 // chromosome/contig to read
58 private String chromosome = "";
60 // first position in alignment
61 private int alignmentStart = -1;
64 * Creates a new BamFile object.
71 * Creates a new BamFile object.
74 * Name of file to read
76 * Whether data source is file, url or other type of data source
80 public BamFile(String inFile, DataSourceType sourceType)
83 super(true, inFile, sourceType);
84 final SamReaderFactory factory = SamReaderFactory.makeDefault()
85 .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
86 SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
87 .validationStringency(ValidationStringency.SILENT);
88 fileReader = factory.open(new File(inFile));
92 * Creates a new BamFile object
95 * wrapper for datasource
98 public BamFile(FileParse source) throws IOException
102 final SamReaderFactory factory = SamReaderFactory.makeDefault()
103 .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
104 SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
105 .validationStringency(ValidationStringency.SILENT);
108 if (source.getDataSourceType() == DataSourceType.FILE)
110 fileReader = factory.open(source.inFile);
115 String index = source.getDataName() + ".bai";
116 fileReader = factory.open(SamInputResource.of(source.getDataName())
117 .index(new URL(index)));
122 public String print(SequenceI[] seqs, boolean jvsuffix)
124 // TODO Auto-generated method stub
131 // only actually parse if params are set
132 if (chromosome != null && chromosome != "")
134 SAMRecordIterator it = fileReader.query(chromosome, start, end,
136 CigarParser parser = new CigarParser('-');
137 SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
140 it = fileReader.query(chromosome, start, end, false);
143 SAMRecord rec = it.next();
145 // set the alignment start to be start of first read (we assume reads
147 if (alignmentStart == -1)
149 alignmentStart = rec.getAlignmentStart();
152 // make dataset sequence: start at 1, end at read length
153 SequenceI seq = new Sequence(rec.getReadName(),
154 rec.getReadString().toLowerCase());
156 seq.setEnd(rec.getReadLength());
157 OfInt q = rec.getBaseQualityString().chars()
159 int p = seq.getStart();
162 seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p,
163 (float) q.next() - ' ', "bamfile"));
166 String newRead = parser.parseCigarToSequence(rec, insertions,
167 alignmentStart, seq);
169 // make alignment sequences
170 SequenceI alsq = seq.deriveSequence();
171 alsq.setSequence(newRead);
173 // set start relative to soft clip; assume end is set by Sequence code
174 alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
181 * Get the list of chromosomes or contigs from the file (listed in SQ entries
182 * in BAM file header)
184 * @return array of chromosome/contig strings
187 public Object[] preprocess()
189 List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
190 .getSequenceDictionary().getSequences();
191 List<String> chrs = new ArrayList<>();
193 for (SAMSequenceRecord ref : refSeqs)
195 chrs.add(ref.getSequenceName());
197 return chrs.toArray();
200 public void setOptions(String chr, int s, int e)
205 suffix = chromosome + ":" + start + "-" + end;
208 public boolean parseSuffix()
214 int csep = suffix.indexOf(":");
215 int rsep = suffix.indexOf("-", csep);
216 if (csep < 0 || rsep < 0 || suffix.length() - rsep <= 1)
221 chr = suffix.substring(0, csep);
222 p1 = suffix.substring(csep + 1, rsep);
223 p2 = suffix.substring(rsep + 1);
227 cstart = Integer.parseInt(p1);
228 cend = Integer.parseInt(p2);
229 } catch (Exception e)
231 warningMessage = (warningMessage == null ? "" : warningMessage + "\n")
232 + "Couldn't parse range from " + suffix;