2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.CigarParser;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceFeature;
26 import jalview.datamodel.SequenceI;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.PrimitiveIterator.OfInt;
33 import java.util.SortedMap;
35 import htsjdk.samtools.SAMRecord;
36 import htsjdk.samtools.SAMRecordIterator;
37 import htsjdk.samtools.SAMSequenceRecord;
38 import htsjdk.samtools.SamReader;
39 import htsjdk.samtools.SamReaderFactory;
40 import htsjdk.samtools.ValidationStringency;
42 public class BamFile extends AlignFile
44 // SAM/BAM file reader
45 private SamReader fileReader;
47 // start position to read from
48 private int start = -1;
50 // end position to read to
53 // chromosome/contig to read
54 private String chromosome = "";
56 // first position in alignment
57 private int alignmentStart = -1;
60 * Creates a new BamFile object.
67 * Creates a new BamFile object.
70 * Name of file to read
72 * Whether data source is file, url or other type of data source
76 public BamFile(String inFile, DataSourceType sourceType)
79 super(true, inFile, sourceType);
80 final SamReaderFactory factory = SamReaderFactory.makeDefault()
81 .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
82 SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
83 .validationStringency(ValidationStringency.SILENT);
84 fileReader = factory.open(new File(inFile));
88 * Creates a new BamFile object
91 * wrapper for datasource
94 public BamFile(FileParse source) throws IOException
97 final SamReaderFactory factory = SamReaderFactory.makeDefault()
98 .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
99 SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
100 .validationStringency(ValidationStringency.SILENT);
103 fileReader = factory.open(source.inFile);
107 public String print(SequenceI[] seqs, boolean jvsuffix)
109 // TODO Auto-generated method stub
116 // only actually parse if params are set
117 if (chromosome != null && chromosome != "")
119 SAMRecordIterator it = fileReader.query(chromosome, start, end,
121 CigarParser parser = new CigarParser('-');
122 SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
125 it = fileReader.query(chromosome, start, end, false);
128 SAMRecord rec = it.next();
130 // set the alignment start to be start of first read (we assume reads
132 if (alignmentStart == -1)
134 alignmentStart = rec.getAlignmentStart();
137 // make dataset sequence: start at 1, end at read length
138 SequenceI seq = new Sequence(rec.getReadName(),
139 rec.getReadString().toLowerCase());
141 seq.setEnd(rec.getReadLength());
142 OfInt q = rec.getBaseQualityString().chars()
144 int p = seq.getStart();
147 seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p,
148 (float) q.next() - ' ', "bamfile"));
151 String newRead = parser.parseCigarToSequence(rec, insertions,
152 alignmentStart, seq);
154 // make alignment sequences
155 SequenceI alsq = seq.deriveSequence();
156 alsq.setSequence(newRead);
158 // set start relative to soft clip; assume end is set by Sequence code
159 alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
166 * Get the list of chromosomes or contigs from the file (listed in SQ entries
167 * in BAM file header)
169 * @return array of chromosome/contig strings
172 public Object[] preprocess()
174 List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
175 .getSequenceDictionary().getSequences();
176 List<String> chrs = new ArrayList<>();
178 for (SAMSequenceRecord ref : refSeqs)
180 chrs.add(ref.getSequenceName());
182 return chrs.toArray();
185 public void setOptions(String chr, int s, int e)