2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.CigarParser;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceI;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.List;
31 import java.util.SortedMap;
33 import htsjdk.samtools.SAMRecord;
34 import htsjdk.samtools.SAMRecordIterator;
35 import htsjdk.samtools.SAMSequenceRecord;
36 import htsjdk.samtools.SamReader;
37 import htsjdk.samtools.SamReaderFactory;
38 import htsjdk.samtools.ValidationStringency;
40 public class BamFile extends AlignFile
42 // SAM/BAM file reader
43 private SamReader fileReader;
45 // start position to read from
46 private int start = -1;
48 // end position to read to
51 // chromosome/contig to read
52 private String chromosome = "";
55 * Creates a new BamFile object.
/**
 * Creates a new BamFile object.
 *
 * @param inFile
 *          Name of file to read
 * @param sourceType
 *          Whether data source is file, url or other type of data source
 * @throws IOException
 *           NOTE(review): clause restored so this signature matches the
 *           FileParse-based constructor - confirm against the exceptions the
 *           superclass constructor declares
 */
public BamFile(String inFile, DataSourceType sourceType)
        throws IOException
{
  super(true, inFile, sourceType);

  // Reader configuration: keep the source on each record, verify CRC
  // checksums, and run with SILENT validation stringency.
  final SamReaderFactory factory = SamReaderFactory.makeDefault()
          .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
                  SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
          .validationStringency(ValidationStringency.SILENT);

  // NOTE(review): inFile is always opened as a local file here, whatever
  // sourceType says - confirm that non-file sources never reach this path.
  fileReader = factory.open(new File(inFile));
}
/**
 * Creates a new BamFile object, opening a reader on the file held by an
 * existing data source wrapper.
 *
 * @param source
 *          wrapper for datasource; its {@code inFile} is what gets opened
 * @throws IOException
 */
public BamFile(FileParse source) throws IOException
{
  // Same reader set-up as a fluent chain, spelled out one step at a time.
  SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
  readerFactory = readerFactory.enable(
          SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
          SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);
  readerFactory = readerFactory
          .validationStringency(ValidationStringency.SILENT);

  fileReader = readerFactory.open(source.inFile);
}
102 public String print(SequenceI[] seqs, boolean jvsuffix)
104 // TODO Auto-generated method stub
111 // only actually parse if params are set
112 if (chromosome != null && chromosome != "")
114 SAMRecordIterator it = fileReader.query(chromosome, start, end,
116 CigarParser parser = new CigarParser('-');
117 SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
120 it = fileReader.query(chromosome, start, end, false);
123 SAMRecord rec = it.next();
125 // make dataset sequence: start at 1, end at read length
126 SequenceI seq = new Sequence(rec.getReadName(),
127 rec.getReadString().toLowerCase());
129 seq.setEnd(rec.getReadLength());
131 String newRead = parser.parseCigarToSequence(rec, insertions);
133 // make alignment sequences
134 SequenceI alsq = seq.deriveSequence();
135 alsq.setSequence(newRead);
137 // set start relative to soft clip; assume end is set by Sequence code
138 alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
145 * Get the list of chromosomes or contigs from the file (listed in SQ entries
146 * in BAM file header)
148 * @return array of chromosome/contig strings
151 public Object[] preprocess()
153 List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
154 .getSequenceDictionary().getSequences();
155 List<String> chrs = new ArrayList<>();
157 for (SAMSequenceRecord ref : refSeqs)
159 chrs.add(ref.getSequenceName());
161 return chrs.toArray();
164 public void setOptions(String chr, int s, int e)