2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.SequenceI;
26 import java.io.IOException;
27 import java.util.Arrays;
28 import java.util.Iterator;
30 import htsjdk.samtools.CigarElement;
31 import htsjdk.samtools.SAMRecord;
32 import htsjdk.samtools.SAMRecordIterator;
33 import htsjdk.samtools.SamReader;
34 import htsjdk.samtools.SamReaderFactory;
35 import htsjdk.samtools.ValidationStringency;
37 public class BamFile extends AlignFile
43 * Creates a new BamFile object.
/**
 * Creates a new BamFile object that reads from the named file.
 *
 * @param inFile
 *          path of the BAM file to open
 * @param sourceType
 *          data source type, passed straight to the superclass constructor
 * @throws IOException
 *           if the superclass constructor reports an I/O failure
 */
public BamFile(String inFile, DataSourceType sourceType)
        throws IOException
{
  // NOTE(review): fileReader is only assigned after super(...) returns; if
  // the AlignFile constructor triggers parse(), the reader will still be
  // null at that point - confirm against AlignFile.
  super(inFile, sourceType);
  fileReader = newReaderFactory().open(new File(inFile));
}

/**
 * Creates a new BamFile object that reads from the file held by an existing
 * FileParse source.
 *
 * @param source
 *          parser whose inFile is opened as BAM data
 * @throws IOException
 *           declared for callers; not thrown by the visible statements
 */
public BamFile(FileParse source) throws IOException
{
  // File-based BAM: open whatever the supplied source points at
  fileReader = newReaderFactory().open(source.inFile);
}

/**
 * Builds the reader factory shared by the constructors: source information
 * is included in records, CRC checksums are validated, and validation
 * stringency is SILENT.
 *
 * @return a configured SamReaderFactory
 */
private static SamReaderFactory newReaderFactory()
{
  return SamReaderFactory.makeDefault()
          .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
                  SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
          .validationStringency(ValidationStringency.SILENT);
}
84 public String print(SequenceI[] seqs, boolean jvsuffix)
86 // TODO Auto-generated method stub
91 public void parse() throws IOException
94 SAMRecordIterator it = fileReader.iterator();
97 SAMRecord rec = it.next();
98 String read = rec.getReadString();
99 int start = rec.getStart();
100 int end = rec.getEnd();
102 Iterator<CigarElement> cit = rec.getCigar().getCigarElements()
105 seq = parseId(rec.getReadName());
106 String cigarredRead = parseCigarToSequence(read, cit, '-');
107 seq.setSequence(cigarredRead);
110 seqs.addElement(seq);
116 * Apply the CIGAR string to a read sequence and return the updated read
121 * iterator over cigar elements
123 * gap character to use
124 * @return string representing read with gaps, clipping etc applied
126 private String parseCigarToSequence(String read,
127 Iterator<CigarElement> it, char gapChar)
129 StringBuilder newRead = new StringBuilder();
134 CigarElement el = it.next();
135 int length = el.getLength();
136 switch (el.getOperator())
140 newRead.append(read.substring(next, next + length - 1));
143 case N: // intron in RNA
146 char[] gaps = new char[length];
147 Arrays.fill(gaps, gapChar);
148 newRead.append(gaps);
151 // soft clipping - just skip this bit of the read
156 // add gaps to the reference sequence, which we know nothing about just
159 newRead.append(read.substring(next, next + length - 1));
162 // hard clipping - this stretch will not appear in the read
165 // P, X EQ don't know what to do with these
170 return newRead.toString();