* the SAM record for the read
* @param insertions
* a map of inserted positions and lengths
+ * @param alignmentStart
+ * start of alignment to be used as offset when padding reads with
+ * gaps
* @return string representing read with gaps, clipping etc applied
*/
public String parseCigarToSequence(SAMRecord rec,
- SortedMap<Integer, Integer> insertions)
+ SortedMap<Integer, Integer> insertions, int alignmentStart)
{
StringBuilder newRead = new StringBuilder();
Iterator<CigarElement> it = rec.getCigar().getCigarElements()
// pad with spaces before read
// first count gaps needed to pad to reference position
// NB positions are 1-based, so number of gaps = pos - alignmentStart (i.e. pos-1 when the alignment starts at 1)
- int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1;
+ int gaps = rec.getStart() - alignmentStart;
+
// now count gaps to pad for insertions in other reads
int insertCount = countInsertionsBeforeRead(rec, insertions);
addGaps(newRead, gaps + insertCount);
- // addGaps(newTrimmedRead, gaps + insertCount);
int lastinserts = 0;
while (it.hasNext())
int gaps = 0;
// add in any insertion gaps before read
- // TODO start point should be start of alignment not 0
+ // although we only need to get the submap from alignmentStart, there won't
+ // be any insertions before that so we can just start at 0
SortedMap<Integer, Integer> seqInserts = inserts.subMap(0,
rec.getStart());
// chromosome/contig to read
private String chromosome = "";
+ // first position in alignment
+ private int alignmentStart = -1;
+
/**
* Creates a new BamFile object.
*/
{
SAMRecord rec = it.next();
+ // set the alignment start to be start of first read (we assume reads
+ // are sorted)
+ if (alignmentStart == -1)
+ {
+ alignmentStart = rec.getAlignmentStart();
+ }
+
// make dataset sequence: start at 1, end at read length
SequenceI seq = new Sequence(rec.getReadName(),
rec.getReadString().toLowerCase());
seq.setStart(1);
seq.setEnd(rec.getReadLength());
- String newRead = parser.parseCigarToSequence(rec, insertions);
+ String newRead = parser.parseCigarToSequence(rec, insertions,
+ alignmentStart);
// make alignment sequences
SequenceI alsq = seq.deriveSequence();
rec.setAlignmentStart(start);
CigarParser cp = new CigarParser('-');
- String bfresult = cp.parseCigarToSequence(rec, insertions);
+ String bfresult = cp.parseCigarToSequence(rec, insertions, 1);
System.out.println(result);
System.out.println(bfresult);