* the SAM record for the read
* @param insertions
* a map of inserted positions and lengths
+ * @param alignmentStart
+ * start of alignment to be used as offset when padding reads with
+ * gaps
* @return string representing read with gaps, clipping etc applied
*/
public String parseCigarToSequence(SAMRecord rec,
- SortedMap<Integer, Integer> insertions)
+ SortedMap<Integer, Integer> insertions, int alignmentStart)
{
StringBuilder newRead = new StringBuilder();
Iterator<CigarElement> it = rec.getCigar().getCigarElements()
// pad with spaces before read
// first count gaps needed to pad to reference position
// NB position is 1-based so number of gaps = pos-1
- int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1;
+ int gaps = rec.getStart() - alignmentStart;
+
// now count gaps to pad for insertions in other reads
int insertCount = countInsertionsBeforeRead(rec, insertions);
addGaps(newRead, gaps + insertCount);
- // addGaps(newTrimmedRead, gaps + insertCount);
int lastinserts = 0;
while (it.hasNext())
{
// ERROR
int pos = rec.getStart() + refnext;
- System.out.println("Insertion not added to seqInserts: " + pos);
+ System.out.println(
+ "Read " + rec.getReadName() + " has an insertion at "
+ + pos + " which is not in seqInserts");
}
else
{
-
// we just inserted something
// remove these inserts now - should be very first entry
int count = seqInserts.get(rec.getStart() + refnext);
StringBuilder newRead = new StringBuilder();
String read = rec.getReadString();
- int nextPos = next;
+ int nextPos = next; // location of next position in read
while (nextPos < next + length)
{
- int insertPos = next + length;
- int insertLen = 0;
+ int insertPos = next + length; // location of next insert in read (or end
+ // of read)
+ int insertLen = 0; // no of gaps to insert after insertPos
if (it.hasNext())
{
// account for sequence already having insertion
{
insertPos = rec.getReadPositionAtReferencePosition(key) - 1;
}
+ else
+ {
+ insertPos = nextPos; // bugfix - trigger by override + insertions in
+ // same M stretch
+ }
}
newRead.append(read.substring(nextPos, insertPos));
nextPos = insertPos;
SAMRecord rec = it.next();
Iterator<CigarElement> cit = rec.getCigar().getCigarElements()
.iterator();
- int next = 0;
+ int next = 1;
while (cit.hasNext())
{
CigarElement el = cit.next();
{
case I:
// add to insertions list, and move along the read
- // location is 1 past the current position of next
- // (note if we try to retrieve +1 position from reference by calling
- // getReferencePositionAtReadPosition(next+1) we get 0 because it's an
- // insertion!)
- int refLocation = rec.getReferencePositionAtReadPosition(next)
- + 1;
+ // location is the start of next CIGAR segment
+ // because getReferencePositionAtReadPosition returns 0 for read
+ // positions which are not in the reference (like insertions)
+ int refLocation = rec.getReferencePositionAtReadPosition(
+ next + el.getLength());
// if there's already an insertion at this location, keep the longest
// insertion; if there's no insertion keep this one
int gaps = 0;
// add in any insertion gaps before read
- // TODO start point should be start of alignment not 0
+ // although we only need to get the submap from alignmentStart, there won't
+ // be any insertions before that so we can just start at 0
SortedMap<Integer, Integer> seqInserts = inserts.subMap(0,
rec.getStart());