Then when inserting need to include previously inserted count AND determine if sequence already has an insertion or needs gap(s) added
*/
- public SortedMap<Integer, Integer> getInsertions(Iterator<SAMRecord> it)
+ public SortedMap<Integer, Integer>[] getInsertions(Iterator<SAMRecord> it)
{
- SortedMap<Integer, Integer> inserts = new TreeMap<>();
- while (it.hasNext())
+ return getInsertions(it, null);
+ }
+
+ public int firstAlColumn = -1, firstRposCol = -1;
+
+ public SortedMap<Integer, Integer>[] getInsertions(Iterator<SAMRecord> it,
+ Range[] extent)
+ {
+ SortedMap<Integer, Integer> sinserts[] = new SortedMap[] {
+ new TreeMap<>(), new TreeMap<>() };
+ Range xten = extent != null && extent[0] != null ? extent[0] : null;
+ do
{
- // check each record for insertions in the CIGAR string
+ // check extent of read
SAMRecord rec = it.next();
+ if (extent != null)
+ {
+
+ int nstart = rec.getAlignmentStart();
+ if (nstart != 0)
+ {
+ nstart = rec.getReferencePositionAtReadPosition(nstart);
+ }
+ int nend = rec.getAlignmentEnd();
+ if (nend != 0)
+ {
+ nend = rec.getReferencePositionAtReadPosition(nend);
+ }
+
+ xten = new Range(
+ nstart <= 0 ? xten.start : Math.min(nstart, xten.start),
+ nend == 0 ? xten.end : Math.max(nend, xten.end));
+ }
+
+ // check each record for insertions in the CIGAR string
+
+ // pick the insert map to update
+ int insmap = rec.getReadNegativeStrandFlag() ? 1 : 0;
+ SortedMap<Integer, Integer> inserts = sinserts[insmap];
+
Iterator<CigarElement> cit = rec.getCigar().getCigarElements()
.iterator();
- int next = 1;
+ int refLocation = rec.getAlignmentStart();
+ int rloc = 0;
+ int alcol = 0;
+ int alpos = 0;
while (cit.hasNext())
{
CigarElement el = cit.next();
switch (el.getOperator())
{
- case I:
- // add to insertions list, and move along the read
- // location is the start of next CIGAR segment
- // because getReferencePositionAtReadPosition returns 0 for read
- // positions which are not in the reference (like insertions)
- int refLocation = rec.getReferencePositionAtReadPosition(
- next + el.getLength());
-
- // if there's already an insertion at this location, keep the longest
- // insertion; if there's no insertion keep this one
- if (!inserts.containsKey(refLocation)
- || (inserts.containsKey(refLocation)
- && inserts.get(refLocation) < el.getLength()))
- {
- inserts.put(refLocation, el.getLength());
- }
- next += el.getLength();
- break;
- case M:
case S:
- // match to reference, or soft clip, move along the read
- next += el.getLength();
+ case H:
+ // ignore soft/hardclipped
+
break;
default:
- // deletions, introns etc don't consume any residues from the read
- break;
+ if (el.getOperator().consumesReadBases()
+ && !el.getOperator().consumesReferenceBases())
+ {
+ // add to insertions list, and move along the read
+ // location is the start of next CIGAR segment
+ // because getReferencePositionAtReadPosition returns 0 for read
+ // positions which are not in the reference (like insertions)
+
+ // if there's already an insertion at this location, keep the
+ // longest
+ // insertion; if there's no insertion keep this one
+ if (!inserts.containsKey(refLocation)
+ || (inserts.containsKey(refLocation)
+ && inserts.get(refLocation) < el.getLength()))
+ {
+ inserts.put(refLocation, el.getLength());
+ }
+ break;
+ }
+ if (el.getOperator().consumesReferenceBases())
+ {
+ if (el.getOperator().consumesReadBases() && alcol == 0
+ && refLocation + 1 > extent[0].start)
+ {
+ // first match position : last rloc + first match op
+ alcol = rloc + 1;
+ alpos = rec.getReferencePositionAtReadPosition(rloc + 1);
+ }
+ refLocation += el.getLength();
+ }
+
+ }
+ if (el.getOperator().consumesReadBases()
+ || el.getOperator().consumesReferenceBases())
+ {
+ rloc += el.getLength();
}
}
-
+ // Wrong: hack that fails to relocate reference to correct place in
+ // sequence
+ if (firstAlColumn < 0)
+ {
+ firstAlColumn = alcol;
+ firstRposCol = alpos;
+ }
+ } while (it.hasNext());
+ if (xten != null)
+ {
+ extent[0] = xten;
}
- return inserts;
+ return sinserts;
}
/**