- alsq.setSequence(cigarredRead);
- alsq.setStart(start);
- alsq.setEnd(end);
- seqs.add(alsq);
- }
-
- for (SequenceI seq : seqs)
- {
- int insertCount = 0;
- SortedMap<Integer, Integer> seqInserts = insertions.subMap(0,
- seq.getEnd()); // TODO start point should be start of alignment
- // not 0
- seqInserts.forEach(action);
- while ((nextInsertion != -1) && (nextInsertion < seq.getEnd()))
- {
- seq.insertCharsAt(nextInsertion + insertCount, '-');
- // nextInsertion = insertions.nextSetBit(nextInsertion + 1);
- insertCount++;
- }
- }
- }
-
- /*
- 1234567
- ABCDEFG insert 3,4
-
- R1: insert 3 = gap before 3rd res
- AB.CDEFG
-
- R2: insert 4 = gap before 4th res
- ABC.DEFG
-
- R3: insert 3,4 = 2 gaps before 3rd res
- AB..CDEFG
-
- => AB--C-DEFG
-
- So need to store insertions as (pos, length) giving here (3,1),(4,1),(3,2) - then can sub longest length at position so (3,2), (4,1)
- Then when inserting need to include previously inserted count AND determine if sequence already has an insertion or needs gap(s) added
-
- */
- /**
- * Apply the CIGAR string to a read sequence and return the updated read.
- *
- * @param read
- * the read to update
- * @param rec
- * the corresponding SAM record
- * @param gapChar
- * gap character to use
- * @return string representing read with gaps, clipping etc applied
- */
- private String parseCigarToSequence(String read,
- SAMRecord rec, char gapChar)
- {
- Iterator<CigarElement> it = rec.getCigar().getCigarElements()
- .iterator();
-
- StringBuilder newRead = new StringBuilder();
- int next = 0;
- int ap = 0; // position of query region qi.start;
-
- // pad with spaces before read
- for (int alp = rec
- .getReferencePositionAtReadPosition(next + 1); ap < alp;)
- {
- newRead.append(" ");
- ap++;
- }
-
- while (it.hasNext())
- {
- CigarElement el = it.next();
- int length = el.getLength();
- char[] gaps;
-
- switch (el.getOperator())
- {
- case M:
- // matched residues
- newRead.append(
- read.substring(next, next + length));
- next += length;
- break;
- case N: // intron in RNA
- case D: // deletion
- // add gaps
- gaps = new char[length];
- Arrays.fill(gaps, gapChar);
- newRead.append(gaps);
- break;
- case S:
- // soft clipping - just skip this bit of the read
- // do nothing
-
- // work out how many gaps we need before the start of the soft clip -
- // don't do this at the end of the read!
- if (next == 0) // at start of read
- {
- int numgaps = rec.getUnclippedStart();
- gaps = new char[numgaps];
- Arrays.fill(gaps, ' ');
- newRead.append(gaps);
- }
-
- newRead.append(
- read.substring(next, next + length).toLowerCase());
- next += length;
- break;
- case I:
- // the reference sequence and other reads should have been gapped for
- // this insertion, so just add in the residues
- newRead.append(read.substring(next, next + length));
- next += length;
- break;
- case H:
- // hard clipping - this stretch will not appear in the read
- break;
- default:
- // P, X EQ don't know what to do with these
- break;
- }
-
- }
- return newRead.toString();
- }