* @param alignmentStart
* start of alignment to be used as offset when padding reads with
* gaps
+ * @param seq
+ * sequence to be annotated with inserts
* @return string representing read with gaps, clipping etc applied
*/
public String parseCigarToSequence(SAMRecord rec,
- SortedMap<Integer, Integer> insertions, int alignmentStart)
+ SortedMap<Integer, Integer> insertions, int alignmentStart,
+ SequenceI seq)
{
StringBuilder newRead = new StringBuilder();
Iterator<CigarElement> it = rec.getCigar().getCigarElements()
{
// ERROR
int pos = rec.getStart() + refnext;
- System.out.println("Insertion not added to seqInserts: " + pos);
+ System.out.println(
+ "Read " + rec.getReadName() + " has an insertion at "
+ + pos + " which is not in seqInserts");
}
else
{
-
// we just inserted something
// remove these inserts now - should be very first entry
int count = seqInserts.get(rec.getStart() + refnext);
Iterator<Map.Entry<Integer, Integer>> iit = seqInserts.entrySet()
.iterator();
- String nextSegment = applyCigarOp(el, next, rec, iit, override);
+ String nextSegment = applyCigarOp(el, next, rec, iit, override, seq);
newRead.append(nextSegment);
if (el.getOperator().consumesReferenceBases())
* an optional <insertion position, length> which can override a
* <position,length> in it to change the length. Set to null if not
* used.
+ * @param seq
* @return
*/
private String applyCigarOp(CigarElement el, int next,
SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
- int[] override)
+ int[] override, SequenceI seq)
{
StringBuilder newRead = new StringBuilder();
String read = rec.getReadString();
case EQ:
// matched or mismatched residues
newRead = applyCigarConsumeReadAndRef(next, length, rec, it,
- override);
+ override, seq);
break;
case N: // intron in RNA
case D: // deletion
// the reference sequence and other reads should have been gapped for
// this insertion, so just add in the residues
newRead.append(read.substring(nextPos, nextPos + length));
+ seq.addSequenceFeature(new SequenceFeature("INSERTION", "",
+ nextPos + 1, nextPos + length, "bamfile"));
break;
case S:
// soft clipping - just skip this bit of the read
* iterator over insertions in the CIGAR region
* @param override
* insertions which have already been accounted for
+ * @param seq
* @return
*/
private StringBuilder applyCigarConsumeReadAndRef(int next, int length,
SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
- int[] override)
+ int[] override, SequenceI seq)
{
StringBuilder newRead = new StringBuilder();
String read = rec.getReadString();
SAMRecord rec = it.next();
Iterator<CigarElement> cit = rec.getCigar().getCigarElements()
.iterator();
- int next = 0;
+ int next = 1;
while (cit.hasNext())
{
CigarElement el = cit.next();
{
case I:
// add to insertions list, and move along the read
- // location is 1 past the current position of next
- // (note if we try to retrieve +1 position from reference by calling
- // getReferencePositionAtReadPosition(next+1) we get 0 because it's an
- // insertion!)
- int refLocation = rec.getReferencePositionAtReadPosition(next)
- + 1;
+ // location is the start of next CIGAR segment
+ // because getReferencePositionAtReadPosition returns 0 for read
+ // positions which are not in the reference (like insertions)
+ int refLocation = rec.getReferencePositionAtReadPosition(
+ next + el.getLength());
// if there's already an insertion at this location, keep the longest
// insertion; if there's no insertion keep this one