X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FCigarParser.java;h=cfbe842fffa63a48563932a60ebcdea33be3c383;hb=846629a1a7ab7715d14a18296bc3024c5de4ac5c;hp=fd5694c5b5fd387df6664f36b79dca0883b43899;hpb=1cb2e57ec271b4a6747471c9f4eaf03a982230c6;p=jalview.git diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java index fd5694c..cfbe842 100644 --- a/src/jalview/datamodel/CigarParser.java +++ b/src/jalview/datamodel/CigarParser.java @@ -30,10 +30,13 @@ public class CigarParser * @param alignmentStart * start of alignment to be used as offset when padding reads with * gaps + * @param seq + * sequence to be annotated with inserts * @return string representing read with gaps, clipping etc applied */ public String parseCigarToSequence(SAMRecord rec, - SortedMap insertions, int alignmentStart) + SortedMap insertions, int alignmentStart, + SequenceI seq) { StringBuilder newRead = new StringBuilder(); Iterator it = rec.getCigar().getCigarElements() @@ -68,11 +71,12 @@ public class CigarParser { // ERROR int pos = rec.getStart() + refnext; - System.out.println("Insertion not added to seqInserts: " + pos); + System.out.println( + "Read " + rec.getReadName() + " has an insertion at " + + pos + " which is not in seqInserts"); } else { - // we just inserted something // remove these inserts now - should be very first entry int count = seqInserts.get(rec.getStart() + refnext); @@ -85,7 +89,7 @@ public class CigarParser Iterator> iit = seqInserts.entrySet() .iterator(); - String nextSegment = applyCigarOp(el, next, rec, iit, override); + String nextSegment = applyCigarOp(el, next, rec, iit, override, seq); newRead.append(nextSegment); if (el.getOperator().consumesReferenceBases()) @@ -123,11 +127,12 @@ public class CigarParser * an optional which can override a * in it to change the length. Set to null if not * used. + * @param seq * @return */ private String applyCigarOp(CigarElement el, int next, SAMRecord rec, Iterator> it, - int[] override) + int[] override, SequenceI seq) { StringBuilder newRead = new StringBuilder(); String read = rec.getReadString(); @@ -142,7 +147,7 @@ public class CigarParser case EQ: // matched or mismatched residues newRead = applyCigarConsumeReadAndRef(next, length, rec, it, - override); + override, seq); break; case N: // intron in RNA case D: // deletion @@ -160,6 +165,8 @@ public class CigarParser // the reference sequence and other reads should have been gapped for // this insertion, so just add in the residues newRead.append(read.substring(nextPos, nextPos + length)); + seq.addSequenceFeature(new SequenceFeature("INSERTION", "", + nextPos + 1, nextPos + length, "bamfile")); break; case S: // soft clipping - just skip this bit of the read @@ -185,11 +192,12 @@ public class CigarParser * iterator over insertions in the CIGAR region * @param override * insertions which have already been accounted for + * @param seq * @return */ private StringBuilder applyCigarConsumeReadAndRef(int next, int length, SAMRecord rec, Iterator> it, - int[] override) + int[] override, SequenceI seq) { StringBuilder newRead = new StringBuilder(); String read = rec.getReadString(); @@ -270,7 +278,7 @@ public class CigarParser SAMRecord rec = it.next(); Iterator cit = rec.getCigar().getCigarElements() .iterator(); - int next = 0; + int next = 1; while (cit.hasNext()) { CigarElement el = cit.next(); @@ -278,12 +286,11 @@ public class CigarParser { case I: // add to insertions list, and move along the read - // location is 1 past the current position of next - // (note if we try to retrieve +1 position from reference by calling - // getReferencePositionAtReadPosition(next+1) we get 0 because it's an - // insertion!) - int refLocation = rec.getReferencePositionAtReadPosition(next) - + 1; + // location is the start of next CIGAR segment + // because getReferencePositionAtReadPosition returns 0 for read + // positions which are not in the reference (like insertions) + int refLocation = rec.getReferencePositionAtReadPosition( + next + el.getLength()); // if there's already an insertion at this location, keep the longest // insertion; if there's no insertion keep this one