From 846629a1a7ab7715d14a18296bc3024c5de4ac5c Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Fri, 2 Mar 2018 10:34:27 +0000 Subject: [PATCH] JAL-2909 add sequence feature for insertions --- src/jalview/datamodel/CigarParser.java | 17 ++++++++++++----- src/jalview/io/BamFile.java | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java index 0abe9bf..cfbe842 100644 --- a/src/jalview/datamodel/CigarParser.java +++ b/src/jalview/datamodel/CigarParser.java @@ -30,10 +30,13 @@ public class CigarParser * @param alignmentStart * start of alignment to be used as offset when padding reads with * gaps + * @param seq + * sequence to be annotated with inserts * @return string representing read with gaps, clipping etc applied */ public String parseCigarToSequence(SAMRecord rec, - SortedMap insertions, int alignmentStart) + SortedMap insertions, int alignmentStart, + SequenceI seq) { StringBuilder newRead = new StringBuilder(); Iterator it = rec.getCigar().getCigarElements() @@ -86,7 +89,7 @@ public class CigarParser Iterator> iit = seqInserts.entrySet() .iterator(); - String nextSegment = applyCigarOp(el, next, rec, iit, override); + String nextSegment = applyCigarOp(el, next, rec, iit, override, seq); newRead.append(nextSegment); if (el.getOperator().consumesReferenceBases()) @@ -124,11 +127,12 @@ public class CigarParser * an optional which can override a * in it to change the length. Set to null if not * used. + * @param seq * @return */ private String applyCigarOp(CigarElement el, int next, SAMRecord rec, Iterator> it, - int[] override) + int[] override, SequenceI seq) { StringBuilder newRead = new StringBuilder(); String read = rec.getReadString(); @@ -143,7 +147,7 @@ public class CigarParser case EQ: // matched or mismatched residues newRead = applyCigarConsumeReadAndRef(next, length, rec, it, - override); + override, seq); break; case N: // intron in RNA case D: // deletion @@ -161,6 +165,8 @@ public class CigarParser // the reference sequence and other reads should have been gapped for // this insertion, so just add in the residues newRead.append(read.substring(nextPos, nextPos + length)); + seq.addSequenceFeature(new SequenceFeature("INSERTION", "", + nextPos + 1, nextPos + length, "bamfile")); break; case S: // soft clipping - just skip this bit of the read @@ -186,11 +192,12 @@ public class CigarParser * iterator over insertions in the CIGAR region * @param override * insertions which have already been accounted for + * @param seq * @return */ private StringBuilder applyCigarConsumeReadAndRef(int next, int length, SAMRecord rec, Iterator> it, - int[] override) + int[] override, SequenceI seq) { StringBuilder newRead = new StringBuilder(); String read = rec.getReadString(); diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 277a61d..10e2d0e 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -139,7 +139,7 @@ public class BamFile extends AlignFile seq.setEnd(rec.getReadLength()); String newRead = parser.parseCigarToSequence(rec, insertions, - alignmentStart); + alignmentStart, seq); // make alignment sequences SequenceI alsq = seq.deriveSequence(); -- 1.7.10.2