JAL-2909 add sequence feature for insertions
authorJim Procter <jprocter@issues.jalview.org>
Fri, 2 Mar 2018 10:34:27 +0000 (10:34 +0000)
committerJim Procter <jprocter@issues.jalview.org>
Fri, 2 Mar 2018 10:34:27 +0000 (10:34 +0000)
src/jalview/datamodel/CigarParser.java
src/jalview/io/BamFile.java

index 0abe9bf..cfbe842 100644 (file)
@@ -30,10 +30,13 @@ public class CigarParser
    * @param alignmentStart
    *          start of alignment to be used as offset when padding reads with
    *          gaps
+   * @param seq
+   *          sequence to be annotated with inserts
    * @return string representing read with gaps, clipping etc applied
    */
   public String parseCigarToSequence(SAMRecord rec,
-          SortedMap<Integer, Integer> insertions, int alignmentStart)
+          SortedMap<Integer, Integer> insertions, int alignmentStart,
+          SequenceI seq)
   {
     StringBuilder newRead = new StringBuilder();
     Iterator<CigarElement> it = rec.getCigar().getCigarElements()
@@ -86,7 +89,7 @@ public class CigarParser
       Iterator<Map.Entry<Integer, Integer>> iit = seqInserts.entrySet()
               .iterator();
 
-      String nextSegment = applyCigarOp(el, next, rec, iit, override);
+      String nextSegment = applyCigarOp(el, next, rec, iit, override, seq);
       newRead.append(nextSegment);
 
       if (el.getOperator().consumesReferenceBases())
@@ -124,11 +127,12 @@ public class CigarParser
    *          an optional <insertion position, length> which can override a
    *          <position,length> in it to change the length. Set to null if not
    *          used.
+   * @param seq
    * @return
    */
   private String applyCigarOp(CigarElement el, int next,
           SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
-          int[] override)
+          int[] override, SequenceI seq)
   {
     StringBuilder newRead = new StringBuilder();
     String read = rec.getReadString();
@@ -143,7 +147,7 @@ public class CigarParser
     case EQ:
       // matched or mismatched residues
       newRead = applyCigarConsumeReadAndRef(next, length, rec, it,
-              override);
+              override, seq);
       break;
     case N: // intron in RNA
     case D: // deletion
@@ -161,6 +165,8 @@ public class CigarParser
       // the reference sequence and other reads should have been gapped for
       // this insertion, so just add in the residues
       newRead.append(read.substring(nextPos, nextPos + length));
+      seq.addSequenceFeature(new SequenceFeature("INSERTION", "",
+              nextPos + 1, nextPos + length, "bamfile"));
       break;
     case S:
       // soft clipping - just skip this bit of the read
@@ -186,11 +192,12 @@ public class CigarParser
    *          iterator over insertions in the CIGAR region
    * @param override
    *          insertions which have already been accounted for
+   * @param seq
    * @return
    */
   private StringBuilder applyCigarConsumeReadAndRef(int next, int length,
           SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
-          int[] override)
+          int[] override, SequenceI seq)
   {
     StringBuilder newRead = new StringBuilder();
     String read = rec.getReadString();
index 277a61d..10e2d0e 100644 (file)
@@ -139,7 +139,7 @@ public class BamFile extends AlignFile
         seq.setEnd(rec.getReadLength());
 
         String newRead = parser.parseCigarToSequence(rec, insertions,
-                alignmentStart);
+                alignmentStart, seq);
 
         // make alignment sequences
         SequenceI alsq = seq.deriveSequence();