JAL-2909 Tidy up bam file cigar code
authorkiramt <k.mourao@dundee.ac.uk>
Tue, 27 Feb 2018 09:16:13 +0000 (09:16 +0000)
committerkiramt <k.mourao@dundee.ac.uk>
Tue, 27 Feb 2018 09:16:13 +0000 (09:16 +0000)
src/jalview/datamodel/CigarParser.java
src/jalview/gui/BamFileOptionsChooser.java [new file with mode: 0644]
src/jalview/io/BamFile.java
src/jalview/io/FileLoader.java
test/jalview/datamodel/CigarParserTest.java

index ee0d523..95f1f2d 100644 (file)
@@ -15,9 +15,9 @@ public class CigarParser
 {
   private final char gapChar;
 
-  public CigarParser(char gapCharacter)
+  public CigarParser(char gap)
   {
-    gapChar = gapCharacter;
+    gapChar = gap;
   }
 
   /**
@@ -32,20 +32,21 @@ public class CigarParser
   public String parseCigarToSequence(SAMRecord rec,
           SortedMap<Integer, Integer> insertions)
   {
+    StringBuilder newRead = new StringBuilder();
     Iterator<CigarElement> it = rec.getCigar().getCigarElements()
             .iterator();
 
-    StringBuilder newRead = new StringBuilder();
     int next = 0;
     int refnext = 0;
 
     // pad with spaces before read
     // first count gaps needed to pad to reference position
     // NB position is 1-based so number of gaps = pos-1
-    int gaps = rec.getUnclippedStart() - 1;
+    int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1;
     // now count gaps to pad for insertions in other reads
     int insertCount = countInsertionsBeforeRead(rec, insertions);
     addGaps(newRead, gaps + insertCount);
+    // addGaps(newTrimmedRead, gaps + insertCount);
 
     int lastinserts = 0;
     while (it.hasNext())
@@ -60,18 +61,29 @@ public class CigarParser
       int[] override = null;
       if (lastinserts > 0)
       {
-        // we just inserted something
-        // remove these inserts now - should be very first entry
-        int count = seqInserts.get(rec.getStart() + refnext);
-        override = new int[] { rec.getStart() + refnext,
-            count - lastinserts };
-        lastinserts = 0;
+        if (!seqInserts.containsKey(rec.getStart() + refnext))
+        {
+          // ERROR
+          int pos = rec.getStart() + refnext;
+          System.out.println("Insertion not added to seqInserts: " + pos);
+        }
+        else
+        {
+
+          // we just inserted something
+          // remove these inserts now - should be very first entry
+          int count = seqInserts.get(rec.getStart() + refnext);
+          override = new int[] { rec.getStart() + refnext,
+              count - lastinserts };
+          lastinserts = 0;
+        }
       }
 
       Iterator<Map.Entry<Integer, Integer>> iit = seqInserts.entrySet()
               .iterator();
 
-      newRead.append(applyCigarOp(el, next, rec, iit, override));
+      String nextSegment = applyCigarOp(el, next, rec, iit, override);
+      newRead.append(nextSegment);
 
       if (el.getOperator().consumesReferenceBases())
       {
@@ -141,20 +153,13 @@ public class CigarParser
       }
       addGaps(newRead, length + insertCount);
       break;
-    case S:
-      // soft clipping - just skip this bit of the read
-      // do nothing
-
-      newRead.append(
-              read.substring(nextPos, nextPos + length).toLowerCase());
-      // nextPos += length;
-      break;
     case I:
       // the reference sequence and other reads should have been gapped for
       // this insertion, so just add in the residues
       newRead.append(read.substring(nextPos, nextPos + length));
-      // nextPos += length;
       break;
+    case S:
+      // soft clipping - just skip this bit of the read
     case H:
       // hard clipping - this stretch will not appear in the read
     default:
@@ -249,7 +254,7 @@ public class CigarParser
   */
   public SortedMap<Integer, Integer> getInsertions(Iterator<SAMRecord> it)
   {
-    SortedMap<Integer, Integer> insertions = new TreeMap<>();
+    SortedMap<Integer, Integer> inserts = new TreeMap<>();
     while (it.hasNext())
     {
       // check each record for insertions in the CIGAR string
@@ -273,11 +278,11 @@ public class CigarParser
 
           // if there's already an insertion at this location, keep the longest
           // insertion; if there's no insertion keep this one
-          if (!insertions.containsKey(refLocation)
-                  || (insertions.containsKey(refLocation)
-                          && insertions.get(refLocation) < el.getLength()))
+          if (!inserts.containsKey(refLocation)
+                  || (inserts.containsKey(refLocation)
+                          && inserts.get(refLocation) < el.getLength()))
           {
-            insertions.put(refLocation, el.getLength());
+            inserts.put(refLocation, el.getLength());
           }
           next += el.getLength();
           break;
@@ -293,7 +298,7 @@ public class CigarParser
       }
 
     }
-    return insertions;
+    return inserts;
   }
 
   /**
@@ -320,18 +325,18 @@ public class CigarParser
    * 
    * @param rec
    *          the SAMRecord for the read
-   * @param insertions
+   * @param inserts
    *          the map of insertions
    * @return number of insertions before the read starts
    */
   private int countInsertionsBeforeRead(SAMRecord rec,
-          SortedMap<Integer, Integer> insertions)
+          SortedMap<Integer, Integer> inserts)
   {
     int gaps = 0;
 
     // add in any insertion gaps before read
     // TODO start point should be start of alignment not 0
-    SortedMap<Integer, Integer> seqInserts = insertions.subMap(0,
+    SortedMap<Integer, Integer> seqInserts = inserts.subMap(0,
             rec.getStart());
 
     Iterator<Map.Entry<Integer, Integer>> it = seqInserts.entrySet()
diff --git a/src/jalview/gui/BamFileOptionsChooser.java b/src/jalview/gui/BamFileOptionsChooser.java
new file mode 100644 (file)
index 0000000..7f9583f
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import javax.swing.JPanel;
+
+/**
+ * A dialog where a user can choose import options for a bam file
+ */
+public class BamFileOptionsChooser extends JPanel
+{
+
+}
index 0569692..26c14a2 100644 (file)
@@ -90,8 +90,8 @@ public class BamFile extends AlignFile
   @Override
   public void parse() throws IOException
   {
-    CigarParser parser = new CigarParser('-');
     SAMRecordIterator it = fileReader.iterator();
+    CigarParser parser = new CigarParser('-');
     SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
     it.close();
 
@@ -99,22 +99,22 @@ public class BamFile extends AlignFile
     while (it.hasNext())
     {
       SAMRecord rec = it.next();
-      int start = rec.getStart();
-      int end = rec.getEnd();
 
-      SequenceI seq = new Sequence(rec.getReadName(), rec.getReadString());
+      // make dataset sequence: start at 1, end at read length
+      SequenceI seq = new Sequence(rec.getReadName(),
+              rec.getReadString().toLowerCase());
+      seq.setStart(1);
+      seq.setEnd(rec.getReadLength());
 
-      String cigarredRead = parser.parseCigarToSequence(rec, insertions);
+      String newRead = parser.parseCigarToSequence(rec, insertions);
 
+      // make alignment sequences
       SequenceI alsq = seq.deriveSequence();
-      alsq.setSequence(cigarredRead);
-      alsq.setStart(start);
-      alsq.setEnd(end);
+      alsq.setSequence(newRead);
+
+      // set start relative to soft clip; assume end is set by Sequence code
+      alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
       seqs.add(alsq);
     }
   }
-
-
-  
-
 }
index f26d6da..11432bd 100755 (executable)
@@ -308,6 +308,11 @@ public class FileLoader implements Runnable
       loadtime = -System.currentTimeMillis();
       AlignmentI al = null;
 
+      if (FileFormat.Bam.equals(format))
+      {
+        // ask the user which bit of the bam they want to load
+      }
+
       if (FileFormat.Jalview.equals(format))
       {
         if (source != null)
index 84a432e..8df02e3 100644 (file)
@@ -41,10 +41,10 @@ public class CigarParserTest
     String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC";
 
     return new Object[][] { { "1S84M2I14M", read, 21,
-        "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+        "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
         insertions },
         { "1S84M2I14M", read, 21,
-            "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
+            "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
             insertions3 },
 
         { "44M1D57M",
@@ -59,7 +59,7 @@ public class CigarParserTest
         { "6M2D76M19S",
             "CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC",
             4,
-            "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGAcgaggagctcgttggtccc",
+            "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA",
             insertions2 },
 
         { "44M1D57M",
@@ -72,7 +72,7 @@ public class CigarParserTest
             "---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
             noinsertions },
         { "5S96M", read, 7,
-            "-cgaagCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
             noinsertions },
         { "96M5H", read, 7,
             "------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT",