Merge branch 'develop' into features/JAL-2909_bamImport2_11
authorJim Procter <jprocter@issues.jalview.org>
Wed, 21 Mar 2018 17:16:07 +0000 (17:16 +0000)
committerJim Procter <jprocter@issues.jalview.org>
Wed, 21 Mar 2018 17:16:07 +0000 (17:16 +0000)
19 files changed:
.classpath
lib/htsjdk-1.133.jar [deleted file]
resources/lang/Messages.properties
src/jalview/datamodel/CigarBase.java
src/jalview/datamodel/CigarParser.java [new file with mode: 0644]
src/jalview/datamodel/SeqCigar.java
src/jalview/gui/BamFileOptionsChooser.java [new file with mode: 0644]
src/jalview/io/AlignFile.java
src/jalview/io/AlignmentFileReaderI.java
src/jalview/io/AppletFormatAdapter.java
src/jalview/io/BamFile.java [new file with mode: 0644]
src/jalview/io/FileFormat.java
src/jalview/io/FileLoader.java
src/jalview/io/FileParse.java
src/jalview/io/FormatAdapter.java
src/jalview/io/IdentifyFile.java
test/jalview/datamodel/CigarParserTest.java [new file with mode: 0644]
test/jalview/datamodel/SeqCigarTest.java
utils/checkstyle/import-control.xml

index 17523b9..f4b8cf8 100644 (file)
@@ -66,7 +66,6 @@
        <classpathentry kind="con" path="org.testng.TESTNG_CONTAINER"/>
        <classpathentry kind="lib" path="lib/biojava-core-4.1.0.jar"/>
        <classpathentry kind="lib" path="lib/biojava-ontology-4.1.0.jar"/>
-       <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
        <classpathentry kind="lib" path="lib/htsjdk-2.12.0.jar"/>
        <classpathentry kind="lib" path="lib/groovy-all-2.4.12-indy.jar"/>
        <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
diff --git a/lib/htsjdk-1.133.jar b/lib/htsjdk-1.133.jar
deleted file mode 100644 (file)
index f084258..0000000
Binary files a/lib/htsjdk-1.133.jar and /dev/null differ
index 3f5aa94..703adc4 100644 (file)
@@ -1367,3 +1367,7 @@ label.most_bound_molecules = Most Bound Molecules
 label.most_polymer_residues = Most Polymer Residues
 label.cached_structures = Cached Structures
 label.free_text_search = Free Text Search
+label.bam_file_options = BAM File Options
+label.bam_chromosome = Chromosome to load:
+label.bam_range = Region to load:
+warn.bam_params_not_set = .bam file region parameters have not been set
index 4e7e03f..a44d74b 100644 (file)
@@ -191,76 +191,6 @@ public abstract class CigarBase
   }
 
   /**
-   * turn a cigar string into a series of operation range pairs
-   * 
-   * @param cigarString
-   *          String
-   * @return object[] {char[] operation, int[] range}
-   * @throws java.lang.Exception
-   *           for improperly formated cigar strings or ones with unknown
-   *           operations
-   */
-  public static Object[] parseCigarString(String cigarString)
-          throws Exception
-  {
-    int ops = 0;
-    for (int i = 0, l = cigarString.length(); i < l; i++)
-    {
-      char c = cigarString.charAt(i);
-      if (c == M || c == (M - _case_shift) || c == I
-              || c == (I - _case_shift) || c == D || c == (D - _case_shift))
-      {
-        ops++;
-      }
-    }
-    char[] operation = new char[ops];
-    int[] range = new int[ops];
-    int op = 0;
-    int i = 0, l = cigarString.length();
-    while (i < l)
-    {
-      char c;
-      int j = i;
-      do
-      {
-        c = cigarString.charAt(j++);
-      } while (c >= '0' && c <= '9' && j < l);
-      if (j >= l && c >= '0' && c <= '9')
-      {
-        throw new Exception(MessageManager
-                .getString("exception.unterminated_cigar_string"));
-      }
-      try
-      {
-        String rangeint = cigarString.substring(i, j - 1);
-        range[op] = Integer.parseInt(rangeint);
-        i = j;
-      } catch (Exception e)
-      {
-        throw new Error(MessageManager
-                .getString("error.implementation_bug_parse_cigar_string"));
-      }
-      if (c >= 'a' && c <= 'z')
-      {
-        c -= _case_shift;
-      }
-      if ((c == M || c == I || c == D))
-      {
-        operation[op++] = c;
-      }
-      else
-      {
-        throw new Exception(MessageManager.formatMessage(
-                "exception.unexpected_operation_cigar_string_pos",
-                new String[]
-                { new StringBuffer(c).toString(),
-                    Integer.valueOf(i).toString(), cigarString }));
-      }
-    }
-    return new Object[] { operation, range };
-  }
-
-  /**
    * add an operation to cigar string
    * 
    * @param op
diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java
new file mode 100644 (file)
index 0000000..a42c2b1
--- /dev/null
@@ -0,0 +1,438 @@
+package jalview.datamodel;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMRecord;
+
+public class CigarParser
+{
+  private final char gapChar;
+
+  public CigarParser(char gap)
+  {
+    gapChar = gap;
+  }
+
+  /**
+   * Apply the CIGAR string to a read sequence and return the updated read.
+   * 
+   * @param rec
+   *          the SAM record for the read
+   * @param insertions
+   *          a map of inserted positions and lengths
+   * @param alignmentStart
+   *          start of alignment to be used as offset when padding reads with
+   *          gaps
+   * @param seq
+   *          sequence to be annotated with inserts
+   * @return string representing read with gaps, clipping etc applied
+   */
+  public String parseCigarToSequence(SAMRecord rec,
+          SortedMap<Integer, Integer> insertions, int alignmentStart,
+          SequenceI seq)
+  {
+    StringBuilder newRead = new StringBuilder();
+    Iterator<CigarElement> it = rec.getCigar().getCigarElements()
+            .iterator();
+
+    int next = 0;
+    int refnext = 0;
+
+    // pad with spaces before read
+    // first count gaps needed to pad to reference position
+    // NB position is 1-based so number of gaps = pos-1
+    int gaps = rec.getStart() - alignmentStart;
+
+    // now count gaps to pad for insertions in other reads
+    int insertCount = countInsertionsBeforeRead(rec, insertions);
+    addGaps(newRead, gaps + insertCount);
+
+    int lastinserts = 0;
+    while (it.hasNext())
+    {
+      CigarElement el = it.next();
+      int length = el.getLength();
+
+      // which insertions coincide with current CIGAR operation?
+      SortedMap<Integer, Integer> seqInserts = insertions.subMap(
+              rec.getStart() + refnext, rec.getStart() + refnext + length);
+
+      int[] override = null;
+      if (lastinserts > 0)
+      {
+        if (!seqInserts.containsKey(rec.getStart() + refnext))
+        {
+          // ERROR
+          int pos = rec.getStart() + refnext;
+          System.out.println(
+                  "Read " + rec.getReadName() + " has an insertion at "
+                          + pos + " which is not in seqInserts");
+        }
+        else
+        {
+          // we just inserted something
+          // remove these inserts now - should be very first entry
+          int count = seqInserts.get(rec.getStart() + refnext);
+          override = new int[] { rec.getStart() + refnext,
+              count - lastinserts };
+          lastinserts = 0;
+        }
+      }
+
+      Iterator<Map.Entry<Integer, Integer>> iit = seqInserts.entrySet()
+              .iterator();
+
+      String nextSegment = applyCigarOp(el, next, rec, iit, override, seq);
+      newRead.append(nextSegment);
+
+      if (el.getOperator().consumesReferenceBases())
+      {
+        refnext += length;
+      }
+      if (el.getOperator().consumesReadBases())
+      {
+        next += length;
+      }
+      if (el.getOperator().equals(CigarOperator.I))
+      {
+        // count how many insertions we made
+        lastinserts = length;
+      }
+    }
+    return newRead.toString();
+  }
+
+  /**
+   * Apply a cigar operation to a segment of a read, accounting for any
+   * insertions to the reference from other reads in the region
+   * 
+   * @param el
+   *          the current CIGAR element
+   * @param next
+   *          location in read to start applying CIGAR op
+   * @param length
+   *          length of CIGAR op
+   * @param rec
+   *          SAMRecord corresponding to read
+   * @param it
+   *          iterator over insertions which coincide with rec
+   * @param override
+   *          an optional <insertion position, length> which can override a
+   *          <position,length> in it to change the length. Set to null if not
+   *          used.
+   * @param seq
+   * @return
+   */
+  private String applyCigarOp(CigarElement el, int next,
+          SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
+          int[] override, SequenceI seq)
+  {
+    StringBuilder newRead = new StringBuilder();
+    String read = rec.getReadString();
+
+    int length = el.getLength();
+    int nextPos = next;
+
+    switch (el.getOperator())
+    {
+    case M:
+    case X:
+    case EQ:
+      // matched or mismatched residues
+      newRead = applyCigarConsumeReadAndRef(next, length, rec, it,
+              override, seq);
+      break;
+    case N: // intron in RNA
+    case D: // deletion
+      int insertCount = 0;
+      while (it.hasNext())
+      {
+        // just count all the insertions we need to add in the deletion/intron
+        // region
+        Entry<Integer, Integer> entry = it.next();
+        insertCount += entry.getValue();
+      }
+      addGaps(newRead, length + insertCount);
+      break;
+    case I:
+      // the reference sequence and other reads should have been gapped for
+      // this insertion, so just add in the residues
+      newRead.append(read.substring(nextPos, nextPos + length));
+      seq.addSequenceFeature(new SequenceFeature("INSERTION", "",
+              nextPos + 1, nextPos + length, "bamfile"));
+      break;
+    case S:
+      // soft clipping - just skip this bit of the read
+      seq.addSequenceFeature(new SequenceFeature("SOFTCLIP", "",
+              nextPos + 1, nextPos + length, "bamfile"));
+      break;
+    case H:
+      // hard clipping - this stretch will not appear in the read
+      seq.addSequenceFeature(new SequenceFeature("HARDCLIP", "",
+              nextPos + 1, nextPos + length, "bamfile"));
+      break;
+    default:
+      break;
+    }
+
+    return newRead.toString();
+  }
+
+  /**
+   * Applies a CIGAR operation which consumes both read and reference i.e. M,=,X
+   * 
+   * @param next
+   *          next position in read
+   * @param length
+   *          length of cigar operation
+   * @param rec
+   *          SAMRecord to apply the CIGAR op to
+   * @param it
+   *          iterator over insertions in the CIGAR region
+   * @param override
+   *          insertions which have already been accounted for
+   * @param seq
+   * @return
+   */
+  private StringBuilder applyCigarConsumeReadAndRef(int next, int length,
+          SAMRecord rec, Iterator<Map.Entry<Integer, Integer>> it,
+          int[] override, SequenceI seq)
+  {
+    StringBuilder newRead = new StringBuilder();
+    String read = rec.getReadString();
+
+    int nextPos = next; // location of next position in read
+    while (nextPos < next + length)
+    {
+      int insertPos = next + length; // location of next insert in read (or end
+                                     // of read)
+      int insertLen = 0; // no of gaps to insert after insertPos
+      if (it.hasNext())
+      {
+        // account for sequence already having insertion
+        // if an override entry exists it gives the number of positions still to
+        // be inserted at this point, after accounting for insertions driven by
+        // the current read.
+        // e.g. if the current read has 2 inserted positions here, and the
+        // insertions list also has 2 inserted positions, the override will
+        // cancel the insert for this read. If the insertions list had 3
+        // insertions, the override would reduce this to 1 insertion.
+        Entry<Integer, Integer> entry = it.next();
+        int key = entry.getKey();
+        insertLen = entry.getValue();
+        if (override != null && key == override[0])
+        {
+          insertLen = override[1];
+        }
+        if (insertLen > 0)
+        {
+          insertPos = rec.getReadPositionAtReferencePosition(key) - 1;
+        }
+        else
+        {
+          insertPos = nextPos; // bugfix - trigger by override + insertions in
+                               // same M stretch
+        }
+      }
+      newRead.append(read.substring(nextPos, insertPos));
+      nextPos = insertPos;
+      addGaps(newRead, insertLen); // add insertions
+    }
+    return newRead;
+  }
+
+  /**
+   * Get list of positions inserted to the reference sequence. Note: assumes all
+   * reads are on the same chromosome.
+   * 
+   * @param it
+   *          Iterator over all the SAMRecords
+   * @return sorted map of insertion positions <position, insertion size>
+   */
+  /*
+  1234567
+  ABCDEFG insert 3,4
+  
+  R1: insert 3 = gap before 3rd res
+  AB.CDEFG
+  
+  R2: insert 4 = gap before 4th res
+  ABC.DEFG
+  
+  R3: insert 3,4 = 2 gaps before 3rd res
+  AB..CDEFG
+  
+  => AB--C-DEFG
+  
+  So need to store insertions as (pos, length) giving here (3,1),(4,1),(3,2) - then can sub longest length at position so (3,2), (4,1)
+  Then when inserting need to include previously inserted count AND determine if sequence already has an insertion or needs gap(s) added
+  
+  */
+  public SortedMap<Integer, Integer>[] getInsertions(Iterator<SAMRecord> it)
+  {
+    return getInsertions(it, null);
+  }
+
+  public int firstAlColumn = -1, firstRposCol = -1;
+
+  public SortedMap<Integer, Integer>[] getInsertions(Iterator<SAMRecord> it,
+          Range[] extent)
+  {
+    SortedMap<Integer, Integer> sinserts[] = new SortedMap[] {
+        new TreeMap<>(), new TreeMap<>() };
+    Range xten = extent != null && extent[0] != null ? extent[0] : null;
+    do
+    {
+      // check extent of read
+      SAMRecord rec = it.next();
+      if (extent != null)
+      {
+
+        int nstart = rec.getAlignmentStart();
+        if (nstart != 0)
+        {
+          nstart = rec.getReferencePositionAtReadPosition(nstart);
+        }
+        int nend = rec.getAlignmentEnd();
+        if (nend != 0)
+        {
+          nend = rec.getReferencePositionAtReadPosition(nend);
+        }
+
+        xten = new Range(
+                nstart <= 0 ? xten.start : Math.min(nstart, xten.start),
+                nend == 0 ? xten.end : Math.max(nend, xten.end));
+      }
+
+      // check each record for insertions in the CIGAR string
+
+      // pick the insert map to update
+      int insmap = rec.getReadNegativeStrandFlag() ? 1 : 0;
+      SortedMap<Integer, Integer> inserts = sinserts[insmap];
+
+      Iterator<CigarElement> cit = rec.getCigar().getCigarElements()
+              .iterator();
+      int refLocation = rec.getAlignmentStart();
+      int rloc = 0;
+      int alcol = 0;
+      int alpos = 0;
+      while (cit.hasNext())
+      {
+        CigarElement el = cit.next();
+        switch (el.getOperator())
+        {
+        case S:
+        case H:
+          // ignore soft/hardclipped
+
+          break;
+        default:
+          if (el.getOperator().consumesReadBases()
+                  && !el.getOperator().consumesReferenceBases())
+          {
+            // add to insertions list, and move along the read
+            // location is the start of next CIGAR segment
+            // because getReferencePositionAtReadPosition returns 0 for read
+            // positions which are not in the reference (like insertions)
+
+            // if there's already an insertion at this location, keep the
+            // longest
+            // insertion; if there's no insertion keep this one
+            if (!inserts.containsKey(refLocation)
+                    || (inserts.containsKey(refLocation)
+                            && inserts.get(refLocation) < el.getLength()))
+            {
+              inserts.put(refLocation, el.getLength());
+            }
+            break;
+          }
+          if (el.getOperator().consumesReferenceBases())
+          {
+            if (el.getOperator().consumesReadBases() && alcol == 0
+                    && refLocation + 1 > extent[0].start)
+            {
+              // first match position : last rloc + first match op
+              alcol = rloc + 1;
+              alpos = rec.getReferencePositionAtReadPosition(rloc + 1);
+            }
+            refLocation += el.getLength();
+          }
+
+        }
+        if (el.getOperator().consumesReadBases()
+                || el.getOperator().consumesReferenceBases())
+        {
+          rloc += el.getLength();
+        }
+      }
+      // Wrong: hack that fails to relocate reference to correct place in
+      // sequence
+      if (firstAlColumn < 0)
+      {
+        firstAlColumn = alcol;
+        firstRposCol = alpos;
+      }
+    } while (it.hasNext());
+    if (xten != null)
+    {
+      extent[0] = xten;
+    }
+    return sinserts;
+  }
+
+  /**
+   * Add sequence of gaps to a read
+   * 
+   * @param read
+   *          read to update
+   * @param length
+   *          number of gaps
+   */
+  private void addGaps(StringBuilder read, int length)
+  {
+    if (length > 0)
+    {
+      char[] gaps = new char[length];
+      Arrays.fill(gaps, gapChar);
+      read.append(gaps);
+    }
+  }
+
+  /**
+   * Get the number of insertions before the start of an aligned read (i.e.
+   * insertions in soft clipped region are counted too)
+   * 
+   * @param rec
+   *          the SAMRecord for the read
+   * @param inserts
+   *          the map of insertions
+   * @return number of insertions before the read starts
+   */
+  private int countInsertionsBeforeRead(SAMRecord rec,
+          SortedMap<Integer, Integer> inserts)
+  {
+    int gaps = 0;
+
+    // add in any insertion gaps before read
+    // although we only need to get the submap from alignmentStart, there won't
+    // be any insertions before that so we can just start at 0
+    SortedMap<Integer, Integer> seqInserts = inserts.subMap(0,
+            rec.getStart());
+
+    Iterator<Map.Entry<Integer, Integer>> it = seqInserts.entrySet()
+            .iterator();
+    while (it.hasNext())
+    {
+      Entry<Integer, Integer> entry = it.next();
+      gaps += entry.getValue();
+    }
+    return gaps;
+  }
+}
index c2a6a9c..afe10fc 100644 (file)
@@ -467,25 +467,6 @@ public class SeqCigar extends CigarSimple
   }
 
   /**
-   * Create a cigar object from a cigar string like '[<I|D|M><range>]+' Will
-   * fail if the given seq already contains gaps (JBPNote: future implementation
-   * will fix)
-   * 
-   * @param seq
-   *          SequenceI object resolvable to a dataset sequence
-   * @param cigarString
-   *          String
-   * @return Cigar
-   */
-  public static SeqCigar parseCigar(SequenceI seq, String cigarString)
-          throws Exception
-  {
-    Object[] opsandrange = parseCigarString(cigarString);
-    return new SeqCigar(seq, (char[]) opsandrange[0],
-            (int[]) opsandrange[1]);
-  }
-
-  /**
    * create an alignment from the given array of cigar sequences and gap
    * character, and marking the given segments as visible in the given
    * hiddenColumns.
diff --git a/src/jalview/gui/BamFileOptionsChooser.java b/src/jalview/gui/BamFileOptionsChooser.java
new file mode 100644 (file)
index 0000000..f456319
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import jalview.io.AlignmentFileReaderI;
+import jalview.io.BamFile;
+import jalview.util.MessageManager;
+
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.util.Arrays;
+
+import javax.swing.BorderFactory;
+import javax.swing.JComboBox;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JTextField;
+
+/**
+ * A dialog where a user can choose import options for a bam file
+ */
+public class BamFileOptionsChooser extends JPanel
+{
+
+  // text field for start of range
+  // JFormattedTextField startRange = new JFormattedTextField(
+  // new NumberFormatter(NumberFormat.getIntegerInstance()));
+
+  // text field for end of range
+  // JFormattedTextField endRange = new JFormattedTextField(
+  // new NumberFormatter(NumberFormat.getIntegerInstance()));
+
+  JTextField startRange = new JTextField(10);
+
+  JTextField endRange = new JTextField(10);
+
+  // combo box for chromosome list
+  JComboBox<String> chroCombo;
+
+  public BamFileOptionsChooser(AlignmentFileReaderI source)
+  {
+    Object[] preprocessResult = source.preprocess();
+    String[] chromosomes = Arrays.copyOf(preprocessResult,
+            preprocessResult.length, String[].class);
+
+    setLayout(new GridBagLayout());
+    GridBagConstraints c = new GridBagConstraints();
+
+    setBorder(BorderFactory.createEmptyBorder(0, 10, 10, 10));
+
+    // set up chromosome input
+    JLabel chroLabel = new JLabel(
+            MessageManager.getString("label.bam_chromosome"));
+    chroCombo = new JComboBox<>(chromosomes);
+    
+    // set up region input
+    JPanel range = new JPanel();
+    JLabel rangeLabel = new JLabel(
+            MessageManager.getString("label.bam_range"));
+    startRange.setColumns(8);
+    endRange.setColumns(8);
+
+    JLabel rangeSep = new JLabel("-");
+    range.add(startRange);
+    range.add(rangeSep);
+    range.add(endRange);
+
+    // chromosome label top left
+    c.gridx = 0;
+    c.gridy = 0;
+    c.anchor = GridBagConstraints.WEST;
+    c.fill = GridBagConstraints.NONE;
+    this.add(chroLabel, c);
+
+    // chromosome combo top right
+    c.gridx = 1;
+    c.weightx = 1;
+    c.anchor = GridBagConstraints.WEST;
+    c.fill = GridBagConstraints.HORIZONTAL;
+    this.add(chroCombo, c);
+
+    // region label mid left
+    c.gridx = 0;
+    c.gridy = 1;
+    c.anchor = GridBagConstraints.WEST;
+    c.fill = GridBagConstraints.NONE;
+    this.add(rangeLabel, c);
+
+    // region input mid right
+    c.gridx = 1;
+    c.weightx = 1;
+    c.anchor = GridBagConstraints.WEST;
+    c.fill = GridBagConstraints.HORIZONTAL;
+    this.add(range, c);
+  }
+
+  public void update(AlignmentFileReaderI source)
+  {
+
+    int start = Integer.parseInt(startRange.getText());
+    int end = Integer.parseInt(endRange.getText());
+    ((BamFile) source).setOptions((String) chroCombo.getSelectedItem(),
+            start, end);
+
+  }
+
+}
index 2340283..c931cb8 100755 (executable)
@@ -304,9 +304,9 @@ public abstract class AlignFile extends FileParse
    */
   protected void initData()
   {
-    seqs = new Vector<SequenceI>();
-    annotations = new Vector<AlignmentAnnotation>();
-    seqGroups = new ArrayList<SequenceGroup>();
+    seqs = new Vector<>();
+    annotations = new Vector<>();
+    seqGroups = new ArrayList<>();
     parseCalled = false;
   }
 
@@ -319,7 +319,7 @@ public abstract class AlignFile extends FileParse
   @Override
   public void setSeqs(SequenceI[] s)
   {
-    seqs = new Vector<SequenceI>();
+    seqs = new Vector<>();
 
     for (int i = 0; i < s.length; i++)
     {
@@ -390,7 +390,7 @@ public abstract class AlignFile extends FileParse
   {
     if (newickStrings == null)
     {
-      newickStrings = new Vector<String[]>();
+      newickStrings = new Vector<>();
     }
     newickStrings.addElement(new String[] { treeName, newickString });
   }
@@ -414,4 +414,13 @@ public abstract class AlignFile extends FileParse
   {
     seqs.add(seq);
   }
+
+  @Override
+  public Object[] preprocess()
+  {
+    // most AlignFiles will not need to return any preprocessing information
+    // those that do should override this method
+    return null;
+  }
+
 }
index a471d9b..fe47c8a 100644 (file)
  */
 package jalview.io;
 
-import jalview.api.AlignExportSettingI;
-import jalview.api.AlignmentViewPanel;
 import jalview.api.FeatureSettingsModelI;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceI;
 
+import java.io.IOException;
+
 public interface AlignmentFileReaderI
 {
 
@@ -47,4 +47,13 @@ public interface AlignmentFileReaderI
 
   FeatureSettingsModelI getFeatureColourScheme();
 
+  /**
+   * Get a string array resulting from preprocessing the file
+   * 
+   * @return preprocess result
+   */
+  Object[] preprocess();
+
+  public abstract void parse() throws IOException;
+
 }
index 5e209e6..da78420 100755 (executable)
@@ -147,6 +147,20 @@ public class AppletFormatAdapter
   public AlignmentI readFile(String file, DataSourceType sourceType,
           FileFormatI fileFormat) throws IOException
   {
+    if (alignFile == null)
+    {
+      prepareFileReader(file, sourceType, fileFormat);
+    }
+    else
+    {
+      alignFile.parse();
+    }
+    return buildAlignmentFromFile();
+  }
+  
+  public void prepareFileReader(String file, DataSourceType sourceType,
+          FileFormatI fileFormat) throws IOException
+  {
     this.inFile = file;
     try
     {
@@ -176,10 +190,9 @@ public class AppletFormatAdapter
       }
       else
       {
-        // alignFile = fileFormat.getAlignmentFile(inFile, sourceType);
         alignFile = fileFormat.getReader(new FileParse(inFile, sourceType));
       }
-      return buildAlignmentFromFile();
+      return;
     } catch (Exception e)
     {
       e.printStackTrace();
@@ -200,7 +213,7 @@ public class AppletFormatAdapter
           // Possible sequence is just residues with no label
           alignFile = new FastaFile(">UNKNOWN\n" + inFile,
                   DataSourceType.PASTE);
-          return buildAlignmentFromFile();
+          return;
 
         } catch (Exception ex)
         {
diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java
new file mode 100644 (file)
index 0000000..8d55c53
--- /dev/null
@@ -0,0 +1,322 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.datamodel.CigarParser;
+import jalview.datamodel.Range;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.PrimitiveIterator.OfInt;
+import java.util.SortedMap;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+
+public class BamFile extends AlignFile
+{
+  // SAM/BAM file reader
+  private SamReader fileReader;
+
+  // start position to read from
+  private int start = -1;
+
+  // end position to read to
+  private int end = -1;
+
+  // chromosome/contig to read
+  private String chromosome = "";
+
+  // first position in alignment
+  private int alignmentStart = -1;
+
+  /**
+   * Creates a new BamFile object.
+   */
+  public BamFile()
+  {
+  }
+
+  /**
+   * Creates a new BamFile object.
+   * 
+   * @param inFile
+   *          Name of file to read
+   * @param sourceType
+   *          Whether data source is file, url or other type of data source
+   * 
+   * @throws IOException
+   */
+  public BamFile(String inFile, DataSourceType sourceType)
+          throws IOException
+  {
+    super(true, inFile, sourceType);
+    final SamReaderFactory factory = SamReaderFactory.makeDefault()
+            .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
+                    SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
+            .validationStringency(ValidationStringency.SILENT);
+    fileReader = factory.open(new File(inFile));
+  }
+
+  /**
+   * Creates a new BamFile object
+   * 
+   * @param source
+   *          wrapper for datasource
+   * @throws IOException
+   */
+  public BamFile(FileParse source) throws IOException
+  {
+    super(true, source);
+    parseSuffix();
+    final SamReaderFactory factory = SamReaderFactory.makeDefault()
+            .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
+                    SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
+            .validationStringency(ValidationStringency.SILENT);
+
+    // File-based bam
+    if (source.getDataSourceType() == DataSourceType.FILE)
+    {
+      fileReader = factory.open(source.inFile);
+    }
+    else
+    {
+      // locate index ?
+      String index = source.getDataName() + ".bai";
+      fileReader = factory.open(SamInputResource.of(source.getDataName())
+              .index(new URL(index)));
+    }
+  }
+  
+  @Override
+  public String print(SequenceI[] seqs, boolean jvsuffix)
+  {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+  private StringBuilder insertRefSeq(
+          SortedMap<Integer, Integer> insertions, Range xtent)
+  {
+    StringBuilder refseq = new StringBuilder();
+    int inserted = 0;
+    for (int p = xtent.start; p < xtent.end; p++)
+    {
+      refseq.append('N');
+    }
+    for (Map.Entry<Integer, Integer> insert : insertions.entrySet())
+    {
+      int inspos = insert.getKey() - xtent.start + inserted;
+      if (inspos < 0)
+      {
+        System.out.println("Ignoring -ve insert position " + insert.getKey()
+                + " of " + insert.getValue() +
+                " (alpos: " + inspos + ")");
+        inspos = 0;
+        // continue;
+      }
+      for (int i = 0, j = insert.getValue(); i < j; i++)
+      {
+        inserted++;
+        refseq.insert(inspos, '-');
+      }
+    }
+    return refseq;
+  }
+
+  private void padRef(SequenceI ref, CigarParser cig)
+  { // pad to column
+    int padding = cig.firstAlColumn - ref.findIndex(cig.firstRposCol);
+    System.out.println("Padding " + padding + " to move refseq position "
+            + cig.firstRposCol + " to " + cig.firstAlColumn + "col.");
+
+    ref.insertCharAt(0, padding, '-');
+  }
+
+  @Override
+  public void parse()
+  {
+    // only actually parse if params are set
+    if (chromosome != null && chromosome != "")
+    {
+      SAMRecordIterator it = fileReader.query(chromosome, start, end,
+              false);
+      CigarParser parser = new CigarParser('-');
+      Range[] xtent = new Range[] { new Range(start, end) };
+      SortedMap<Integer, Integer> insertions[] = parser.getInsertions(it,
+              xtent);
+      it.close();
+
+      SequenceI refSeq = new Sequence("chr:" + chromosome,
+              insertRefSeq(insertions[0], xtent[0])
+                      .toString());
+
+      refSeq.setStart(xtent[0].start);
+      refSeq.setEnd(xtent[0].end);
+      SequenceI revRefSeq = new Sequence("rev:chr:" + chromosome,
+              insertRefSeq(insertions[1], xtent[0])
+                      .toString());
+      revRefSeq.setStart(xtent[0].start);
+      revRefSeq.setEnd(xtent[0].end);
+
+      // Hack to move the seqs along
+      padRef(refSeq, parser);
+      padRef(revRefSeq, parser);
+
+      it = fileReader.query(chromosome, start, end, false);
+
+      ArrayList<SequenceI> fwd = new ArrayList(), rev = new ArrayList();
+      while (it.hasNext())
+      {
+        SAMRecord rec = it.next();
+
+        // set the alignment start to be start of first read (we assume reads
+        // are sorted)
+        if (alignmentStart == -1)
+        {
+          alignmentStart = rec.getAlignmentStart();
+        }
+
+        // make dataset sequence: start at 1, end at read length
+        SequenceI seq = new Sequence(
+                "" + (rec.getReadNegativeStrandFlag() ? "rev:" : "")
+                        + rec.getReadName(),
+                rec.getReadString().toLowerCase());
+        seq.setStart(1);
+        seq.setEnd(rec.getReadLength());
+        OfInt q = rec.getBaseQualityString().chars()
+                .iterator();
+        int p = seq.getStart();
+        while (q.hasNext())
+        {
+          seq.addSequenceFeature(new SequenceFeature("QUALITY", "", p, p,
+                  (float) q.next() - ' ', "bamfile"));
+          p++;
+        }
+        String newRead = parser.parseCigarToSequence(rec,
+                insertions[rec.getReadNegativeStrandFlag() ? 1 : 0],
+                alignmentStart, seq);
+
+        // make alignment sequences
+        SequenceI alsq = seq.deriveSequence();
+        alsq.setSequence(newRead);
+
+        // set start relative to soft clip; assume end is set by Sequence code
+        alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
+        if (rec.getReadNegativeStrandFlag())
+        {
+          rev.add(alsq);
+        }
+        else
+        {
+          fwd.add(alsq);
+        }
+      }
+      // Add forward
+      if (fwd.size() > 0)
+      {
+        seqs.add(refSeq); // FIXME needs to be optional, and properly padded
+        seqs.addAll(fwd);
+        fwd.clear();
+      }
+      // and reverse strand reads.
+      if (rev.size() > 0)
+      {
+        seqs.add(revRefSeq); // FIXME needs to be optional and properly padded
+        seqs.addAll(rev);
+        rev.clear();
+      }
+    }
+  }
+
+  /**
+   * Get the list of chromosomes or contigs from the file (listed in SQ entries
+   * in BAM file header)
+   * 
+   * @return array of chromosome/contig strings
+   */
+  @Override
+  public Object[] preprocess()
+  {
+    List<SAMSequenceRecord> refSeqs = fileReader.getFileHeader()
+            .getSequenceDictionary().getSequences();
+    List<String> chrs = new ArrayList<>();
+
+    for (SAMSequenceRecord ref : refSeqs)
+    {
+      chrs.add(ref.getSequenceName());
+    }
+    return chrs.toArray();
+  }
+
+  public void setOptions(String chr, int s, int e)
+  {
+    chromosome = chr;
+    start = s;
+    end = e;
+    suffix = chromosome + ":" + start + "-" + end;
+  }
+
+  public boolean parseSuffix()
+  {
+    if (suffix == null)
+    {
+      return false;
+    }
+    int csep = suffix.indexOf(":");
+    int rsep = suffix.indexOf("-", csep);
+    if (csep < 0 || rsep < 0 || suffix.length() - rsep <= 1)
+    {
+      return false;
+    }
+    String chr, p1, p2;
+    chr = suffix.substring(0, csep);
+    p1 = suffix.substring(csep + 1, rsep);
+    p2 = suffix.substring(rsep + 1);
+    int cstart, cend;
+    try
+    {
+      cstart = Integer.parseInt(p1);
+      cend = Integer.parseInt(p2);
+    } catch (Exception e)
+    {
+      warningMessage = (warningMessage == null ? "" : warningMessage + "\n")
+              + "Couldn't parse range from " + suffix;
+      return false;
+    }
+    chromosome = chr;
+    start = cstart;
+    end = cend;
+    return true;
+  }
+}
index 4b33dbf..21fadd2 100644 (file)
@@ -347,6 +347,27 @@ public enum FileFormat implements FileFormatI
       return true;
     }
   },
+  Bam("bam", "bam", true, true)
+  {
+    @Override
+    public AlignmentFileReaderI getReader(FileParse source)
+            throws IOException
+    {
+      return new BamFile(source);
+    }
+
+    @Override
+    public AlignmentFileWriterI getWriter(AlignmentI al)
+    {
+      return new BamFile();
+    }
+
+    @Override
+    public boolean isStructureFile()
+    {
+      return false;
+    }
+  },
   Jalview("Jalview", "jar,jvp", true, true)
   {
     @Override
index f26d6da..8eb60e5 100755 (executable)
@@ -32,6 +32,7 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
 import jalview.gui.AlignViewport;
+import jalview.gui.BamFileOptionsChooser;
 import jalview.gui.Desktop;
 import jalview.gui.Jalview2XML;
 import jalview.gui.JvOptionPane;
@@ -308,6 +309,43 @@ public class FileLoader implements Runnable
       loadtime = -System.currentTimeMillis();
       AlignmentI al = null;
 
+      if (FileFormat.Bam.equals(format))
+      {
+        FormatAdapter fa = new FormatAdapter();
+        if (source == null)
+        {
+          fa.prepareFileReader(file, protocol, format);
+          source = fa.getAlignFile();
+        }
+        if (!((BamFile) source).parseSuffix())
+        {
+          // configure a window
+          BamFileOptionsChooser bamoptions = new BamFileOptionsChooser(
+                  source);
+          // ask the user which bit of the bam they want to load
+          int confirm = JvOptionPane.showConfirmDialog(null, bamoptions,
+                  MessageManager.getString("label.bam_file_options"),
+                  JvOptionPane.OK_CANCEL_OPTION,
+                  JvOptionPane.PLAIN_MESSAGE);
+
+          if (confirm == JvOptionPane.CANCEL_OPTION
+                  || confirm == JvOptionPane.CLOSED_OPTION)
+          {
+            Desktop.instance.stopLoading();
+            return;
+          }
+          else
+          {
+            bamoptions.update(source);
+            if (file.indexOf("#") == -1)
+            {
+              file = file + "#" + ((BamFile) source).suffix;
+            }
+          }
+        }
+        al = fa.readFile(file, protocol, format);
+      }
+
       if (FileFormat.Jalview.equals(format))
       {
         if (source != null)
@@ -324,37 +362,40 @@ public class FileLoader implements Runnable
         String error = AppletFormatAdapter.getSupportedFormats();
         try
         {
-          if (source != null)
-          {
-            // read from the provided source
-            al = new FormatAdapter().readFromFile(source, format);
-          }
-          else
+          if (al == null)
           {
-
-            // open a new source and read from it
-            FormatAdapter fa = new FormatAdapter();
-            boolean downloadStructureFile = format.isStructureFile()
-                    && protocol.equals(DataSourceType.URL);
-            if (downloadStructureFile)
+            if (source != null)
             {
-              String structExt = format.getExtensions().split(",")[0];
-              String urlLeafName = file.substring(
-                      file.lastIndexOf(
-                              System.getProperty("file.separator")),
-                      file.lastIndexOf("."));
-              String tempStructureFileStr = createNamedJvTempFile(
-                      urlLeafName, structExt);
-              UrlDownloadClient.download(file, tempStructureFileStr);
-              al = fa.readFile(tempStructureFileStr, DataSourceType.FILE,
-                      format);
-              source = fa.getAlignFile();
+              // read from the provided source
+              al = new FormatAdapter().readFromFile(source, format);
             }
             else
             {
-              al = fa.readFile(file, protocol, format);
-              source = fa.getAlignFile(); // keep reference for later if
-                                          // necessary.
+
+              // open a new source and read from it
+              FormatAdapter fa = new FormatAdapter();
+              boolean downloadStructureFile = format.isStructureFile()
+                      && protocol.equals(DataSourceType.URL);
+              if (downloadStructureFile)
+              {
+                String structExt = format.getExtensions().split(",")[0];
+                String urlLeafName = file.substring(
+                        file.lastIndexOf(
+                                System.getProperty("file.separator")),
+                        file.lastIndexOf("."));
+                String tempStructureFileStr = createNamedJvTempFile(
+                        urlLeafName, structExt);
+                UrlDownloadClient.download(file, tempStructureFileStr);
+                al = fa.readFile(tempStructureFileStr, DataSourceType.FILE,
+                        format);
+                source = fa.getAlignFile();
+              }
+              else
+              {
+                al = fa.readFile(file, protocol, format);
+                source = fa.getAlignFile(); // keep reference for later if
+                                            // necessary.
+              }
             }
           }
         } catch (java.io.IOException ex)
index c0328d5..f077e21 100755 (executable)
@@ -344,7 +344,8 @@ public class FileParse
           checkURLSource(fileStr);
           if (suffixSeparator == '#')
           {
-            extractSuffix(fileStr); // URL lref is stored for later reference.
+            dataName = extractSuffix(fileStr); // URL lref is stored for later
+                                               // reference.
           }
         } catch (IOException e)
         {
index 6d3c18a..bc3a560 100755 (executable)
@@ -266,5 +266,4 @@ public class FormatAdapter extends AppletFormatAdapter
             source.getDataSourceType());
     return readFromFile(fp, format);
   }
-
 }
index ff959b0..e65101d 100755 (executable)
@@ -127,14 +127,21 @@ public class IdentifyFile
         {
           // jar files are special - since they contain all sorts of random
           // characters.
-          if (source.inFile != null)
+          if (source.inFile != null || source.getDataName() != null)
           {
-            String fileStr = source.inFile.getName();
+            String fileStr = source.inFile == null ? source.getDataName()
+                    : source.inFile.getName();
             // possibly a Jalview archive.
             if (fileStr.lastIndexOf(".jar") > -1
                     || fileStr.lastIndexOf(".zip") > -1)
             {
               reply = FileFormat.Jalview;
+              // TODO shouldn't there be a break here?
+            }
+            else if (fileStr.lastIndexOf(".bam") > -1)
+            {
+              reply = FileFormat.Bam;
+              break;
             }
           }
           if (!lineswereskipped && data.startsWith("PK"))
diff --git a/test/jalview/datamodel/CigarParserTest.java b/test/jalview/datamodel/CigarParserTest.java
new file mode 100644 (file)
index 0000000..80121ee
--- /dev/null
@@ -0,0 +1,146 @@
+package jalview.datamodel;
+
+import java.util.Iterator;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordSetBuilder;
+
+public class CigarParserTest
+{
+  @BeforeClass(alwaysRun = true)
+  public void setup()
+  {
+
+
+
+  }
+
+  @DataProvider(name = "reads")
+  public Object[][] createReadsData()
+  {
+    SortedMap<Integer, Integer> noinsertions = new TreeMap<>();
+
+    SortedMap<Integer, Integer> insertions = new TreeMap<>();
+    insertions.put(8, 3);
+    insertions.put(105, 2);
+
+    SortedMap<Integer, Integer> insertions2 = new TreeMap<>();
+    insertions2.put(11, 2);
+
+    SortedMap<Integer, Integer> insertions3 = new TreeMap<>();
+    insertions3.put(8, 3);
+    insertions3.put(105, 3);
+
+    SortedMap<Integer, Integer> insertions4 = new TreeMap<>();
+    insertions4.put(8, 3);
+    insertions4.put(105, 2);
+    insertions4.put(109, 3);
+    insertions4.put(112, 1);
+
+    String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC";
+
+    return new Object[][] { { "1S84M2I14M", read, 21,
+        "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+        insertions }, // first residue is G (accounting for C soft clip) at
+                      // position 21 + 3 (insertions at position 8)
+        { "1S84M2I14M", read, 21,
+            "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
+            insertions3 }, // read has 2 insertions accounted for in
+                           // insertions3, 3rd insertion is added as gap at
+                           // position 105
+        { "1S84M2I14M", read, 21,
+            "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAG---CTC-GTTGGTC",
+            insertions4 }, // 2 insertions in read accounted for at position
+                           // 105; 3 insertions at 109 and 1 insertion at 112
+        { "44M1D57M",
+        read,
+        3,
+            "--CGAAG---CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+        insertions },
+        { "101M",
+            read, 4,
+            "---CGAA---GCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            insertions },
+        { "6M2D76M19S",
+            "CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC",
+            4,
+            "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA",
+            insertions2 },
+
+        { "44M1D57M",
+            read,
+            3,
+            "--CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "101M",
+            read, 4,
+            "---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "5S96M", read, 7,
+            "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "96M5H", read, 7,
+            "------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT",
+            noinsertions }, };
+  }
+
+  @Test(dataProvider = "reads", groups = { "Functional" })
+  public void testParse(String cigar, String read, int start, String result,
+          SortedMap<Integer, Integer> insertions)
+  {
+    SAMRecord rec = new SAMRecord(null);
+    rec.setCigarString(cigar);
+    rec.setReadString(read);
+    rec.setAlignmentStart(start);
+
+    CigarParser cp = new CigarParser('-');
+    String bfresult = cp.parseCigarToSequence(rec, insertions, 1);
+
+    System.out.println(result);
+    System.out.println(bfresult);
+    Assert.assertEquals(bfresult, result);
+  }
+
+  @Test(groups = { "Functional" })
+  public void testGetInsertions()
+  {
+    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+    builder.addFrag("read_1", 22, 30000, false, false,
+            "101M", "", 0);
+    builder.addFrag("read_2", 22, 28835, false, false,
+            "50M3I48M", "", 0);
+    builder.addFrag("read_3", 22, 28835, false, false, "3M1I75M2I1M", "",
+            0);
+    builder.addFrag("read_4", 22, 28865, false, false, "48M3I49M", "", 0);
+    builder.addFrag("read_5", 22, 28865, false, false, "49M3I47M2D2M", "",
+            0);
+    builder.addFrag("read_6", 22, 27000, false, false, "2M4I90M5S", "", 0);
+    builder.addFrag("read_7", 22, 27000, false, false, "2M1I98M", "", 0);
+
+    builder.addFrag("read_8", 22, 27000, false, false, "3M200N2I5M", "", 0);
+
+    Iterator<SAMRecord> it = builder.iterator();
+    CigarParser cp = new CigarParser('-');
+    SortedMap<Integer, Integer> insertions = cp.getInsertions(it);
+    Assert.assertEquals(insertions.size(), 6);
+    Assert.assertTrue(insertions.containsKey(28838));
+    Assert.assertEquals((int) insertions.get(28838), 1);
+    Assert.assertTrue(insertions.containsKey(28885));
+    Assert.assertEquals((int) insertions.get(28885), 3);
+    Assert.assertTrue(insertions.containsKey(28913));
+    Assert.assertEquals((int) insertions.get(28913), 3);
+    Assert.assertTrue(insertions.containsKey(28914));
+    Assert.assertEquals((int) insertions.get(28914), 3);
+    Assert.assertTrue(insertions.containsKey(27002));
+    Assert.assertEquals((int) insertions.get(27002), 4);
+    Assert.assertTrue(insertions.containsKey(27203));
+    Assert.assertEquals((int) insertions.get(27203), 2);
+  }
+}
index 89169d6..57f1156 100644 (file)
@@ -90,11 +90,6 @@ public class SeqCigarTest
     assertEquals("Failed to recover ungapped sequence cigar operations",
             "42M", cs_null);
     testCigar_string(s_gapped, ex_cs_gapped);
-    SeqCigar gen_sgapped = SeqCigar.parseCigar(s, ex_cs_gapped);
-    assertEquals("Failed parseCigar", ex_cs_gapped,
-            gen_sgapped.getCigarstring());
-
-    testSeqRecovery(gen_sgapped, s_gapped);
 
     /*
      * Test dataset resolution
index c47aaec..0babda8 100644 (file)
@@ -37,6 +37,7 @@
                <subpackage name="datamodel">
                <disallow pkg="jalview.gui"/>
                <allow pkg="fr.orsay.lri.varna"/>
+               <allow pkg="htsjdk"/>
                        <subpackage name="xdb.embl">
                        <allow pkg="org.exolab.castor"/>
                    </subpackage>
                <allow pkg="uk.ac.vamsas"/>
                <allow pkg="fr.orsay.lri.varna"/>
                <allow pkg="MCview"/>
+               <allow pkg="htsjdk.samtools"/>
                </subpackage>       
                                
                <subpackage name="javascript">