JAL-2909 minimal merge of bam import demo to Jalview 2.11.2 develop
[jalview.git] / test / jalview / datamodel / CigarParserTest.java
diff --git a/test/jalview/datamodel/CigarParserTest.java b/test/jalview/datamodel/CigarParserTest.java
new file mode 100644 (file)
index 0000000..476721c
--- /dev/null
@@ -0,0 +1,150 @@
+package jalview.datamodel;
+
+import java.util.Iterator;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordSetBuilder;
+
+public class CigarParserTest
+{
+  @BeforeClass(alwaysRun = true)
+  public void setup()
+  {
+
+
+
+  }
+
+  @DataProvider(name = "reads")
+  public Object[][] createReadsData()
+  {
+    SortedMap<Integer, Integer> noinsertions = new TreeMap<>();
+
+    SortedMap<Integer, Integer> insertions = new TreeMap<>();
+    insertions.put(8, 3);
+    insertions.put(105, 2);
+
+    SortedMap<Integer, Integer> insertions2 = new TreeMap<>();
+    insertions2.put(11, 2);
+
+    SortedMap<Integer, Integer> insertions3 = new TreeMap<>();
+    insertions3.put(8, 3);
+    insertions3.put(105, 3);
+
+    SortedMap<Integer, Integer> insertions4 = new TreeMap<>();
+    insertions4.put(8, 3);
+    insertions4.put(105, 2);
+    insertions4.put(109, 3);
+    insertions4.put(112, 1);
+
+    String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC";
+
+    return new Object[][] { { "1S84M2I14M", read, 21,
+        "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+        insertions }, // first residue is G (accounting for C soft clip) at
+                      // position 21 + 3 (insertions at position 8)
+        { "1S84M2I14M", read, 21,
+            "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
+            insertions3 }, // read has 2 insertions accounted for in
+                           // insertions3, 3rd insertion is added as gap at
+                           // position 105
+        { "1S84M2I14M", read, 21,
+            "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAG---CTC-GTTGGTC",
+            insertions4 }, // 2 insertions in read accounted for at position
+                           // 105; 3 insertions at 109 and 1 insertion at 112
+        { "44M1D57M",
+        read,
+        3,
+            "--CGAAG---CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+        insertions },
+        { "101M",
+            read, 4,
+            "---CGAA---GCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            insertions },
+        { "6M2D76M19S",
+            "CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC",
+            4,
+            "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA",
+            insertions2 },
+
+        { "44M1D57M",
+            read,
+            3,
+            "--CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "101M",
+            read, 4,
+            "---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "5S96M", read, 7,
+            "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+            noinsertions },
+        { "96M5H", read, 7,
+            "------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT",
+            noinsertions }, };
+  }
+
+  @Test(dataProvider = "reads", groups = { "Functional" })
+  public void testParse(String cigar, String read, int start, String result,
+          SortedMap<Integer, Integer> insertions)
+  {
+    SAMRecord rec = new SAMRecord(null);
+    rec.setCigarString(cigar);
+    rec.setReadString(read);
+    rec.setAlignmentStart(start);
+
+    CigarParser cp = new CigarParser('-');
+    String bfresult = cp.parseCigarToSequence(rec, insertions, 1, null);
+
+    System.out.println(result);
+    System.out.println(bfresult);
+    Assert.assertEquals(bfresult, result);
+  }
+
+  @Test(groups = { "Functional" })
+  public void testGetInsertions()
+  {
+    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+    builder.addFrag("read_1", 22, 30000, false, false,
+            "101M", "", 0);
+    builder.addFrag("read_2", 22, 28835, false, false,
+            "50M3I48M", "", 0);
+    builder.addFrag("read_3", 22, 28835, false, false, "3M1I75M2I1M", "",
+            0);
+    builder.addFrag("read_4", 22, 28865, false, false, "48M3I49M", "", 0);
+    builder.addFrag("read_5", 22, 28865, false, false, "49M3I47M2D2M", "",
+            0);
+    builder.addFrag("read_6", 22, 27000, false, false, "2M4I90M5S", "", 0);
+    builder.addFrag("read_7", 22, 27000, false, false, "2M1I98M", "", 0);
+
+    builder.addFrag("read_8", 22, 27000, false, false, "3M200N2I5M", "", 0);
+
+    Iterator<SAMRecord> it = builder.iterator();
+    CigarParser cp = new CigarParser('-');
+    Range xtent[] = new Range[] { new Range(0, 0) };
+    SortedMap<Integer, Integer> insertions,
+            insertStrands[] = cp.getInsertions(it, xtent);
+    Assert.assertEquals(insertStrands.length, 2);
+    insertions = insertStrands[0];
+    Assert.assertEquals(insertions.size(), 6);
+    Assert.assertTrue(insertions.containsKey(28838));
+    Assert.assertEquals((int) insertions.get(28838), 1);
+    Assert.assertTrue(insertions.containsKey(28885));
+    Assert.assertEquals((int) insertions.get(28885), 3);
+    Assert.assertTrue(insertions.containsKey(28913));
+    Assert.assertEquals((int) insertions.get(28913), 3);
+    Assert.assertTrue(insertions.containsKey(28914));
+    Assert.assertEquals((int) insertions.get(28914), 3);
+    Assert.assertTrue(insertions.containsKey(27002));
+    Assert.assertEquals((int) insertions.get(27002), 4);
+    Assert.assertTrue(insertions.containsKey(27203));
+    Assert.assertEquals((int) insertions.get(27203), 2);
+  }
+}