package jalview.datamodel; import java.util.Iterator; import java.util.SortedMap; import java.util.TreeMap; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; public class CigarParserTest { @BeforeClass(alwaysRun = true) public void setup() { } @DataProvider(name = "reads") public Object[][] createReadsData() { SortedMap noinsertions = new TreeMap<>(); SortedMap insertions = new TreeMap<>(); insertions.put(8, 3); insertions.put(105, 2); SortedMap insertions2 = new TreeMap<>(); insertions2.put(11, 2); SortedMap insertions3 = new TreeMap<>(); insertions3.put(8, 3); insertions3.put(105, 3); SortedMap insertions4 = new TreeMap<>(); insertions4.put(8, 3); insertions4.put(105, 2); insertions4.put(109, 3); insertions4.put(112, 1); String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC"; return new Object[][] { { "1S84M2I14M", read, 21, "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", insertions }, // first residue is G (accounting for C soft clip) at // position 21 + 3 (insertions at position 8) { "1S84M2I14M", read, 21, "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC", insertions3 }, // read has 2 insertions accounted for in // insertions3, 3rd insertion is added as gap at // position 105 { "1S84M2I14M", read, 21, "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAG---CTC-GTTGGTC", insertions4 }, // 2 insertions in read accounted for at position // 105; 3 insertions at 109 and 1 insertion at 112 { "44M1D57M", read, 3, "--CGAAG---CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", insertions }, { "101M", read, 4, "---CGAA---GCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", insertions }, { "6M2D76M19S", "CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC", 4, "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA", insertions2 }, { "44M1D57M", read, 3, "--CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTG-AAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", noinsertions }, { "101M", read, 4, "---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", noinsertions }, { "5S96M", read, 7, "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", noinsertions }, { "96M5H", read, 7, "------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT", noinsertions }, }; } @Test(dataProvider = "reads", groups = { "Functional" }) public void testParse(String cigar, String read, int start, String result, SortedMap insertions) { SAMRecord rec = new SAMRecord(null); rec.setCigarString(cigar); rec.setReadString(read); rec.setAlignmentStart(start); CigarParser cp = new CigarParser('-'); String bfresult = cp.parseCigarToSequence(rec, insertions, 1); System.out.println(result); System.out.println(bfresult); Assert.assertEquals(bfresult, result); } @Test(groups = { "Functional" }) public void testGetInsertions() { final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); builder.addFrag("read_1", 22, 30000, false, false, "101M", "", 0); builder.addFrag("read_2", 22, 28835, false, false, "50M3I48M", "", 0); builder.addFrag("read_3", 22, 28835, false, false, "3M1I75M2I1M", "", 0); builder.addFrag("read_4", 22, 28865, false, false, "48M3I49M", "", 0); builder.addFrag("read_5", 22, 28865, false, false, "49M3I47M2D2M", "", 0); builder.addFrag("read_6", 22, 27000, false, false, "2M4I90M5S", "", 0); builder.addFrag("read_7", 22, 27000, false, false, "2M1I98M", "", 0); builder.addFrag("read_8", 22, 27000, false, false, "3M200N2I5M", "", 0); Iterator it = builder.iterator(); CigarParser cp = new CigarParser('-'); SortedMap insertions = cp.getInsertions(it); Assert.assertEquals(insertions.size(), 6); Assert.assertTrue(insertions.containsKey(28838)); Assert.assertEquals((int) insertions.get(28838), 1); Assert.assertTrue(insertions.containsKey(28885)); Assert.assertEquals((int) insertions.get(28885), 3); Assert.assertTrue(insertions.containsKey(28913)); Assert.assertEquals((int) insertions.get(28913), 3); Assert.assertTrue(insertions.containsKey(28914)); Assert.assertEquals((int) insertions.get(28914), 3); Assert.assertTrue(insertions.containsKey(27002)); Assert.assertEquals((int) insertions.get(27002), 4); Assert.assertTrue(insertions.containsKey(27203)); Assert.assertEquals((int) insertions.get(27203), 2); } }