From 0b0b3d3687479204da2d199042c7bc90f3a6fd43 Mon Sep 17 00:00:00 2001 From: kiramt Date: Tue, 27 Feb 2018 09:16:13 +0000 Subject: [PATCH] JAL-2909 Tidy up bam file cigar code --- src/jalview/datamodel/CigarParser.java | 63 +++++++++++++++------------ src/jalview/gui/BamFileOptionsChooser.java | 31 +++++++++++++ src/jalview/io/BamFile.java | 24 +++++----- src/jalview/io/FileLoader.java | 5 +++ test/jalview/datamodel/CigarParserTest.java | 8 ++-- 5 files changed, 86 insertions(+), 45 deletions(-) create mode 100644 src/jalview/gui/BamFileOptionsChooser.java diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java index ee0d523..95f1f2d 100644 --- a/src/jalview/datamodel/CigarParser.java +++ b/src/jalview/datamodel/CigarParser.java @@ -15,9 +15,9 @@ public class CigarParser { private final char gapChar; - public CigarParser(char gapCharacter) + public CigarParser(char gap) { - gapChar = gapCharacter; + gapChar = gap; } /** @@ -32,20 +32,21 @@ public class CigarParser public String parseCigarToSequence(SAMRecord rec, SortedMap insertions) { + StringBuilder newRead = new StringBuilder(); Iterator it = rec.getCigar().getCigarElements() .iterator(); - StringBuilder newRead = new StringBuilder(); int next = 0; int refnext = 0; // pad with spaces before read // first count gaps needed to pad to reference position // NB position is 1-based so number of gaps = pos-1 - int gaps = rec.getUnclippedStart() - 1; + int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1; // now count gaps to pad for insertions in other reads int insertCount = countInsertionsBeforeRead(rec, insertions); addGaps(newRead, gaps + insertCount); + // addGaps(newTrimmedRead, gaps + insertCount); int lastinserts = 0; while (it.hasNext()) @@ -60,18 +61,29 @@ public class CigarParser int[] override = null; if (lastinserts > 0) { - // we just inserted something - // remove these inserts now - should be very first entry - int count = seqInserts.get(rec.getStart() + refnext); - override = new int[] { rec.getStart() + refnext, - count - lastinserts }; - lastinserts = 0; + if (!seqInserts.containsKey(rec.getStart() + refnext)) + { + // ERROR + int pos = rec.getStart() + refnext; + System.out.println("Insertion not added to seqInserts: " + pos); + } + else + { + + // we just inserted something + // remove these inserts now - should be very first entry + int count = seqInserts.get(rec.getStart() + refnext); + override = new int[] { rec.getStart() + refnext, + count - lastinserts }; + lastinserts = 0; + } } Iterator> iit = seqInserts.entrySet() .iterator(); - newRead.append(applyCigarOp(el, next, rec, iit, override)); + String nextSegment = applyCigarOp(el, next, rec, iit, override); + newRead.append(nextSegment); if (el.getOperator().consumesReferenceBases()) { @@ -141,20 +153,13 @@ public class CigarParser } addGaps(newRead, length + insertCount); break; - case S: - // soft clipping - just skip this bit of the read - // do nothing - - newRead.append( - read.substring(nextPos, nextPos + length).toLowerCase()); - // nextPos += length; - break; case I: // the reference sequence and other reads should have been gapped for // this insertion, so just add in the residues newRead.append(read.substring(nextPos, nextPos + length)); - // nextPos += length; break; + case S: + // soft clipping - just skip this bit of the read case H: // hard clipping - this stretch will not appear in the read default: @@ -249,7 +254,7 @@ public class CigarParser */ public SortedMap getInsertions(Iterator it) { - SortedMap insertions = new TreeMap<>(); + SortedMap inserts = new TreeMap<>(); while (it.hasNext()) { // check each record for insertions in the CIGAR string @@ -273,11 +278,11 @@ public class CigarParser // if there's already an insertion at this location, keep the longest // insertion; if there's no insertion keep this one - if (!insertions.containsKey(refLocation) - || (insertions.containsKey(refLocation) - && insertions.get(refLocation) < el.getLength())) + if (!inserts.containsKey(refLocation) + || (inserts.containsKey(refLocation) + && inserts.get(refLocation) < el.getLength())) { - insertions.put(refLocation, el.getLength()); + inserts.put(refLocation, el.getLength()); } next += el.getLength(); break; @@ -293,7 +298,7 @@ public class CigarParser } } - return insertions; + return inserts; } /** @@ -320,18 +325,18 @@ public class CigarParser * * @param rec * the SAMRecord for the read - * @param insertions + * @param inserts * the map of insertions * @return number of insertions before the read starts */ private int countInsertionsBeforeRead(SAMRecord rec, - SortedMap insertions) + SortedMap inserts) { int gaps = 0; // add in any insertion gaps before read // TODO start point should be start of alignment not 0 - SortedMap seqInserts = insertions.subMap(0, + SortedMap seqInserts = inserts.subMap(0, rec.getStart()); Iterator> it = seqInserts.entrySet() diff --git a/src/jalview/gui/BamFileOptionsChooser.java b/src/jalview/gui/BamFileOptionsChooser.java new file mode 100644 index 0000000..7f9583f --- /dev/null +++ b/src/jalview/gui/BamFileOptionsChooser.java @@ -0,0 +1,31 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.gui; + +import javax.swing.JPanel; + +/** + * A dialog where a user can choose import options for a bam file + */ +public class BamFileOptionsChooser extends JPanel +{ + +} diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 0569692..26c14a2 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -90,8 +90,8 @@ public class BamFile extends AlignFile @Override public void parse() throws IOException { - CigarParser parser = new CigarParser('-'); SAMRecordIterator it = fileReader.iterator(); + CigarParser parser = new CigarParser('-'); SortedMap insertions = parser.getInsertions(it); it.close(); @@ -99,22 +99,22 @@ public class BamFile extends AlignFile while (it.hasNext()) { SAMRecord rec = it.next(); - int start = rec.getStart(); - int end = rec.getEnd(); - SequenceI seq = new Sequence(rec.getReadName(), rec.getReadString()); + // make dataset sequence: start at 1, end at read length + SequenceI seq = new Sequence(rec.getReadName(), + rec.getReadString().toLowerCase()); + seq.setStart(1); + seq.setEnd(rec.getReadLength()); - String cigarredRead = parser.parseCigarToSequence(rec, insertions); + String newRead = parser.parseCigarToSequence(rec, insertions); + // make alignment sequences SequenceI alsq = seq.deriveSequence(); - alsq.setSequence(cigarredRead); - alsq.setStart(start); - alsq.setEnd(end); + alsq.setSequence(newRead); + + // set start relative to soft clip; assume end is set by Sequence code + alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1); seqs.add(alsq); } } - - - - } diff --git a/src/jalview/io/FileLoader.java b/src/jalview/io/FileLoader.java index f26d6da..11432bd 100755 --- a/src/jalview/io/FileLoader.java +++ b/src/jalview/io/FileLoader.java @@ -308,6 +308,11 @@ public class FileLoader implements Runnable loadtime = -System.currentTimeMillis(); AlignmentI al = null; + if (FileFormat.Bam.equals(format)) + { + // ask the user which bit of the bam they want to load + } + if (FileFormat.Jalview.equals(format)) { if (source != null) diff --git a/test/jalview/datamodel/CigarParserTest.java b/test/jalview/datamodel/CigarParserTest.java index 84a432e..8df02e3 100644 --- a/test/jalview/datamodel/CigarParserTest.java +++ b/test/jalview/datamodel/CigarParserTest.java @@ -41,10 +41,10 @@ public class CigarParserTest String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC"; return new Object[][] { { "1S84M2I14M", read, 21, - "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", + "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", insertions }, { "1S84M2I14M", read, 21, - "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC", + "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC", insertions3 }, { "44M1D57M", @@ -59,7 +59,7 @@ public class CigarParserTest { "6M2D76M19S", "CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC", 4, - "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGAcgaggagctcgttggtccc", + "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA", insertions2 }, { "44M1D57M", @@ -72,7 +72,7 @@ public class CigarParserTest "---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", noinsertions }, { "5S96M", read, 7, - "-cgaagCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", + "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC", noinsertions }, { "96M5H", read, 7, "------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT", -- 1.7.10.2