From 7798bd6a0235eb86769bb9987db735a3959ad031 Mon Sep 17 00:00:00 2001 From: kiramt Date: Wed, 28 Feb 2018 16:39:49 +0000 Subject: [PATCH] JAL-2909 Gaps at start of reads only from start of alignment window --- src/jalview/datamodel/CigarParser.java | 12 ++++++++---- src/jalview/io/BamFile.java | 13 ++++++++++++- test/jalview/datamodel/CigarParserTest.java | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/jalview/datamodel/CigarParser.java b/src/jalview/datamodel/CigarParser.java index 95f1f2d..786896b 100644 --- a/src/jalview/datamodel/CigarParser.java +++ b/src/jalview/datamodel/CigarParser.java @@ -27,10 +27,13 @@ public class CigarParser * the SAM record for the read * @param insertions * a map of inserted positions and lengths + * @param alignmentStart + * start of alignment to be used as offset when padding reads with + * gaps * @return string representing read with gaps, clipping etc applied */ public String parseCigarToSequence(SAMRecord rec, - SortedMap insertions) + SortedMap insertions, int alignmentStart) { StringBuilder newRead = new StringBuilder(); Iterator it = rec.getCigar().getCigarElements() @@ -42,11 +45,11 @@ public class CigarParser // pad with spaces before read // first count gaps needed to pad to reference position // NB position is 1-based so number of gaps = pos-1 - int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1; + int gaps = rec.getStart() - alignmentStart; + // now count gaps to pad for insertions in other reads int insertCount = countInsertionsBeforeRead(rec, insertions); addGaps(newRead, gaps + insertCount); - // addGaps(newTrimmedRead, gaps + insertCount); int lastinserts = 0; while (it.hasNext()) @@ -335,7 +338,8 @@ public class CigarParser int gaps = 0; // add in any insertion gaps before read - // TODO start point should be start of alignment not 0 + // although we only need to get the submap from alignmentStart, there won't + // be any insertions before that so we can just start at 0 SortedMap seqInserts = inserts.subMap(0, rec.getStart()); diff --git a/src/jalview/io/BamFile.java b/src/jalview/io/BamFile.java index 1aebbe9..277a61d 100644 --- a/src/jalview/io/BamFile.java +++ b/src/jalview/io/BamFile.java @@ -51,6 +51,9 @@ public class BamFile extends AlignFile // chromosome/contig to read private String chromosome = ""; + // first position in alignment + private int alignmentStart = -1; + /** * Creates a new BamFile object. */ @@ -122,13 +125,21 @@ public class BamFile extends AlignFile { SAMRecord rec = it.next(); + // set the alignment start to be start of first read (we assume reads + // are sorted) + if (alignmentStart == -1) + { + alignmentStart = rec.getAlignmentStart(); + } + // make dataset sequence: start at 1, end at read length SequenceI seq = new Sequence(rec.getReadName(), rec.getReadString().toLowerCase()); seq.setStart(1); seq.setEnd(rec.getReadLength()); - String newRead = parser.parseCigarToSequence(rec, insertions); + String newRead = parser.parseCigarToSequence(rec, insertions, + alignmentStart); // make alignment sequences SequenceI alsq = seq.deriveSequence(); diff --git a/test/jalview/datamodel/CigarParserTest.java b/test/jalview/datamodel/CigarParserTest.java index 8df02e3..db3e198 100644 --- a/test/jalview/datamodel/CigarParserTest.java +++ b/test/jalview/datamodel/CigarParserTest.java @@ -89,7 +89,7 @@ public class CigarParserTest rec.setAlignmentStart(start); CigarParser cp = new CigarParser('-'); - String bfresult = cp.parseCigarToSequence(rec, insertions); + String bfresult = cp.parseCigarToSequence(rec, insertions, 1); System.out.println(result); System.out.println(bfresult); -- 1.7.10.2