{
private final char gapChar;
- public CigarParser(char gapCharacter)
+ public CigarParser(char gap)
{
- gapChar = gapCharacter;
+ gapChar = gap;
}
/**
public String parseCigarToSequence(SAMRecord rec,
SortedMap<Integer, Integer> insertions)
{
+ StringBuilder newRead = new StringBuilder();
Iterator<CigarElement> it = rec.getCigar().getCigarElements()
.iterator();
- StringBuilder newRead = new StringBuilder();
int next = 0;
int refnext = 0;
// pad with spaces before read
// first count gaps needed to pad to reference position
// NB position is 1-based so number of gaps = pos-1
- int gaps = rec.getUnclippedStart() - 1;
+ int gaps = rec.getStart() - 1; // rec.getUnclippedStart() - 1;
// now count gaps to pad for insertions in other reads
int insertCount = countInsertionsBeforeRead(rec, insertions);
addGaps(newRead, gaps + insertCount);
+ // addGaps(newTrimmedRead, gaps + insertCount);
int lastinserts = 0;
while (it.hasNext())
int[] override = null;
if (lastinserts > 0)
{
- // we just inserted something
- // remove these inserts now - should be very first entry
- int count = seqInserts.get(rec.getStart() + refnext);
- override = new int[] { rec.getStart() + refnext,
- count - lastinserts };
- lastinserts = 0;
+ if (!seqInserts.containsKey(rec.getStart() + refnext))
+ {
+ // ERROR
+ int pos = rec.getStart() + refnext;
+ System.out.println("Insertion not added to seqInserts: " + pos);
+ }
+ else
+ {
+
+ // we just inserted something
+ // remove these inserts now - should be very first entry
+ int count = seqInserts.get(rec.getStart() + refnext);
+ override = new int[] { rec.getStart() + refnext,
+ count - lastinserts };
+ lastinserts = 0;
+ }
}
Iterator<Map.Entry<Integer, Integer>> iit = seqInserts.entrySet()
.iterator();
- newRead.append(applyCigarOp(el, next, rec, iit, override));
+ String nextSegment = applyCigarOp(el, next, rec, iit, override);
+ newRead.append(nextSegment);
if (el.getOperator().consumesReferenceBases())
{
}
addGaps(newRead, length + insertCount);
break;
- case S:
- // soft clipping - just skip this bit of the read
- // do nothing
-
- newRead.append(
- read.substring(nextPos, nextPos + length).toLowerCase());
- // nextPos += length;
- break;
case I:
// the reference sequence and other reads should have been gapped for
// this insertion, so just add in the residues
newRead.append(read.substring(nextPos, nextPos + length));
- // nextPos += length;
break;
+ case S:
+ // soft clipping - just skip this bit of the read
case H:
// hard clipping - this stretch will not appear in the read
default:
*/
public SortedMap<Integer, Integer> getInsertions(Iterator<SAMRecord> it)
{
- SortedMap<Integer, Integer> insertions = new TreeMap<>();
+ SortedMap<Integer, Integer> inserts = new TreeMap<>();
while (it.hasNext())
{
// check each record for insertions in the CIGAR string
// if there's already an insertion at this location, keep the longest
// insertion; if there's no insertion keep this one
- if (!insertions.containsKey(refLocation)
- || (insertions.containsKey(refLocation)
- && insertions.get(refLocation) < el.getLength()))
+ if (!inserts.containsKey(refLocation)
+ || (inserts.containsKey(refLocation)
+ && inserts.get(refLocation) < el.getLength()))
{
- insertions.put(refLocation, el.getLength());
+ inserts.put(refLocation, el.getLength());
}
next += el.getLength();
break;
}
}
- return insertions;
+ return inserts;
}
/**
*
* @param rec
* the SAMRecord for the read
- * @param insertions
+ * @param inserts
* the map of insertions
* @return number of insertions before the read starts
*/
private int countInsertionsBeforeRead(SAMRecord rec,
- SortedMap<Integer, Integer> insertions)
+ SortedMap<Integer, Integer> inserts)
{
int gaps = 0;
// add in any insertion gaps before read
// TODO start point should be start of alignment not 0
- SortedMap<Integer, Integer> seqInserts = insertions.subMap(0,
+ SortedMap<Integer, Integer> seqInserts = inserts.subMap(0,
rec.getStart());
Iterator<Map.Entry<Integer, Integer>> it = seqInserts.entrySet()
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import javax.swing.JPanel;
+
+/**
+ * A dialog where a user can choose import options for a bam file
+ */
+public class BamFileOptionsChooser extends JPanel
+{
+
+}
@Override
public void parse() throws IOException
{
- CigarParser parser = new CigarParser('-');
SAMRecordIterator it = fileReader.iterator();
+ CigarParser parser = new CigarParser('-');
SortedMap<Integer, Integer> insertions = parser.getInsertions(it);
it.close();
while (it.hasNext())
{
SAMRecord rec = it.next();
- int start = rec.getStart();
- int end = rec.getEnd();
- SequenceI seq = new Sequence(rec.getReadName(), rec.getReadString());
+ // make dataset sequence: start at 1, end at read length
+ SequenceI seq = new Sequence(rec.getReadName(),
+ rec.getReadString().toLowerCase());
+ seq.setStart(1);
+ seq.setEnd(rec.getReadLength());
- String cigarredRead = parser.parseCigarToSequence(rec, insertions);
+ String newRead = parser.parseCigarToSequence(rec, insertions);
+ // make alignment sequences
SequenceI alsq = seq.deriveSequence();
- alsq.setSequence(cigarredRead);
- alsq.setStart(start);
- alsq.setEnd(end);
+ alsq.setSequence(newRead);
+
+ // set start relative to soft clip; assume end is set by Sequence code
+ alsq.setStart(rec.getStart() - rec.getUnclippedStart() + 1);
seqs.add(alsq);
}
}
-
-
-
-
}
String read = "CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC";
return new Object[][] { { "1S84M2I14M", read, 21,
- "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+ "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
insertions },
{ "1S84M2I14M", read, 21,
- "----------------------cGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
+ "-----------------------GAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGA-GGAGCTCGTTGGTC",
insertions3 },
{ "44M1D57M",
{ "6M2D76M19S",
"CGAAGCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTCCC",
4,
- "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGAcgaggagctcgttggtccc",
+ "---CGAAGC----TTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGA",
insertions2 },
{ "44M1D57M",
"---CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
noinsertions },
{ "5S96M", read, 7,
- "-cgaagCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
+ "------CTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGTTGGTC",
noinsertions },
{ "96M5H", read, 7,
"------CGAAGCTCTTTACCCGGAAACCATTGAAATCGGACGGTTTAGTGAAATGGAGGATCAAGTTGGGTTTGGGTTCCGTCCGAACGACGAGGAGCTCGT",