1 package jalview.datamodel;
3 public abstract class CigarBase
6 * Base class for compact idiosyncratic representation of gaps and aligned residues
7 * Regards to Tom Oldfield for his DynamicArray class.
13 // nothing to be done (probably)
16 protected int length = 0;
17 protected int _inc_length = 10; // extension range for addition of new operations
18 protected char[] operation = null;
19 protected int[] range = null;
21 * Range of Hidden residues in seq (translated as deleted in seq)
23 public static final char D = 'D'; /**
24 * Range of insertions to seq
26 public static final char I = 'I'; /**
27 * Range of aligned residues
29 public static final char M = 'M';
30 static protected final char _case_shift = 'a' - 'A';
32 * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs
33 * May return null for an empty cigar string.
34 * May return null for deletion ranges if there are none.
35 * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence)
36 * @param GapChar - the symbol to use for Insert operations
37 * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence
39 public Object[] getSequenceAndDeletions(String reference, char GapChar)
42 int[][] deletions = new int[length][];
43 int[][] trunc_deletions = null;
44 StringBuffer sq = new StringBuffer();
45 int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1;
46 boolean consecutive_del = false;
52 rlength=reference.length();
53 boolean modstart = true;
54 for (int i = 0; i < length; i++)
61 deletions[++delcount] = new int[]
66 deletions[delcount][1] = cursor - 1;
67 consecutive_del = true;
70 consecutive_del = false;
71 for (int r = 0; r < range[i]; r++)
78 consecutive_del = false;
85 if (reference!=null) {
86 int sbend = cursor+range[i];
88 sq.append(reference.substring(cursor, rlength));
89 while (sbend-- >= rlength)
94 sq.append(reference.substring(cursor, sbend));
103 throw new Error("Unknown SeqCigar operation '" + operation[i] + "'");
108 trunc_deletions = new int[delcount][];
109 System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
114 ((reference!=null) ? sq.toString() : null),
116 start, startpos, end, endpos}, trunc_deletions};
120 * turn a cigar string into a series of operation range pairs
121 * @param cigarString String
122 * @return object[] {char[] operation, int[] range}
123 * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations
125 public static Object[] parseCigarString(String cigarString)
129 for (int i = 0, l = cigarString.length(); i < l; i++)
131 char c = cigarString.charAt(i);
132 if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) ||
133 c == D || c == (D - _case_shift))
138 char[] operation = new char[ops];
139 int[] range = new int[ops];
141 int i = 0, l = cigarString.length();
148 c = cigarString.charAt(j++);
150 while (c >= '0' && c <= '9' && j < l);
151 if (j >= l && c >= '0' && c <= '9')
153 throw new Exception("Unterminated cigar string.");
157 String rangeint = cigarString.substring(i, j - 1);
158 range[op] = Integer.parseInt(rangeint);
163 throw new Error("Implementation bug in parseCigarString");
165 if (c >= 'a' && c <= 'z')
169 if ( (c == M || c == I || c == D))
175 throw new Exception("Unexpected operation '" + c +
176 "' in cigar string (position " + i + " in '" +
186 * add an operation to cigar string
190 public void addOperation(char op, int range)
192 if (op >= 'a' && op <= 'z')
196 if (op != M && op != D && op != I)
198 throw new Error("Implementation error. Invalid operation string.");
201 throw new Error("Invalid range string (must be non-zero positive number)");
203 if (operation == null)
205 this.operation = new char[_inc_length];
206 this.range = new int[_inc_length];
208 if (length + 1 == operation.length)
210 char[] ops = this.operation;
211 this.operation = new char[length + 1 + _inc_length];
212 System.arraycopy(ops, 0, this.operation, 0, length);
214 int[] rng = this.range;
215 this.range = new int[length + 1 + _inc_length];
216 System.arraycopy(rng, 0, this.range, 0, length);
219 if ((length>0) && (operation[length-1]==op))
220 length--; // modify existing operation.
222 this.range[length]=0; // reset range
223 this.operation[length] = op;
224 this.range[length++] += range;
228 * semi-efficient insert an operation on the current cigar string set at column pos (from 1)
229 * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment
230 * Deletion ops will shorten length of result - and affect registration of alignment
231 * Match ops will also affect length of result - affecting registration of alignment
232 * (ie "10M".insert(4,I,3)->"4M3I3M") - (replace?)
233 * (ie "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment)
234 * (ie "5I5M".insert(4,I,3)->"8I5M") - real insertion
235 * (ie "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms changed to Ds
236 * (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to M, Ds changed to M.
237 * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3
238 * ( "10D5M".insert(-1,M,3)->"3M7D5M")
239 * ( "10D5M".insert(0,M,3)->"7D8M")
240 * ( "10D5M".insert(1,M,3)->"10D8M")
242 * ( "1M10D5M".insert(0,M,3)->"1M10D8M")
243 * ( "1M10D5M".insert(1,M,3)->"
245 * if pos is beyond width - I operations are added before the operation
246 * @param pos int -1, 0-length of visible region, or greater to append new ops (with insertions in between)
249 public void addOperationAt(int pos, char op, int range)
251 int cursor = -1; // mark the position for the current operation being edited.
253 boolean last_d = false; // previous op was a deletion.
255 throw new Error("pos<-1 is not supported.");
257 if (operation[o] != D)
259 if ( (cursor + this.range[o]) < pos)
261 cursor += this.range[o];
276 // must insert more operations before pos
278 addInsertion(pos-cursor);
279 // then just add the new operation. Regardless of what it is.
280 addOperation(op, range);
282 int diff = pos - cursor;
284 int e_length = length-o; // new edit operation array length.
285 // diff<0 - can only happen before first insertion or match. - affects op and all following
286 // dif==0 - only when at first position of existing op -
287 // diff>0 - must preserve some existing operations
288 int[] e_range = new int[e_length];
289 System.arraycopy(this.range, o, e_range, 0, e_length);
290 char[] e_op = new char[e_length];
291 System.arraycopy(this.operation, o, e_op, 0, e_length);
292 length = o; // can now use add_operation to extend list.
293 int e_o=0; // current operation being edited.
299 if (last_d && diff <= 0)
301 // reduce D's, if possible
302 if (range<=this.range[o-1]) {
303 this.range[o - 1] -= range;
307 if (this.range[o-1]==0)
308 o--; // lose this op.
310 e_range[e_o] += range; // just add more matched residues
313 // change from insertion to match
314 if (last_d && diff<=0)
316 // reduce D's, if possible
317 if (range<=this.range[o-1]) {
318 this.range[o - 1] -= range;
322 if (this.range[o-1]==0)
323 o--; // lose this op.
338 throw new Error("Implementation Error: Unknown operation in addOperation!");
340 // finally, add remaining ops.
341 while (e_o<e_length) {
342 addOperation(e_op[e_o], e_range[e_o]);
349 * Mark residues from start to end (inclusive) as deleted from the alignment, and removes any insertions.
353 public void deleteRange(int start, int end) {
355 // nothing to do here
358 if (start<0 || start>end)
359 throw new Error("Implementation Error: deleteRange out of bounds: start must be non-negative and less than end.");
361 int cursor = 0; // mark the position for the current operation being edited.
362 int rlength=1+end-start; // number of positions to delete
365 boolean editing=false;
366 char[] oldops = operation;
367 int[] oldrange = range;
371 while (o<oldlen && cursor<=end && rlength>0) {
372 if (oldops[o] == D) {
373 // absorbed into new deleted region.
374 addDeleted(oldrange[o++]);
378 int remain = oldrange[o]; // number of op characters left to edit
380 if ( (cursor + remain) <= start)
382 addOperation(oldops[o],oldrange[o]);
383 cursor+=oldrange[o++];
384 continue; // next operation
387 // add operations before hidden region
388 if (start-cursor>0) {
389 addOperation(oldops[o], start- cursor);
390 remain -= start - cursor;
393 // start inserting new ops
394 if (o<oldlen && editing && rlength>0 && remain>0) {
397 if (rlength>remain) {
401 if (remain-rlength>0)
402 this.addOperation(M,remain-rlength); // add remaining back.
408 if (remain-rlength>0) {
409 // only remove some gaps
410 addInsertion(remain-rlength);
415 throw new Error("Implementation error."); // do nothing;
417 throw new Error("Implementation Error! Unknown operation '"+oldops[o]+"'");
420 remain = oldrange[++o]; // number of op characters left to edit
425 addOperation(oldops[o],oldrange[o++]);
427 //if (cursor<(start+1)) {
428 // ran out of ops - nothing to do here ?
429 // addInsertion(start-cursor);
433 * sum of ranges in cigar string
434 * @return int number of residues hidden, matched, or gaps inserted into sequence
436 public int getFullWidth()
441 for (int i = 0; i < length; i++)
450 * Visible length of aligned sequence
451 * @return int length of including gaps and less hidden regions
453 public int getWidth()
458 for (int i = 0; i < length; i++)
460 if (operation[i] == M || operation[i] == I)
469 * mark a range of inserted residues
472 public void addInsertion(int range)
474 this.addOperation(I, range);
478 * mark the next range residues as hidden (not aligned) or deleted
481 public void addDeleted(int range)
483 this.addOperation(D, range);
487 * Modifies operation list to delete columns from start to end (inclusive)
488 * editing will remove insertion operations, and convert matches to deletions
489 * @param start alignment column
490 * @param end alignment column
491 * @return boolean true if residues were marked as deleted.
492 public boolean deleteRange(int start, int end)
494 boolean deleted = false;
495 int op = 0, prevop = -1, firstm = -1,
496 lastm = -1, postop = -1;
497 int width = 0; // zero'th column
500 // find operation bracketing start of the range
503 if (operation[op] != D)
505 width += range[prevop = op];
509 while (op < length && width < start);
513 // run off end - add more operations up to deletion.
514 addInsertion(start - width);
518 // edit existing operations.
520 width -= range[prevop];
521 int[] oldrange = range;
522 char[] oldops = operation;
523 range = new int[oldrange.length];
524 operation = new char[oldops.length];
529 if (operation[op] != D)
531 width += range[postop = op];
535 while (op < length && width <= end);
540 addDeleted(end - start + 1);
546 * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq
547 * @return String of form ([0-9]+[IMD])+
549 public String getCigarstring()
551 StringBuffer cigarString = new StringBuffer();
552 for (int i = 0; i < length; i++)
554 cigarString.append("" + range[i]);
555 cigarString.append(operation[i]);
557 return cigarString.toString();