1 package jalview.datamodel;
3 import java.util.Vector;
5 public abstract class CigarBase
8 * Base class for compact idiosyncratic representation of gaps and aligned residues
9 * Regards to Tom Oldfield for his DynamicArray class.
15 // nothing to be done (probably)
18 protected int length = 0;
19 protected int _inc_length = 10; // extension range for addition of new operations
20 protected char[] operation = null;
21 protected int[] range = null;
23 * Range of Hidden residues in seq (translated as deleted in seq)
25 public static final char D = 'D'; /**
26 * Range of insertions to seq
28 public static final char I = 'I'; /**
29 * Range of aligned residues
31 public static final char M = 'M';
32 static protected final char _case_shift = 'a' - 'A';
34 * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs
35 * May return null for an empty cigar string.
36 * May return null for deletion ranges if there are none.
37 * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence)
38 * @param GapChar - the symbol to use for Insert operations
39 * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence
41 public Object[] getSequenceAndDeletions(String reference, char GapChar)
44 int[][] deletions = new int[length][];
45 int[][] trunc_deletions = null;
46 StringBuffer sq = new StringBuffer();
47 int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1;
48 boolean consecutive_del = false;
54 rlength=reference.length();
55 boolean modstart = true;
56 for (int i = 0; i < length; i++)
63 deletions[++delcount] = new int[]
68 deletions[delcount][1] = cursor - 1;
69 consecutive_del = true;
72 consecutive_del = false;
73 for (int r = 0; r < range[i]; r++)
80 consecutive_del = false;
87 if (reference!=null) {
88 int sbend = cursor+range[i];
90 sq.append(reference.substring(cursor, rlength));
91 while (sbend-- >= rlength)
96 sq.append(reference.substring(cursor, sbend));
105 throw new Error("Unknown SeqCigar operation '" + operation[i] + "'");
110 trunc_deletions = new int[delcount][];
111 System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
116 ((reference!=null) ? sq.toString() : null),
118 start, startpos, end, endpos}, trunc_deletions};
120 protected void compact_operations() {
124 char last = operation[0];
126 if (last==operation[i]) {
127 range[i-1]+=range[i];
130 System.arraycopy(range, i + 1, range, i, r);
131 System.arraycopy(operation, i + 1, operation, i, r);
135 last = operation[i++];
140 * turn a cigar string into a series of operation range pairs
141 * @param cigarString String
142 * @return object[] {char[] operation, int[] range}
143 * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations
145 public static Object[] parseCigarString(String cigarString)
149 for (int i = 0, l = cigarString.length(); i < l; i++)
151 char c = cigarString.charAt(i);
152 if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) ||
153 c == D || c == (D - _case_shift))
158 char[] operation = new char[ops];
159 int[] range = new int[ops];
161 int i = 0, l = cigarString.length();
168 c = cigarString.charAt(j++);
170 while (c >= '0' && c <= '9' && j < l);
171 if (j >= l && c >= '0' && c <= '9')
173 throw new Exception("Unterminated cigar string.");
177 String rangeint = cigarString.substring(i, j - 1);
178 range[op] = Integer.parseInt(rangeint);
183 throw new Error("Implementation bug in parseCigarString");
185 if (c >= 'a' && c <= 'z')
189 if ( (c == M || c == I || c == D))
195 throw new Exception("Unexpected operation '" + c +
196 "' in cigar string (position " + i + " in '" +
206 * add an operation to cigar string
210 public void addOperation(char op, int range)
212 if (op >= 'a' && op <= 'z')
216 if (op != M && op != D && op != I)
218 throw new Error("Implementation error. Invalid operation string.");
221 throw new Error("Invalid range string (must be non-zero positive number)");
223 if (operation == null)
225 this.operation = new char[_inc_length];
226 this.range = new int[_inc_length];
228 if (length + 1 == operation.length)
230 char[] ops = this.operation;
231 this.operation = new char[length + 1 + _inc_length];
232 System.arraycopy(ops, 0, this.operation, 0, length);
234 int[] rng = this.range;
235 this.range = new int[length + 1 + _inc_length];
236 System.arraycopy(rng, 0, this.range, 0, length);
239 if ((length>0) && (operation[length-1]==op))
240 length--; // modify existing operation.
242 this.range[length]=0; // reset range
243 this.operation[length] = op;
244 this.range[length++] += range;
248 * semi-efficient insert an operation on the current cigar string set at column pos (from 1)
249 * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment
250 * Deletion ops will shorten length of result - and affect registration of alignment
251 * Match ops will also affect length of result - affecting registration of alignment
252 * (ie "10M".insert(4,I,3)->"4M3I3M") - (replace?)
253 * (ie "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment)
254 * (ie "5I5M".insert(4,I,3)->"8I5M") - real insertion
255 * (ie "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms changed to Ds
256 * (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to M, Ds changed to M.
257 * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3
258 * ( "10D5M".insert(-1,M,3)->"3M7D5M")
259 * ( "10D5M".insert(0,M,3)->"7D8M")
260 * ( "10D5M".insert(1,M,3)->"10D8M")
262 * ( "1M10D5M".insert(0,M,3)->"1M10D8M")
263 * ( "1M10D5M".insert(1,M,3)->"
265 * if pos is beyond width - I operations are added before the operation
266 * @param pos int -1, 0-length of visible region, or greater to append new ops (with insertions in between)
269 public void addOperationAt(int pos, char op, int range)
271 int cursor = -1; // mark the position for the current operation being edited.
273 boolean last_d = false; // previous op was a deletion.
275 throw new Error("pos<-1 is not supported.");
277 if (operation[o] != D)
279 if ( (cursor + this.range[o]) < pos)
281 cursor += this.range[o];
296 // must insert more operations before pos
298 addInsertion(pos-cursor);
299 // then just add the new operation. Regardless of what it is.
300 addOperation(op, range);
302 int diff = pos - cursor;
304 int e_length = length-o; // new edit operation array length.
305 // diff<0 - can only happen before first insertion or match. - affects op and all following
306 // dif==0 - only when at first position of existing op -
307 // diff>0 - must preserve some existing operations
308 int[] e_range = new int[e_length];
309 System.arraycopy(this.range, o, e_range, 0, e_length);
310 char[] e_op = new char[e_length];
311 System.arraycopy(this.operation, o, e_op, 0, e_length);
312 length = o; // can now use add_operation to extend list.
313 int e_o=0; // current operation being edited.
319 if (last_d && diff <= 0)
321 // reduce D's, if possible
322 if (range<=this.range[o-1]) {
323 this.range[o - 1] -= range;
327 if (this.range[o-1]==0)
328 o--; // lose this op.
330 e_range[e_o] += range; // just add more matched residues
333 // change from insertion to match
334 if (last_d && diff<=0)
336 // reduce D's, if possible
337 if (range<=this.range[o-1]) {
338 this.range[o - 1] -= range;
342 if (this.range[o-1]==0)
343 o--; // lose this op.
358 throw new Error("Implementation Error: Unknown operation in addOperation!");
360 // finally, add remaining ops.
361 while (e_o<e_length) {
362 addOperation(e_op[e_o], e_range[e_o]);
369 * Mark residues from start to end (inclusive) as deleted from the alignment, and removes any insertions.
372 * @return deleted int - number of symbols marked as deleted
374 public int deleteRange(int start, int end) {
377 // nothing to do here
380 if (start<0 || start>end)
381 throw new Error("Implementation Error: deleteRange out of bounds: start must be non-negative and less than end.");
383 int cursor = 0; // mark the position for the current operation being edited.
384 int rlength=1+end-start; // number of positions to delete
387 boolean editing=false;
388 char[] oldops = operation;
389 int[] oldrange = range;
393 compact_operations();
394 while (o<oldlen && cursor<=end && rlength>0) {
395 if (oldops[o] == D) {
396 // absorbed into new deleted region.
397 addDeleted(oldrange[o++]);
401 int remain = oldrange[o]; // number of op characters left to edit
403 if ( (cursor + remain) <= start)
405 addOperation(oldops[o],oldrange[o]);
406 cursor+=oldrange[o++];
407 continue; // next operation
410 // add operations before hidden region
411 if (start-cursor>0) {
412 addOperation(oldops[o], start- cursor);
413 remain -= start - cursor;
416 // start inserting new ops
417 if (o<oldlen && editing && rlength>0 && remain>0) {
420 if (rlength>remain) {
426 if (remain-rlength>0)
427 this.addOperation(M,remain-rlength); // add remaining back.
433 if (remain-rlength>0) {
434 // only remove some gaps
435 addInsertion(remain-rlength);
440 throw new Error("Implementation error."); // do nothing;
442 throw new Error("Implementation Error! Unknown operation '"+oldops[o]+"'");
445 remain = oldrange[++o]; // number of op characters left to edit
450 addOperation(oldops[o],oldrange[o++]);
452 //if (cursor<(start+1)) {
453 // ran out of ops - nothing to do here ?
454 // addInsertion(start-cursor);
460 * Deleted regions mean that there will be discontinuous sequence numbering in the
461 * sequence returned by getSeq(char).
462 * @return true if there deletions
464 public boolean hasDeletedRegions()
466 for (int i = 0; i<length ; i++)
468 if (operation[i] == D)
476 * enumerate the ranges on seq that are marked as deleted in this cigar
477 * @return int[] { vis_start, sym_start, length }
479 public int[] getDeletedRegions() {
482 Vector dr = new Vector();
483 int cursor=0, vcursor=0;
484 for (int i=0;i<length;i++) {
485 switch (operation[i]) {
492 dr.add(new int[] { vcursor, cursor, range[i]});
498 int[] delregions = new int[dr.size()*3];
499 for (int i=0,l=dr.size(); i<l; i++) {
500 int[] reg = (int[]) dr.get(i);
501 delregions[i*3] = reg[0];
502 delregions[i*3+1] = reg[1];
503 delregions[i*3+2] = reg[2];
508 * sum of ranges in cigar string
509 * @return int number of residues hidden, matched, or gaps inserted into sequence
511 public int getFullWidth()
516 for (int i = 0; i < length; i++)
525 * Visible length of aligned sequence
526 * @return int length of including gaps and less hidden regions
528 public int getWidth()
533 for (int i = 0; i < length; i++)
535 if (operation[i] == M || operation[i] == I)
544 * mark a range of inserted residues
547 public void addInsertion(int range)
549 this.addOperation(I, range);
553 * mark the next range residues as hidden (not aligned) or deleted
556 public void addDeleted(int range)
558 this.addOperation(D, range);
562 * Modifies operation list to delete columns from start to end (inclusive)
563 * editing will remove insertion operations, and convert matches to deletions
564 * @param start alignment column
565 * @param end alignment column
566 * @return boolean true if residues were marked as deleted.
567 public boolean deleteRange(int start, int end)
569 boolean deleted = false;
570 int op = 0, prevop = -1, firstm = -1,
571 lastm = -1, postop = -1;
572 int width = 0; // zero'th column
575 // find operation bracketing start of the range
578 if (operation[op] != D)
580 width += range[prevop = op];
584 while (op < length && width < start);
588 // run off end - add more operations up to deletion.
589 addInsertion(start - width);
593 // edit existing operations.
595 width -= range[prevop];
596 int[] oldrange = range;
597 char[] oldops = operation;
598 range = new int[oldrange.length];
599 operation = new char[oldops.length];
604 if (operation[op] != D)
606 width += range[postop = op];
610 while (op < length && width <= end);
615 addDeleted(end - start + 1);
621 * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq
622 * @return String of form ([0-9]+[IMD])+
624 public String getCigarstring()
626 StringBuffer cigarString = new StringBuffer();
627 for (int i = 0; i < length; i++)
629 cigarString.append("" + range[i]);
630 cigarString.append(operation[i]);
632 return cigarString.toString();