2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.util.MessageManager;
25 import java.util.Vector;
27 public abstract class CigarBase
30 * Base class for compact idiosyncratic representation of gaps and aligned
31 * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006
36 // nothing to be done (probably)
39 protected int length = 0;
41 protected int _inc_length = 10; // extension range for addition of new
45 protected char[] operation = null;
47 protected int[] range = null;
50 * Range of Hidden residues in seq (translated as deleted in seq)
52 public static final char D = 'D';
55 * Range of insertions to seq
57 public static final char I = 'I';
60 * Range of aligned residues
62 public static final char M = 'M';
64 static protected final char _case_shift = 'a' - 'A';
67 * Ugly function to get edited sequence string, start and end symbol positions
68 * and the deletion regions as an array of int pairs May return null for an
69 * empty cigar string. May return null for deletion ranges if there are none.
72 * - the symbol sequence to apply the cigar operations to (or null if
75 * - the symbol to use for Insert operations
76 * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]
77 * {start, end, col} or null} the gapped sequence, first and last
78 * residue index, and the deletion ranges on the reference sequence
80 public Object[] getSequenceAndDeletions(String reference, char GapChar)
83 int[][] deletions = new int[length][];
84 int[][] trunc_deletions = null;
85 StringBuffer sq = new StringBuffer();
86 int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0,
87 endpos = 0, delcount = -1;
88 boolean consecutive_del = false;
93 if (reference != null)
95 rlength = reference.length();
97 boolean modstart = true;
98 for (int i = 0; i < length; i++)
100 switch (operation[i])
103 if (!consecutive_del)
105 deletions[++delcount] = new int[] { cursor, 0, alcursor };
108 deletions[delcount][1] = cursor - 1;
109 consecutive_del = true;
112 consecutive_del = false;
113 for (int r = 0; r < range[i]; r++)
120 consecutive_del = false;
127 if (reference != null)
129 int sbend = cursor + range[i];
132 sq.append(reference.substring(cursor, rlength));
133 while (sbend-- >= rlength)
140 sq.append(reference.substring(cursor, sbend));
143 alcursor += range[i];
149 throw new Error(MessageManager.formatMessage(
150 "error.unknown_seq_cigar_operation", new String[]
151 { new StringBuffer(operation[i]).toString() }));
156 trunc_deletions = new int[delcount][];
157 System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
160 return new Object[] { ((reference != null) ? sq.toString() : null),
162 { start, startpos, end, endpos }, trunc_deletions };
165 protected void compact_operations()
168 if (operation == null)
172 char last = operation[0];
175 if (last == operation[i])
177 range[i - 1] += range[i];
181 System.arraycopy(range, i + 1, range, i, r);
182 System.arraycopy(operation, i + 1, operation, i, r);
188 last = operation[i++];
194 * add an operation to cigar string
201 public void addOperation(char op, int range)
203 if (op >= 'a' && op <= 'z')
207 if (op != M && op != D && op != I)
209 throw new Error(MessageManager.getString(
210 "error.implementation_error_invalid_operation_string"));
214 return; // No Operation to add.
219 MessageManager.getString("error.invalid_range_string"));
222 if (operation == null)
224 this.operation = new char[_inc_length];
225 this.range = new int[_inc_length];
227 if (length + 1 == operation.length)
229 char[] ops = this.operation;
230 this.operation = new char[length + 1 + _inc_length];
231 System.arraycopy(ops, 0, this.operation, 0, length);
233 int[] rng = this.range;
234 this.range = new int[length + 1 + _inc_length];
235 System.arraycopy(rng, 0, this.range, 0, length);
238 if ((length > 0) && (operation[length - 1] == op))
240 length--; // modify existing operation.
244 this.range[length] = 0; // reset range
246 this.operation[length] = op;
247 this.range[length++] += range;
251 * semi-efficient insert an operation on the current cigar string set at
252 * column pos (from 1) NOTE: Insertion operations simply extend width of cigar
253 * result - affecting registration of alignment Deletion ops will shorten
254 * length of result - and affect registration of alignment Match ops will also
255 * affect length of result - affecting registration of alignment (ie
256 * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie
257 * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie
258 * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie
259 * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms
260 * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to
261 * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts
262 * sequence left by 1 residue and extends it by 3 (
263 * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (
264 * "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (
265 * "1M10D5M".insert(1,M,3)->"
267 * if pos is beyond width - I operations are added before the operation
270 * int -1, 0-length of visible region, or greater to append new ops
271 * (with insertions in between)
275 * int public void addOperationAt(int pos, char op, int range) { int
276 * cursor = -1; // mark the position for the current operation being
277 * edited. int o = 0; boolean last_d = false; // previous op was a
278 * deletion. if (pos < -1) throw new Error("pos<-1 is not
279 * supported."); while (o<length) { if (operation[o] != D) { if (
280 * (cursor + this.range[o]) < pos) { cursor += this.range[o]; o++;
281 * last_d=false; } else { break; } } else { last_d=true; o++; } } if
282 * (o==length) { // must insert more operations before pos if
283 * (pos-cursor>0) addInsertion(pos-cursor); // then just add the new
284 * operation. Regardless of what it is. addOperation(op, range); }
285 * else { int diff = pos - cursor;
287 * int e_length = length-o; // new edit operation array length. //
288 * diff<0 - can only happen before first insertion or match. -
289 * affects op and all following // dif==0 - only when at first
290 * position of existing op - // diff>0 - must preserve some existing
291 * operations int[] e_range = new int[e_length];
292 * System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op
293 * = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,
294 * e_length); length = o; // can now use add_operation to extend
295 * list. int e_o=0; // current operation being edited. switch (op) {
296 * case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //
297 * reduce D's, if possible if (range<=this.range[o-1]) { this.range[o
298 * - 1] -= range; } else { this.range[o-1]=0; } if
299 * (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=
300 * range; // just add more matched residues break; case I: // change
301 * from insertion to match if (last_d && diff<=0) { // reduce D's, if
302 * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;
303 * } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose
304 * this op. } e_range[e_o] break; default: throw new Inp }
306 * break; case I: break; case D: } break; default: throw new
307 * Error("Implementation Error: Unknown operation in addOperation!");
308 * } // finally, add remaining ops. while (e_o<e_length) {
309 * addOperation(e_op[e_o], e_range[e_o]); e_o++; } } }
312 * Mark residues from start to end (inclusive) as deleted from the alignment,
313 * and removes any insertions.
319 * @return deleted int - number of symbols marked as deleted
321 public int deleteRange(int start, int end)
326 // nothing to do here
329 if (start < 0 || start > end)
331 throw new Error(MessageManager.getString(
332 "error.implementation_error_delete_range_out_of_bounds"));
335 int cursor = 0; // mark the position for the current operation being edited.
336 int rlength = 1 + end - start; // number of positions to delete
339 boolean editing = false;
340 char[] oldops = operation;
341 int[] oldrange = range;
345 compact_operations();
346 while (o < oldlen && cursor <= end && rlength > 0)
350 // absorbed into new deleted region.
351 addDeleted(oldrange[o++]);
355 int remain = oldrange[o]; // number of op characters left to edit
358 if ((cursor + remain) <= start)
360 addOperation(oldops[o], oldrange[o]);
361 cursor += oldrange[o++];
362 continue; // next operation
365 // add operations before hidden region
366 if (start - cursor > 0)
368 addOperation(oldops[o], start - cursor);
369 remain -= start - cursor;
372 // start inserting new ops
373 if (o < oldlen && editing && rlength > 0 && remain > 0)
378 if (rlength > remain)
387 if (remain - rlength > 0)
389 this.addOperation(M, remain - rlength); // add remaining back.
396 if (remain - rlength > 0)
398 // only remove some gaps
399 addInsertion(remain - rlength);
405 MessageManager.getString("error.implementation_error")); // do
408 throw new Error(MessageManager.formatMessage(
409 "error.implementation_error_unknown_operation",
411 { new StringBuffer(oldops[o]).toString() }));
414 remain = oldrange[++o]; // number of op characters left to edit
420 addOperation(oldops[o], oldrange[o++]);
422 // if (cursor<(start+1)) {
423 // ran out of ops - nothing to do here ?
424 // addInsertion(start-cursor);
430 * Deleted regions mean that there will be discontinuous sequence numbering in
431 * the sequence returned by getSeq(char).
433 * @return true if there deletions
435 public boolean hasDeletedRegions()
437 for (int i = 0; i < length; i++)
439 if (operation[i] == D)
448 * enumerate the ranges on seq that are marked as deleted in this cigar
450 * @return int[] { vis_start, sym_start, length }
452 public int[] getDeletedRegions()
458 Vector dr = new Vector();
459 int cursor = 0, vcursor = 0;
460 for (int i = 0; i < length; i++)
462 switch (operation[i])
471 dr.addElement(new int[] { vcursor, cursor, range[i] });
479 int[] delregions = new int[dr.size() * 3];
480 for (int i = 0, l = dr.size(); i < l; i++)
482 int[] reg = (int[]) dr.elementAt(i);
483 delregions[i * 3] = reg[0];
484 delregions[i * 3 + 1] = reg[1];
485 delregions[i * 3 + 2] = reg[2];
491 * sum of ranges in cigar string
493 * @return int number of residues hidden, matched, or gaps inserted into
496 public int getFullWidth()
501 for (int i = 0; i < length; i++)
510 * Visible length of aligned sequence
512 * @return int length of including gaps and less hidden regions
514 public int getWidth()
519 for (int i = 0; i < length; i++)
521 if (operation[i] == M || operation[i] == I)
531 * mark a range of inserted residues
536 public void addInsertion(int range)
538 this.addOperation(I, range);
542 * mark the next range residues as hidden (not aligned) or deleted
547 public void addDeleted(int range)
549 this.addOperation(D, range);
553 * Modifies operation list to delete columns from start to end (inclusive)
554 * editing will remove insertion operations, and convert matches to deletions
560 * @return boolean true if residues were marked as deleted. public boolean
561 * deleteRange(int start, int end) { boolean deleted = false; int op =
562 * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =
563 * 0; // zero'th column if (length > 0) { // find operation bracketing
564 * start of the range do { if (operation[op] != D) { width +=
565 * range[prevop = op]; } op++; } while (op < length && width < start);
566 * } if (width < start) { // run off end - add more operations up to
567 * deletion. addInsertion(start - width); } else { // edit existing
568 * operations. op = prevop; width -= range[prevop]; int[] oldrange =
569 * range; char[] oldops = operation; range = new int[oldrange.length];
570 * operation = new char[oldops.length]; if (op < length) { do { if
571 * (operation[op] != D) { width += range[postop = op]; } op++; } while
572 * (op < length && width <= end); } } if (deleted == true) {
573 * addDeleted(end - start + 1); } return deleted; }
576 * Return an ENSEMBL style cigar string where D may indicates excluded parts
579 * @return String of form ([0-9]+[IMD])+
581 public String getCigarstring()
583 StringBuffer cigarString = new StringBuffer();
584 for (int i = 0; i < length; i++)
586 cigarString.append("" + range[i]);
587 cigarString.append(operation[i]);
589 return cigarString.toString();