1 package jalview.datamodel;
3 public abstract class CigarBase
6 * Base class for compact idiosyncratic representation of gaps and aligned residues
7 * Regards to Tom Oldfield for his DynamicArray class.
13 // nothing to be done (probably)
16 protected int length = 0;
17 protected int _inc_length = 10; // extension range for addition of new operations
18 protected char[] operation = null;
19 protected int[] range = null;
21 * Range of Hidden residues in seq (translated as deleted in seq)
23 public static final char D = 'D'; /**
24 * Range of insertions to seq
26 public static final char I = 'I'; /**
27 * Range of aligned residues
29 public static final char M = 'M';
30 static protected final char _case_shift = 'a' - 'A';
32 * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs
33 * May return null for an empty cigar string.
34 * May return null for deletion ranges if there are none.
35 * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence)
36 * @param GapChar - the symbol to use for Insert operations
37 * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence
39 public Object[] getSequenceAndDeletions(String reference, char GapChar)
42 int[][] deletions = new int[length][];
43 int[][] trunc_deletions = null;
44 StringBuffer sq = new StringBuffer();
45 int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1;
46 boolean consecutive_del = false;
52 rlength=reference.length();
53 boolean modstart = true;
54 for (int i = 0; i < length; i++)
61 deletions[++delcount] = new int[]
66 deletions[delcount][1] = cursor - 1;
67 consecutive_del = true;
70 consecutive_del = false;
71 for (int r = 0; r < range[i]; r++)
78 consecutive_del = false;
85 if (reference!=null) {
86 int sbend = cursor+range[i];
88 sq.append(reference.substring(cursor, rlength));
89 while (sbend-- >= rlength)
94 sq.append(reference.substring(cursor, sbend));
103 throw new Error("Unknown SeqCigar operation '" + operation[i] + "'");
108 trunc_deletions = new int[delcount][];
109 System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
114 ((reference!=null) ? sq.toString() : null),
116 start, startpos, end, endpos}, trunc_deletions};
120 * turn a cigar string into a series of operation range pairs
121 * @param cigarString String
122 * @return object[] {char[] operation, int[] range}
123 * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations
125 public static Object[] parseCigarString(String cigarString)
129 for (int i = 0, l = cigarString.length(); i < l; i++)
131 char c = cigarString.charAt(i);
132 if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) ||
133 c == D || c == (D - _case_shift))
138 char[] operation = new char[ops];
139 int[] range = new int[ops];
141 int i = 0, l = cigarString.length();
148 c = cigarString.charAt(j++);
150 while (c >= '0' && c <= '9' && j < l);
151 if (j >= l && c >= '0' && c <= '9')
153 throw new Exception("Unterminated cigar string.");
157 String rangeint = cigarString.substring(i, j - 1);
158 range[op] = Integer.parseInt(rangeint);
163 throw new Error("Implementation bug in parseCigarString");
165 if (c >= 'a' && c <= 'z')
169 if ( (c == M || c == I || c == D))
175 throw new Exception("Unexpected operation '" + c +
176 "' in cigar string (position " + i + " in '" +
186 * inefficient add one operation to cigar string
190 public void addOperation(char op, int range)
192 if (op >= 'a' && op <= 'z')
196 if (op != M && op != D && op != I)
198 throw new Error("Implementation error. Invalid operation string.");
201 throw new Error("Invalid range string (must be non-zero positive number)");
203 if (operation == null)
205 this.operation = new char[_inc_length];
206 this.range = new int[_inc_length];
208 if (length + 1 == operation.length)
210 char[] ops = this.operation;
211 this.operation = new char[length + 1 + _inc_length];
212 System.arraycopy(ops, 0, this.operation, 0, length);
214 int[] rng = this.range;
215 this.range = new int[length + 1 + _inc_length];
216 System.arraycopy(rng, 0, this.range, 0, length);
219 if ((length>0) && (operation[length-1]==op))
220 length--; // modify existing operation.
222 this.range[length]=0; // reset range
223 this.operation[length] = op;
224 this.range[length++] += range;
228 * semi-efficient insert an operation on the current cigar string set at column pos (from 1)
229 * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment
230 * Deletion ops will shorten length of result - and affect registration of alignment
231 * Match ops will also affect length of result - affecting registration of alignment
232 * (ie "10M".insert(4,I,3)->"4M3I3M")
233 * (ie "10M".insert(4,D,3)->"4M3D3M")
234 * (ie "5I5M".insert(4,I,3)->"8I5M")
235 * (ie "5I5M".insert(4,D,3)->"4I3M")
236 * if pos is beyond width - I operations are added before the operation
237 * (ie "10M".insert(4,M,3)->"13M")
238 * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3
243 public void addOperationAt(int pos, char op, int range)
249 * sum of ranges in cigar string
250 * @return int number of residues hidden, matched, or gaps inserted into sequence
252 public int getFullWidth()
257 for (int i = 0; i < length; i++)
266 * Visible length of aligned sequence
267 * @return int length of including gaps and less hidden regions
269 public int getWidth()
274 for (int i = 0; i < length; i++)
276 if (operation[i] == M || operation[i] == I)
285 * mark a range of inserted residues
288 public void addInsertion(int range)
290 this.addOperation(I, range);
294 * mark the next range residues as hidden (not aligned) or deleted
297 public void addDeleted(int range)
299 this.addOperation(D, range);
303 * Modifies operation list to delete columns from start to end (inclusive)
304 * editing will remove insertion operations, and convert matches to deletions
305 * @param start alignment column
306 * @param end alignment column
307 * @return boolean true if residues were marked as deleted.
309 public boolean deleteRange(int start, int end)
311 boolean deleted = false;
312 int op = 0, prevop = -1, firstm = -1,
313 lastm = -1, postop = -1;
314 int width = 0; // zero'th column
317 // find operation bracketing start of the range
320 if (operation[op] != D)
322 width += range[prevop = op];
326 while (op < length && width < start);
330 // run off end - add more operations up to deletion.
331 addInsertion(start - width);
335 // edit existing operations.
337 width -= range[prevop];
338 int[] oldrange = range;
339 char[] oldops = operation;
340 range = new int[oldrange.length];
341 operation = new char[oldops.length];
346 if (operation[op] != D)
348 width += range[postop = op];
352 while (op < length && width <= end);
357 addDeleted(end - start + 1);
363 * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq
364 * @return String of form ([0-9]+[IMD])+
366 public String getCigarstring()
368 StringBuffer cigarString = new StringBuffer();
369 for (int i = 0; i < length; i++)
371 cigarString.append("" + range[i]);
372 cigarString.append(operation[i]);
374 return cigarString.toString();