package jalview.datamodel; public abstract class CigarBase { /** * Base class for compact idiosyncratic representation of gaps and aligned residues * Regards to Tom Oldfield for his DynamicArray class. * 17th July 2006 * Not thread safe. */ public CigarBase() { // nothing to be done (probably) } protected int length = 0; protected int _inc_length = 10; // extension range for addition of new operations protected char[] operation = null; protected int[] range = null; /** * Range of Hidden residues in seq (translated as deleted in seq) */ public static final char D = 'D'; /** * Range of insertions to seq */ public static final char I = 'I'; /** * Range of aligned residues */ public static final char M = 'M'; static protected final char _case_shift = 'a' - 'A'; /** * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs * May return null for an empty cigar string. * May return null for deletion ranges if there are none. * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence) * @param GapChar - the symbol to use for Insert operations * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence */ public Object[] getSequenceAndDeletions(String reference, char GapChar) { int rlength=0; int[][] deletions = new int[length][]; int[][] trunc_deletions = null; StringBuffer sq = new StringBuffer(); int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1; boolean consecutive_del = false; if (length == 0) { return null; } if (reference!=null) rlength=reference.length(); boolean modstart = true; for (int i = 0; i < length; i++) { switch (operation[i]) { case D: if (!consecutive_del) { deletions[++delcount] = new int[] { cursor, 0, alcursor}; } cursor += range[i]; deletions[delcount][1] = cursor - 1; consecutive_del = true; break; case I: consecutive_del = false; for (int r = 0; r < range[i]; r++) { sq.append(GapChar); alcursor++; } break; case M: consecutive_del = false; if (modstart) { start = cursor; startpos=alcursor; modstart = false; } if (reference!=null) { int sbend = cursor+range[i]; if (sbend>=rlength) { sq.append(reference.substring(cursor, rlength)); while (sbend-- >= rlength) { sq.append(GapChar); } } else { sq.append(reference.substring(cursor, sbend)); } } alcursor+=range[i]; cursor += range[i]; end = cursor - 1; endpos = alcursor; break; default: throw new Error("Unknown SeqCigar operation '" + operation[i] + "'"); } } if (++delcount > 0) { trunc_deletions = new int[delcount][]; System.arraycopy(deletions, 0, trunc_deletions, 0, delcount); } deletions = null; return new Object[] { ((reference!=null) ? sq.toString() : null), new int[] { start, startpos, end, endpos}, trunc_deletions}; } /** * turn a cigar string into a series of operation range pairs * @param cigarString String * @return object[] {char[] operation, int[] range} * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations */ public static Object[] parseCigarString(String cigarString) throws Exception { int ops = 0; for (int i = 0, l = cigarString.length(); i < l; i++) { char c = cigarString.charAt(i); if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) || c == D || c == (D - _case_shift)) { ops++; } } char[] operation = new char[ops]; int[] range = new int[ops]; int op = 0; int i = 0, l = cigarString.length(); while (i < l) { char c; int j = i; do { c = cigarString.charAt(j++); } while (c >= '0' && c <= '9' && j < l); if (j >= l && c >= '0' && c <= '9') { throw new Exception("Unterminated cigar string."); } try { String rangeint = cigarString.substring(i, j - 1); range[op] = Integer.parseInt(rangeint); i = j; } catch (Exception e) { throw new Error("Implementation bug in parseCigarString"); } if (c >= 'a' && c <= 'z') { c -= _case_shift; } if ( (c == M || c == I || c == D)) { operation[op++] = c; } else { throw new Exception("Unexpected operation '" + c + "' in cigar string (position " + i + " in '" + cigarString + "'"); } } return new Object[] { operation, range}; } /** * inefficient add one operation to cigar string * @param op char * @param range int */ public void addOperation(char op, int range) { if (op >= 'a' && op <= 'z') { op -= _case_shift; } if (op != M && op != D && op != I) { throw new Error("Implementation error. Invalid operation string."); } if (range<=0) throw new Error("Invalid range string (must be non-zero positive number)"); int lngth = 0; if (operation == null) { this.operation = new char[_inc_length]; this.range = new int[_inc_length]; } if (length + 1 == operation.length) { char[] ops = this.operation; this.operation = new char[length + 1 + _inc_length]; System.arraycopy(ops, 0, this.operation, 0, length); ops = null; int[] rng = this.range; this.range = new int[length + 1 + _inc_length]; System.arraycopy(rng, 0, this.range, 0, length); rng = null; } if ((length>0) && (operation[length-1]==op)) length--; // modify existing operation. else this.range[length]=0; // reset range this.operation[length] = op; this.range[length++] += range; } /** * semi-efficient insert an operation on the current cigar string set at column pos (from 1) * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment * Deletion ops will shorten length of result - and affect registration of alignment * Match ops will also affect length of result - affecting registration of alignment * (ie "10M".insert(4,I,3)->"4M3I3M") * (ie "10M".insert(4,D,3)->"4M3D3M") * (ie "5I5M".insert(4,I,3)->"8I5M") * (ie "5I5M".insert(4,D,3)->"4I3M") * if pos is beyond width - I operations are added before the operation * (ie "10M".insert(4,M,3)->"13M") * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3 * @param pos int * @param op char * @param range int */ public void addOperationAt(int pos, char op, int range) { } /** * sum of ranges in cigar string * @return int number of residues hidden, matched, or gaps inserted into sequence */ public int getFullWidth() { int w = 0; if (range != null) { for (int i = 0; i < length; i++) { w += range[i]; } } return w; } /** * Visible length of aligned sequence * @return int length of including gaps and less hidden regions */ public int getWidth() { int w = 0; if (range != null) { for (int i = 0; i < length; i++) { if (operation[i] == M || operation[i] == I) { w += range[i]; } } } return w; } /** * mark a range of inserted residues * @param range int */ public void addInsertion(int range) { this.addOperation(I, range); } /** * mark the next range residues as hidden (not aligned) or deleted * @param range int */ public void addDeleted(int range) { this.addOperation(D, range); } /** * Modifies operation list to delete columns from start to end (inclusive) * editing will remove insertion operations, and convert matches to deletions * @param start alignment column * @param end alignment column * @return boolean true if residues were marked as deleted. */ public boolean deleteRange(int start, int end) { boolean deleted = false; int op = 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width = 0; // zero'th column if (length > 0) { // find operation bracketing start of the range do { if (operation[op] != D) { width += range[prevop = op]; } op++; } while (op < length && width < start); } if (width < start) { // run off end - add more operations up to deletion. addInsertion(start - width); } else { // edit existing operations. op = prevop; width -= range[prevop]; int[] oldrange = range; char[] oldops = operation; range = new int[oldrange.length]; operation = new char[oldops.length]; if (op < length) { do { if (operation[op] != D) { width += range[postop = op]; } op++; } while (op < length && width <= end); } } if (deleted == true) { addDeleted(end - start + 1); } return deleted; } /** * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq * @return String of form ([0-9]+[IMD])+ */ public String getCigarstring() { StringBuffer cigarString = new StringBuffer(); for (int i = 0; i < length; i++) { cigarString.append("" + range[i]); cigarString.append(operation[i]); } return cigarString.toString(); } }