From e2c91fd4e77e2be8942eb872e5b3a30cabb6a9bc Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Thu, 10 Aug 2006 09:50:14 +0000 Subject: [PATCH] 1.1 compatible --- src/jalview/datamodel/CigarBase.java | 1268 +++++++++++++++++----------------- 1 file changed, 634 insertions(+), 634 deletions(-) diff --git a/src/jalview/datamodel/CigarBase.java b/src/jalview/datamodel/CigarBase.java index 1e0705a..24c568b 100644 --- a/src/jalview/datamodel/CigarBase.java +++ b/src/jalview/datamodel/CigarBase.java @@ -1,634 +1,634 @@ -package jalview.datamodel; - -import java.util.Vector; - -public abstract class CigarBase -{ - /** - * Base class for compact idiosyncratic representation of gaps and aligned residues - * Regards to Tom Oldfield for his DynamicArray class. - * 17th July 2006 - * Not thread safe. - */ - public CigarBase() - { - // nothing to be done (probably) - } - - protected int length = 0; - protected int _inc_length = 10; // extension range for addition of new operations - protected char[] operation = null; - protected int[] range = null; - /** - * Range of Hidden residues in seq (translated as deleted in seq) - */ - public static final char D = 'D'; /** - * Range of insertions to seq - */ - public static final char I = 'I'; /** - * Range of aligned residues - */ - public static final char M = 'M'; - static protected final char _case_shift = 'a' - 'A'; - /** - * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs - * May return null for an empty cigar string. - * May return null for deletion ranges if there are none. - * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence) - * @param GapChar - the symbol to use for Insert operations - * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence - */ - public Object[] getSequenceAndDeletions(String reference, char GapChar) - { - int rlength=0; - int[][] deletions = new int[length][]; - int[][] trunc_deletions = null; - StringBuffer sq = new StringBuffer(); - int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1; - boolean consecutive_del = false; - if (length == 0) - { - return null; - } - if (reference!=null) - rlength=reference.length(); - boolean modstart = true; - for (int i = 0; i < length; i++) - { - switch (operation[i]) - { - case D: - if (!consecutive_del) - { - deletions[++delcount] = new int[] - { - cursor, 0, alcursor}; - } - cursor += range[i]; - deletions[delcount][1] = cursor - 1; - consecutive_del = true; - break; - case I: - consecutive_del = false; - for (int r = 0; r < range[i]; r++) - { - sq.append(GapChar); - alcursor++; - } - break; - case M: - consecutive_del = false; - if (modstart) - { - start = cursor; - startpos=alcursor; - modstart = false; - } - if (reference!=null) { - int sbend = cursor+range[i]; - if (sbend>rlength) { - sq.append(reference.substring(cursor, rlength)); - while (sbend-- >= rlength) - { - sq.append(GapChar); - } - } else { - sq.append(reference.substring(cursor, sbend)); - } - } - alcursor+=range[i]; - cursor += range[i]; - end = cursor - 1; - endpos = alcursor; - break; - default: - throw new Error("Unknown SeqCigar operation '" + operation[i] + "'"); - } - } - if (++delcount > 0) - { - trunc_deletions = new int[delcount][]; - System.arraycopy(deletions, 0, trunc_deletions, 0, delcount); - } - deletions = null; - return new Object[] - { - ((reference!=null) ? sq.toString() : null), - new int[] { - start, startpos, end, endpos}, trunc_deletions}; - } - protected void compact_operations() { - int i=1; - if (operation==null) - return; - char last = operation[0]; - while (i0) { - System.arraycopy(range, i + 1, range, i, r); - System.arraycopy(operation, i + 1, operation, i, r); - } - length--; - } else { - last = operation[i++]; - } - } - } - /** - * turn a cigar string into a series of operation range pairs - * @param cigarString String - * @return object[] {char[] operation, int[] range} - * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations - */ - public static Object[] parseCigarString(String cigarString) - throws Exception - { - int ops = 0; - for (int i = 0, l = cigarString.length(); i < l; i++) - { - char c = cigarString.charAt(i); - if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) || - c == D || c == (D - _case_shift)) - { - ops++; - } - } - char[] operation = new char[ops]; - int[] range = new int[ops]; - int op = 0; - int i = 0, l = cigarString.length(); - while (i < l) - { - char c; - int j = i; - do - { - c = cigarString.charAt(j++); - } - while (c >= '0' && c <= '9' && j < l); - if (j >= l && c >= '0' && c <= '9') - { - throw new Exception("Unterminated cigar string."); - } - try - { - String rangeint = cigarString.substring(i, j - 1); - range[op] = Integer.parseInt(rangeint); - i = j; - } - catch (Exception e) - { - throw new Error("Implementation bug in parseCigarString"); - } - if (c >= 'a' && c <= 'z') - { - c -= _case_shift; - } - if ( (c == M || c == I || c == D)) - { - operation[op++] = c; - } - else - { - throw new Exception("Unexpected operation '" + c + - "' in cigar string (position " + i + " in '" + - cigarString + "'"); - } - } - return new Object[] - { - operation, range}; - } - - /** - * add an operation to cigar string - * @param op char - * @param range int - */ - public void addOperation(char op, int range) - { - if (op >= 'a' && op <= 'z') - { - op -= _case_shift; - } - if (op != M && op != D && op != I) - { - throw new Error("Implementation error. Invalid operation string."); - } - if (range<=0) - throw new Error("Invalid range string (must be non-zero positive number)"); - int lngth = 0; - if (operation == null) - { - this.operation = new char[_inc_length]; - this.range = new int[_inc_length]; - } - if (length + 1 == operation.length) - { - char[] ops = this.operation; - this.operation = new char[length + 1 + _inc_length]; - System.arraycopy(ops, 0, this.operation, 0, length); - ops = null; - int[] rng = this.range; - this.range = new int[length + 1 + _inc_length]; - System.arraycopy(rng, 0, this.range, 0, length); - rng = null; - } - if ((length>0) && (operation[length-1]==op)) - length--; // modify existing operation. - else - this.range[length]=0; // reset range - this.operation[length] = op; - this.range[length++] += range; - } - - /** - * semi-efficient insert an operation on the current cigar string set at column pos (from 1) - * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment - * Deletion ops will shorten length of result - and affect registration of alignment - * Match ops will also affect length of result - affecting registration of alignment - * (ie "10M".insert(4,I,3)->"4M3I3M") - (replace?) - * (ie "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) - * (ie "5I5M".insert(4,I,3)->"8I5M") - real insertion - * (ie "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms changed to Ds - * (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to M, Ds changed to M. - * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3 - * ( "10D5M".insert(-1,M,3)->"3M7D5M") - * ( "10D5M".insert(0,M,3)->"7D8M") - * ( "10D5M".insert(1,M,3)->"10D8M") - * - * ( "1M10D5M".insert(0,M,3)->"1M10D8M") - * ( "1M10D5M".insert(1,M,3)->" - * - * if pos is beyond width - I operations are added before the operation - * @param pos int -1, 0-length of visible region, or greater to append new ops (with insertions in between) - * @param op char - * @param range int - public void addOperationAt(int pos, char op, int range) - { - int cursor = -1; // mark the position for the current operation being edited. - int o = 0; - boolean last_d = false; // previous op was a deletion. - if (pos < -1) - throw new Error("pos<-1 is not supported."); - while (o0) - addInsertion(pos-cursor); - // then just add the new operation. Regardless of what it is. - addOperation(op, range); - } else { - int diff = pos - cursor; - - int e_length = length-o; // new edit operation array length. - // diff<0 - can only happen before first insertion or match. - affects op and all following - // dif==0 - only when at first position of existing op - - // diff>0 - must preserve some existing operations - int[] e_range = new int[e_length]; - System.arraycopy(this.range, o, e_range, 0, e_length); - char[] e_op = new char[e_length]; - System.arraycopy(this.operation, o, e_op, 0, e_length); - length = o; // can now use add_operation to extend list. - int e_o=0; // current operation being edited. - switch (op) { - case M: - switch (e_op[e_o]) - { - case M: - if (last_d && diff <= 0) - { - // reduce D's, if possible - if (range<=this.range[o-1]) { - this.range[o - 1] -= range; - } else { - this.range[o-1]=0; - } - if (this.range[o-1]==0) - o--; // lose this op. - } - e_range[e_o] += range; // just add more matched residues - break; - case I: - // change from insertion to match - if (last_d && diff<=0) - { - // reduce D's, if possible - if (range<=this.range[o-1]) { - this.range[o - 1] -= range; - } else { - this.range[o-1]=0; - } - if (this.range[o-1]==0) - o--; // lose this op. - } - e_range[e_o] - break; - default: - throw new Inp - } - - break; - case I: - break; - case D: - } - break; - default: - throw new Error("Implementation Error: Unknown operation in addOperation!"); - } - // finally, add remaining ops. - while (e_oend) - throw new Error("Implementation Error: deleteRange out of bounds: start must be non-negative and less than end."); - // find beginning - int cursor = 0; // mark the position for the current operation being edited. - int rlength=1+end-start; // number of positions to delete - int oldlen=length; - int o = 0; - boolean editing=false; - char[] oldops = operation; - int[] oldrange = range; - length=0; - operation = null; - range = null; - compact_operations(); - while (o0) { - if (oldops[o] == D) { - // absorbed into new deleted region. - addDeleted(oldrange[o++]); - continue; - } - - int remain = oldrange[o]; // number of op characters left to edit - if (!editing) { - if ( (cursor + remain) <= start) - { - addOperation(oldops[o],oldrange[o]); - cursor+=oldrange[o++]; - continue; // next operation - } - editing=true; - // add operations before hidden region - if (start-cursor>0) { - addOperation(oldops[o], start- cursor); - remain -= start - cursor; - } - } - // start inserting new ops - if (o0 && remain>0) { - switch (oldops[o]) { - case M: - if (rlength>remain) { - addDeleted(remain); - deleted+=remain; - } else { - deleted+=rlength; - addDeleted(rlength); - if (remain-rlength>0) - this.addOperation(M,remain-rlength); // add remaining back. - rlength=0; - remain=0; - } - break; - case I: - if (remain-rlength>0) { - // only remove some gaps - addInsertion(remain-rlength); - rlength=0; - } - break; - case D: - throw new Error("Implementation error."); // do nothing; - default: - throw new Error("Implementation Error! Unknown operation '"+oldops[o]+"'"); - } - rlength-=remain; - remain = oldrange[++o]; // number of op characters left to edit - } - } - // add remaining - while (o