-/*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)\r
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
- * \r
- * This file is part of Jalview.\r
- * \r
- * Jalview is free software: you can redistribute it and/or\r
- * modify it under the terms of the GNU General Public License \r
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.\r
- * \r
- * Jalview is distributed in the hope that it will be useful, but \r
- * WITHOUT ANY WARRANTY; without even the implied warranty \r
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR \r
- * PURPOSE. See the GNU General Public License for more details.\r
- * \r
- * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.\r
- */\r
-package jalview.datamodel;\r
-\r
-import java.util.*;\r
-\r
-public abstract class CigarBase\r
-{\r
- /**\r
- * Base class for compact idiosyncratic representation of gaps and aligned\r
- * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006\r
- * Not thread safe.\r
- */\r
- public CigarBase()\r
- {\r
- // nothing to be done (probably)\r
- }\r
-\r
- protected int length = 0;\r
-\r
- protected int _inc_length = 10; // extension range for addition of new\r
-\r
- // operations\r
-\r
- protected char[] operation = null;\r
-\r
- protected int[] range = null;\r
-\r
- /**\r
- * Range of Hidden residues in seq (translated as deleted in seq)\r
- */\r
- public static final char D = 'D';\r
-\r
- /**\r
- * Range of insertions to seq\r
- */\r
- public static final char I = 'I';\r
-\r
- /**\r
- * Range of aligned residues\r
- */\r
- public static final char M = 'M';\r
-\r
- static protected final char _case_shift = 'a' - 'A';\r
-\r
- /**\r
- * Ugly function to get edited sequence string, start and end symbol positions\r
- * and the deletion regions as an array of int pairs May return null for an\r
- * empty cigar string. May return null for deletion ranges if there are none.\r
- * \r
- * @param reference\r
- * - the symbol sequence to apply the cigar operations to (or null if\r
- * no sequence)\r
- * @param GapChar\r
- * - the symbol to use for Insert operations\r
- * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]\r
- * {start, end, col} or null} the gapped sequence, first and last\r
- * residue index, and the deletion ranges on the reference sequence\r
- */\r
- public Object[] getSequenceAndDeletions(String reference, char GapChar)\r
- {\r
- int rlength = 0;\r
- int[][] deletions = new int[length][];\r
- int[][] trunc_deletions = null;\r
- StringBuffer sq = new StringBuffer();\r
- int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0, endpos = 0, delcount = -1;\r
- boolean consecutive_del = false;\r
- if (length == 0)\r
- {\r
- return null;\r
- }\r
- if (reference != null)\r
- {\r
- rlength = reference.length();\r
- }\r
- boolean modstart = true;\r
- for (int i = 0; i < length; i++)\r
- {\r
- switch (operation[i])\r
- {\r
- case D:\r
- if (!consecutive_del)\r
- {\r
- deletions[++delcount] = new int[]\r
- { cursor, 0, alcursor };\r
- }\r
- cursor += range[i];\r
- deletions[delcount][1] = cursor - 1;\r
- consecutive_del = true;\r
- break;\r
- case I:\r
- consecutive_del = false;\r
- for (int r = 0; r < range[i]; r++)\r
- {\r
- sq.append(GapChar);\r
- alcursor++;\r
- }\r
- break;\r
- case M:\r
- consecutive_del = false;\r
- if (modstart)\r
- {\r
- start = cursor;\r
- startpos = alcursor;\r
- modstart = false;\r
- }\r
- if (reference != null)\r
- {\r
- int sbend = cursor + range[i];\r
- if (sbend > rlength)\r
- {\r
- sq.append(reference.substring(cursor, rlength));\r
- while (sbend-- >= rlength)\r
- {\r
- sq.append(GapChar);\r
- }\r
- }\r
- else\r
- {\r
- sq.append(reference.substring(cursor, sbend));\r
- }\r
- }\r
- alcursor += range[i];\r
- cursor += range[i];\r
- end = cursor - 1;\r
- endpos = alcursor;\r
- break;\r
- default:\r
- throw new Error("Unknown SeqCigar operation '" + operation[i] + "'");\r
- }\r
- }\r
- if (++delcount > 0)\r
- {\r
- trunc_deletions = new int[delcount][];\r
- System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);\r
- }\r
- deletions = null;\r
- return new Object[]\r
- { ((reference != null) ? sq.toString() : null), new int[]\r
- { start, startpos, end, endpos }, trunc_deletions };\r
- }\r
-\r
- protected void compact_operations()\r
- {\r
- int i = 1;\r
- if (operation == null)\r
- {\r
- return;\r
- }\r
- char last = operation[0];\r
- while (i < length)\r
- {\r
- if (last == operation[i])\r
- {\r
- range[i - 1] += range[i];\r
- int r = length - i;\r
- if (r > 0)\r
- {\r
- System.arraycopy(range, i + 1, range, i, r);\r
- System.arraycopy(operation, i + 1, operation, i, r);\r
- }\r
- length--;\r
- }\r
- else\r
- {\r
- last = operation[i++];\r
- }\r
- }\r
- }\r
-\r
- /**\r
- * turn a cigar string into a series of operation range pairs\r
- * \r
- * @param cigarString\r
- * String\r
- * @return object[] {char[] operation, int[] range}\r
- * @throws java.lang.Exception\r
- * for improperly formated cigar strings or ones with unknown\r
- * operations\r
- */\r
- public static Object[] parseCigarString(String cigarString)\r
- throws Exception\r
- {\r
- int ops = 0;\r
- for (int i = 0, l = cigarString.length(); i < l; i++)\r
- {\r
- char c = cigarString.charAt(i);\r
- if (c == M || c == (M - _case_shift) || c == I\r
- || c == (I - _case_shift) || c == D || c == (D - _case_shift))\r
- {\r
- ops++;\r
- }\r
- }\r
- char[] operation = new char[ops];\r
- int[] range = new int[ops];\r
- int op = 0;\r
- int i = 0, l = cigarString.length();\r
- while (i < l)\r
- {\r
- char c;\r
- int j = i;\r
- do\r
- {\r
- c = cigarString.charAt(j++);\r
- } while (c >= '0' && c <= '9' && j < l);\r
- if (j >= l && c >= '0' && c <= '9')\r
- {\r
- throw new Exception("Unterminated cigar string.");\r
- }\r
- try\r
- {\r
- String rangeint = cigarString.substring(i, j - 1);\r
- range[op] = Integer.parseInt(rangeint);\r
- i = j;\r
- } catch (Exception e)\r
- {\r
- throw new Error("Implementation bug in parseCigarString");\r
- }\r
- if (c >= 'a' && c <= 'z')\r
- {\r
- c -= _case_shift;\r
- }\r
- if ((c == M || c == I || c == D))\r
- {\r
- operation[op++] = c;\r
- }\r
- else\r
- {\r
- throw new Exception("Unexpected operation '" + c\r
- + "' in cigar string (position " + i + " in '"\r
- + cigarString + "'");\r
- }\r
- }\r
- return new Object[]\r
- { operation, range };\r
- }\r
-\r
- /**\r
- * add an operation to cigar string\r
- * \r
- * @param op\r
- * char\r
- * @param range\r
- * int\r
- */\r
- public void addOperation(char op, int range)\r
- {\r
- if (op >= 'a' && op <= 'z')\r
- {\r
- op -= _case_shift;\r
- }\r
- if (op != M && op != D && op != I)\r
- {\r
- throw new Error("Implementation error. Invalid operation string.");\r
- }\r
- if (range <= 0)\r
- {\r
- throw new Error(\r
- "Invalid range string (must be non-zero positive number)");\r
- }\r
- int lngth = 0;\r
- if (operation == null)\r
- {\r
- this.operation = new char[_inc_length];\r
- this.range = new int[_inc_length];\r
- }\r
- if (length + 1 == operation.length)\r
- {\r
- char[] ops = this.operation;\r
- this.operation = new char[length + 1 + _inc_length];\r
- System.arraycopy(ops, 0, this.operation, 0, length);\r
- ops = null;\r
- int[] rng = this.range;\r
- this.range = new int[length + 1 + _inc_length];\r
- System.arraycopy(rng, 0, this.range, 0, length);\r
- rng = null;\r
- }\r
- if ((length > 0) && (operation[length - 1] == op))\r
- {\r
- length--; // modify existing operation.\r
- }\r
- else\r
- {\r
- this.range[length] = 0; // reset range\r
- }\r
- this.operation[length] = op;\r
- this.range[length++] += range;\r
- }\r
-\r
- /**\r
- * semi-efficient insert an operation on the current cigar string set at\r
- * column pos (from 1) NOTE: Insertion operations simply extend width of cigar\r
- * result - affecting registration of alignment Deletion ops will shorten\r
- * length of result - and affect registration of alignment Match ops will also\r
- * affect length of result - affecting registration of alignment (ie\r
- * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie\r
- * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie\r
- * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie\r
- * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms\r
- * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to\r
- * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts\r
- * sequence left by 1 residue and extends it by 3 (\r
- * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (\r
- * "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (\r
- * "1M10D5M".insert(1,M,3)->"\r
- * \r
- * if pos is beyond width - I operations are added before the operation\r
- * \r
- * @param pos\r
- * int -1, 0-length of visible region, or greater to append new ops\r
- * (with insertions in between)\r
- * @param op\r
- * char\r
- * @param range\r
- * int public void addOperationAt(int pos, char op, int range) { int\r
- * cursor = -1; // mark the position for the current operation being\r
- * edited. int o = 0; boolean last_d = false; // previous op was a\r
- * deletion. if (pos < -1) throw new\r
- * Error("pos<-1 is not supported."); while (o<length) { if\r
- * (operation[o] != D) { if ( (cursor + this.range[o]) < pos) {\r
- * cursor += this.range[o]; o++; last_d=false; } else { break; } }\r
- * else { last_d=true; o++; } } if (o==length) { // must insert more\r
- * operations before pos if (pos-cursor>0) addInsertion(pos-cursor);\r
- * // then just add the new operation. Regardless of what it is.\r
- * addOperation(op, range); } else { int diff = pos - cursor;\r
- * \r
- * int e_length = length-o; // new edit operation array length. //\r
- * diff<0 - can only happen before first insertion or match. -\r
- * affects op and all following // dif==0 - only when at first\r
- * position of existing op - // diff>0 - must preserve some existing\r
- * operations int[] e_range = new int[e_length];\r
- * System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op\r
- * = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,\r
- * e_length); length = o; // can now use add_operation to extend\r
- * list. int e_o=0; // current operation being edited. switch (op) {\r
- * case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //\r
- * reduce D's, if possible if (range<=this.range[o-1]) { this.range[o\r
- * - 1] -= range; } else { this.range[o-1]=0; } if\r
- * (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=\r
- * range; // just add more matched residues break; case I: // change\r
- * from insertion to match if (last_d && diff<=0) { // reduce D's, if\r
- * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;\r
- * } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose\r
- * this op. } e_range[e_o] break; default: throw new Inp }\r
- * \r
- * break; case I: break; case D: } break; default: throw new\r
- * Error("Implementation Error: Unknown operation in addOperation!");\r
- * } // finally, add remaining ops. while (e_o<e_length) {\r
- * addOperation(e_op[e_o], e_range[e_o]); e_o++; } } }\r
- */\r
- /**\r
- * Mark residues from start to end (inclusive) as deleted from the alignment,\r
- * and removes any insertions.\r
- * \r
- * @param start\r
- * int\r
- * @param end\r
- * int\r
- * @return deleted int - number of symbols marked as deleted\r
- */\r
- public int deleteRange(int start, int end)\r
- {\r
- int deleted = 0;\r
- if (length == 0)\r
- {\r
- // nothing to do here\r
- return deleted;\r
- }\r
- if (start < 0 || start > end)\r
- {\r
- throw new Error(\r
- "Implementation Error: deleteRange out of bounds: start must be non-negative and less than end.");\r
- }\r
- // find beginning\r
- int cursor = 0; // mark the position for the current operation being edited.\r
- int rlength = 1 + end - start; // number of positions to delete\r
- int oldlen = length;\r
- int o = 0;\r
- boolean editing = false;\r
- char[] oldops = operation;\r
- int[] oldrange = range;\r
- length = 0;\r
- operation = null;\r
- range = null;\r
- compact_operations();\r
- while (o < oldlen && cursor <= end && rlength > 0)\r
- {\r
- if (oldops[o] == D)\r
- {\r
- // absorbed into new deleted region.\r
- addDeleted(oldrange[o++]);\r
- continue;\r
- }\r
-\r
- int remain = oldrange[o]; // number of op characters left to edit\r
- if (!editing)\r
- {\r
- if ((cursor + remain) <= start)\r
- {\r
- addOperation(oldops[o], oldrange[o]);\r
- cursor += oldrange[o++];\r
- continue; // next operation\r
- }\r
- editing = true;\r
- // add operations before hidden region\r
- if (start - cursor > 0)\r
- {\r
- addOperation(oldops[o], start - cursor);\r
- remain -= start - cursor;\r
- }\r
- }\r
- // start inserting new ops\r
- if (o < oldlen && editing && rlength > 0 && remain > 0)\r
- {\r
- switch (oldops[o])\r
- {\r
- case M:\r
- if (rlength > remain)\r
- {\r
- addDeleted(remain);\r
- deleted += remain;\r
- }\r
- else\r
- {\r
- deleted += rlength;\r
- addDeleted(rlength);\r
- if (remain - rlength > 0)\r
- {\r
- this.addOperation(M, remain - rlength); // add remaining back.\r
- }\r
- rlength = 0;\r
- remain = 0;\r
- }\r
- break;\r
- case I:\r
- if (remain - rlength > 0)\r
- {\r
- // only remove some gaps\r
- addInsertion(remain - rlength);\r
- rlength = 0;\r
- }\r
- break;\r
- case D:\r
- throw new Error("Implementation error."); // do nothing;\r
- default:\r
- throw new Error("Implementation Error! Unknown operation '"\r
- + oldops[o] + "'");\r
- }\r
- rlength -= remain;\r
- remain = oldrange[++o]; // number of op characters left to edit\r
- }\r
- }\r
- // add remaining\r
- while (o < oldlen)\r
- {\r
- addOperation(oldops[o], oldrange[o++]);\r
- }\r
- // if (cursor<(start+1)) {\r
- // ran out of ops - nothing to do here ?\r
- // addInsertion(start-cursor);\r
- // }\r
- return deleted;\r
- }\r
-\r
- /**\r
- * Deleted regions mean that there will be discontinuous sequence numbering in\r
- * the sequence returned by getSeq(char).\r
- * \r
- * @return true if there deletions\r
- */\r
- public boolean hasDeletedRegions()\r
- {\r
- for (int i = 0; i < length; i++)\r
- {\r
- if (operation[i] == D)\r
- {\r
- return true;\r
- }\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * enumerate the ranges on seq that are marked as deleted in this cigar\r
- * \r
- * @return int[] { vis_start, sym_start, length }\r
- */\r
- public int[] getDeletedRegions()\r
- {\r
- if (length == 0)\r
- {\r
- return null;\r
- }\r
- Vector dr = new Vector();\r
- int cursor = 0, vcursor = 0;\r
- for (int i = 0; i < length; i++)\r
- {\r
- switch (operation[i])\r
- {\r
- case M:\r
- cursor += range[i];\r
- case I:\r
- vcursor += range[i];\r
- break;\r
- case D:\r
- dr.addElement(new int[]\r
- { vcursor, cursor, range[i] });\r
- cursor += range[i];\r
- }\r
- }\r
- if (dr.size() == 0)\r
- {\r
- return null;\r
- }\r
- int[] delregions = new int[dr.size() * 3];\r
- for (int i = 0, l = dr.size(); i < l; i++)\r
- {\r
- int[] reg = (int[]) dr.elementAt(i);\r
- delregions[i * 3] = reg[0];\r
- delregions[i * 3 + 1] = reg[1];\r
- delregions[i * 3 + 2] = reg[2];\r
- }\r
- return delregions;\r
- }\r
-\r
- /**\r
- * sum of ranges in cigar string\r
- * \r
- * @return int number of residues hidden, matched, or gaps inserted into\r
- * sequence\r
- */\r
- public int getFullWidth()\r
- {\r
- int w = 0;\r
- if (range != null)\r
- {\r
- for (int i = 0; i < length; i++)\r
- {\r
- w += range[i];\r
- }\r
- }\r
- return w;\r
- }\r
-\r
- /**\r
- * Visible length of aligned sequence\r
- * \r
- * @return int length of including gaps and less hidden regions\r
- */\r
- public int getWidth()\r
- {\r
- int w = 0;\r
- if (range != null)\r
- {\r
- for (int i = 0; i < length; i++)\r
- {\r
- if (operation[i] == M || operation[i] == I)\r
- {\r
- w += range[i];\r
- }\r
- }\r
- }\r
- return w;\r
- }\r
-\r
- /**\r
- * mark a range of inserted residues\r
- * \r
- * @param range\r
- * int\r
- */\r
- public void addInsertion(int range)\r
- {\r
- this.addOperation(I, range);\r
- }\r
-\r
- /**\r
- * mark the next range residues as hidden (not aligned) or deleted\r
- * \r
- * @param range\r
- * int\r
- */\r
- public void addDeleted(int range)\r
- {\r
- this.addOperation(D, range);\r
- }\r
-\r
- /**\r
- * Modifies operation list to delete columns from start to end (inclusive)\r
- * editing will remove insertion operations, and convert matches to deletions\r
- * \r
- * @param start\r
- * alignment column\r
- * @param end\r
- * alignment column\r
- * @return boolean true if residues were marked as deleted. public boolean\r
- * deleteRange(int start, int end) { boolean deleted = false; int op =\r
- * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =\r
- * 0; // zero'th column if (length > 0) { // find operation bracketing\r
- * start of the range do { if (operation[op] != D) { width +=\r
- * range[prevop = op]; } op++; } while (op < length && width < start);\r
- * } if (width < start) { // run off end - add more operations up to\r
- * deletion. addInsertion(start - width); } else { // edit existing\r
- * operations. op = prevop; width -= range[prevop]; int[] oldrange =\r
- * range; char[] oldops = operation; range = new int[oldrange.length];\r
- * operation = new char[oldops.length]; if (op < length) { do { if\r
- * (operation[op] != D) { width += range[postop = op]; } op++; } while\r
- * (op < length && width <= end); } } if (deleted == true) {\r
- * addDeleted(end - start + 1); } return deleted; }\r
- */\r
- /**\r
- * Return an ENSEMBL style cigar string where D may indicates excluded parts\r
- * of seq\r
- * \r
- * @return String of form ([0-9]+[IMD])+\r
- */\r
- public String getCigarstring()\r
- {\r
- StringBuffer cigarString = new StringBuffer();\r
- for (int i = 0; i < length; i++)\r
- {\r
- cigarString.append("" + range[i]);\r
- cigarString.append(operation[i]);\r
- }\r
- return cigarString.toString();\r
- }\r
-}\r
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.datamodel;
+
+import jalview.util.MessageManager;
+
+import java.util.Vector;
+
+public abstract class CigarBase
+{
+ /**
+ * Base class for compact idiosyncratic representation of gaps and aligned
+ * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006
+ * Not thread safe.
+ */
+ public CigarBase()
+ {
+ // nothing to be done (probably)
+ }
+
+ protected int length = 0;
+
+ protected int _inc_length = 10; // extension range for addition of new
+
+ // operations
+
+ protected char[] operation = null;
+
+ protected int[] range = null;
+
+ /**
+ * Range of Hidden residues in seq (translated as deleted in seq)
+ */
+ public static final char D = 'D';
+
+ /**
+ * Range of insertions to seq
+ */
+ public static final char I = 'I';
+
+ /**
+ * Range of aligned residues
+ */
+ public static final char M = 'M';
+
+ static protected final char _case_shift = 'a' - 'A';
+
+ /**
+ * Ugly function to get edited sequence string, start and end symbol positions
+ * and the deletion regions as an array of int pairs May return null for an
+ * empty cigar string. May return null for deletion ranges if there are none.
+ *
+ * @param reference
+ * - the symbol sequence to apply the cigar operations to (or null if
+ * no sequence)
+ * @param GapChar
+ * - the symbol to use for Insert operations
+ * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]
+ * {start, end, col} or null} the gapped sequence, first and last
+ * residue index, and the deletion ranges on the reference sequence
+ */
+ public Object[] getSequenceAndDeletions(String reference, char GapChar)
+ {
+ int rlength = 0;
+ int[][] deletions = new int[length][];
+ int[][] trunc_deletions = null;
+ StringBuffer sq = new StringBuffer();
+ int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0,
+ endpos = 0, delcount = -1;
+ boolean consecutive_del = false;
+ if (length == 0)
+ {
+ return null;
+ }
+ if (reference != null)
+ {
+ rlength = reference.length();
+ }
+ boolean modstart = true;
+ for (int i = 0; i < length; i++)
+ {
+ switch (operation[i])
+ {
+ case D:
+ if (!consecutive_del)
+ {
+ deletions[++delcount] = new int[] { cursor, 0, alcursor };
+ }
+ cursor += range[i];
+ deletions[delcount][1] = cursor - 1;
+ consecutive_del = true;
+ break;
+ case I:
+ consecutive_del = false;
+ for (int r = 0; r < range[i]; r++)
+ {
+ sq.append(GapChar);
+ alcursor++;
+ }
+ break;
+ case M:
+ consecutive_del = false;
+ if (modstart)
+ {
+ start = cursor;
+ startpos = alcursor;
+ modstart = false;
+ }
+ if (reference != null)
+ {
+ int sbend = cursor + range[i];
+ if (sbend > rlength)
+ {
+ sq.append(reference.substring(cursor, rlength));
+ while (sbend-- >= rlength)
+ {
+ sq.append(GapChar);
+ }
+ }
+ else
+ {
+ sq.append(reference.substring(cursor, sbend));
+ }
+ }
+ alcursor += range[i];
+ cursor += range[i];
+ end = cursor - 1;
+ endpos = alcursor;
+ break;
+ default:
+ throw new Error(MessageManager.formatMessage(
+ "error.unknown_seq_cigar_operation", new String[]
+ { new StringBuffer(operation[i]).toString() }));
+ }
+ }
+ if (++delcount > 0)
+ {
+ trunc_deletions = new int[delcount][];
+ System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
+ }
+ deletions = null;
+ return new Object[] { ((reference != null) ? sq.toString() : null),
+ new int[]
+ { start, startpos, end, endpos }, trunc_deletions };
+ }
+
+ protected void compact_operations()
+ {
+ int i = 1;
+ if (operation == null)
+ {
+ return;
+ }
+ char last = operation[0];
+ while (i < length)
+ {
+ if (last == operation[i])
+ {
+ range[i - 1] += range[i];
+ int r = length - i;
+ if (r > 0)
+ {
+ System.arraycopy(range, i + 1, range, i, r);
+ System.arraycopy(operation, i + 1, operation, i, r);
+ }
+ length--;
+ }
+ else
+ {
+ last = operation[i++];
+ }
+ }
+ }
+
+ /**
+ * turn a cigar string into a series of operation range pairs
+ *
+ * @param cigarString
+ * String
+ * @return object[] {char[] operation, int[] range}
+ * @throws java.lang.Exception
+ * for improperly formated cigar strings or ones with unknown
+ * operations
+ */
+ public static Object[] parseCigarString(String cigarString)
+ throws Exception
+ {
+ int ops = 0;
+ for (int i = 0, l = cigarString.length(); i < l; i++)
+ {
+ char c = cigarString.charAt(i);
+ if (c == M || c == (M - _case_shift) || c == I
+ || c == (I - _case_shift) || c == D || c == (D - _case_shift))
+ {
+ ops++;
+ }
+ }
+ char[] operation = new char[ops];
+ int[] range = new int[ops];
+ int op = 0;
+ int i = 0, l = cigarString.length();
+ while (i < l)
+ {
+ char c;
+ int j = i;
+ do
+ {
+ c = cigarString.charAt(j++);
+ } while (c >= '0' && c <= '9' && j < l);
+ if (j >= l && c >= '0' && c <= '9')
+ {
+ throw new Exception(MessageManager
+ .getString("exception.unterminated_cigar_string"));
+ }
+ try
+ {
+ String rangeint = cigarString.substring(i, j - 1);
+ range[op] = Integer.parseInt(rangeint);
+ i = j;
+ } catch (Exception e)
+ {
+ throw new Error(MessageManager
+ .getString("error.implementation_bug_parse_cigar_string"));
+ }
+ if (c >= 'a' && c <= 'z')
+ {
+ c -= _case_shift;
+ }
+ if ((c == M || c == I || c == D))
+ {
+ operation[op++] = c;
+ }
+ else
+ {
+ throw new Exception(MessageManager.formatMessage(
+ "exception.unexpected_operation_cigar_string_pos",
+ new String[]
+ { new StringBuffer(c).toString(),
+ Integer.valueOf(i).toString(), cigarString }));
+ }
+ }
+ return new Object[] { operation, range };
+ }
+
+ /**
+ * add an operation to cigar string
+ *
+ * @param op
+ * char
+ * @param range
+ * int
+ */
+ public void addOperation(char op, int range)
+ {
+ if (op >= 'a' && op <= 'z')
+ {
+ op -= _case_shift;
+ }
+ if (op != M && op != D && op != I)
+ {
+ throw new Error(MessageManager.getString(
+ "error.implementation_error_invalid_operation_string"));
+ }
+ if (range == 0)
+ {
+ return; // No Operation to add.
+ }
+ if (range < 0)
+ {
+ throw new Error(
+ MessageManager.getString("error.invalid_range_string"));
+ }
+ int lngth = 0;
+ if (operation == null)
+ {
+ this.operation = new char[_inc_length];
+ this.range = new int[_inc_length];
+ }
+ if (length + 1 == operation.length)
+ {
+ char[] ops = this.operation;
+ this.operation = new char[length + 1 + _inc_length];
+ System.arraycopy(ops, 0, this.operation, 0, length);
+ ops = null;
+ int[] rng = this.range;
+ this.range = new int[length + 1 + _inc_length];
+ System.arraycopy(rng, 0, this.range, 0, length);
+ rng = null;
+ }
+ if ((length > 0) && (operation[length - 1] == op))
+ {
+ length--; // modify existing operation.
+ }
+ else
+ {
+ this.range[length] = 0; // reset range
+ }
+ this.operation[length] = op;
+ this.range[length++] += range;
+ }
+
+ /**
+ * semi-efficient insert an operation on the current cigar string set at
+ * column pos (from 1) NOTE: Insertion operations simply extend width of cigar
+ * result - affecting registration of alignment Deletion ops will shorten
+ * length of result - and affect registration of alignment Match ops will also
+ * affect length of result - affecting registration of alignment (ie
+ * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie
+ * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie
+ * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie
+ * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms
+ * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to
+ * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts
+ * sequence left by 1 residue and extends it by 3 (
+ * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (
+ * "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (
+ * "1M10D5M".insert(1,M,3)->"
+ *
+ * if pos is beyond width - I operations are added before the operation
+ *
+ * @param pos
+ * int -1, 0-length of visible region, or greater to append new ops
+ * (with insertions in between)
+ * @param op
+ * char
+ * @param range
+ * int public void addOperationAt(int pos, char op, int range) { int
+ * cursor = -1; // mark the position for the current operation being
+ * edited. int o = 0; boolean last_d = false; // previous op was a
+ * deletion. if (pos < -1) throw new Error("pos<-1 is not
+ * supported."); while (o<length) { if (operation[o] != D) { if (
+ * (cursor + this.range[o]) < pos) { cursor += this.range[o]; o++;
+ * last_d=false; } else { break; } } else { last_d=true; o++; } } if
+ * (o==length) { // must insert more operations before pos if
+ * (pos-cursor>0) addInsertion(pos-cursor); // then just add the new
+ * operation. Regardless of what it is. addOperation(op, range); }
+ * else { int diff = pos - cursor;
+ *
+ * int e_length = length-o; // new edit operation array length. //
+ * diff<0 - can only happen before first insertion or match. -
+ * affects op and all following // dif==0 - only when at first
+ * position of existing op - // diff>0 - must preserve some existing
+ * operations int[] e_range = new int[e_length];
+ * System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op
+ * = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,
+ * e_length); length = o; // can now use add_operation to extend
+ * list. int e_o=0; // current operation being edited. switch (op) {
+ * case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //
+ * reduce D's, if possible if (range<=this.range[o-1]) { this.range[o
+ * - 1] -= range; } else { this.range[o-1]=0; } if
+ * (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=
+ * range; // just add more matched residues break; case I: // change
+ * from insertion to match if (last_d && diff<=0) { // reduce D's, if
+ * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;
+ * } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose
+ * this op. } e_range[e_o] break; default: throw new Inp }
+ *
+ * break; case I: break; case D: } break; default: throw new
+ * Error("Implementation Error: Unknown operation in addOperation!");
+ * } // finally, add remaining ops. while (e_o<e_length) {
+ * addOperation(e_op[e_o], e_range[e_o]); e_o++; } } }
+ */
+ /**
+ * Mark residues from start to end (inclusive) as deleted from the alignment,
+ * and removes any insertions.
+ *
+ * @param start
+ * int
+ * @param end
+ * int
+ * @return deleted int - number of symbols marked as deleted
+ */
+ public int deleteRange(int start, int end)
+ {
+ int deleted = 0;
+ if (length == 0)
+ {
+ // nothing to do here
+ return deleted;
+ }
+ if (start < 0 || start > end)
+ {
+ throw new Error(MessageManager.getString(
+ "error.implementation_error_delete_range_out_of_bounds"));
+ }
+ // find beginning
+ int cursor = 0; // mark the position for the current operation being edited.
+ int rlength = 1 + end - start; // number of positions to delete
+ int oldlen = length;
+ int o = 0;
+ boolean editing = false;
+ char[] oldops = operation;
+ int[] oldrange = range;
+ length = 0;
+ operation = null;
+ range = null;
+ compact_operations();
+ while (o < oldlen && cursor <= end && rlength > 0)
+ {
+ if (oldops[o] == D)
+ {
+ // absorbed into new deleted region.
+ addDeleted(oldrange[o++]);
+ continue;
+ }
+
+ int remain = oldrange[o]; // number of op characters left to edit
+ if (!editing)
+ {
+ if ((cursor + remain) <= start)
+ {
+ addOperation(oldops[o], oldrange[o]);
+ cursor += oldrange[o++];
+ continue; // next operation
+ }
+ editing = true;
+ // add operations before hidden region
+ if (start - cursor > 0)
+ {
+ addOperation(oldops[o], start - cursor);
+ remain -= start - cursor;
+ }
+ }
+ // start inserting new ops
+ if (o < oldlen && editing && rlength > 0 && remain > 0)
+ {
+ switch (oldops[o])
+ {
+ case M:
+ if (rlength > remain)
+ {
+ addDeleted(remain);
+ deleted += remain;
+ }
+ else
+ {
+ deleted += rlength;
+ addDeleted(rlength);
+ if (remain - rlength > 0)
+ {
+ this.addOperation(M, remain - rlength); // add remaining back.
+ }
+ rlength = 0;
+ remain = 0;
+ }
+ break;
+ case I:
+ if (remain - rlength > 0)
+ {
+ // only remove some gaps
+ addInsertion(remain - rlength);
+ rlength = 0;
+ }
+ break;
+ case D:
+ throw new Error(
+ MessageManager.getString("error.implementation_error")); // do
+ // nothing;
+ default:
+ throw new Error(MessageManager.formatMessage(
+ "error.implementation_error_unknown_operation",
+ new String[]
+ { new StringBuffer(oldops[o]).toString() }));
+ }
+ rlength -= remain;
+ remain = oldrange[++o]; // number of op characters left to edit
+ }
+ }
+ // add remaining
+ while (o < oldlen)
+ {
+ addOperation(oldops[o], oldrange[o++]);
+ }
+ // if (cursor<(start+1)) {
+ // ran out of ops - nothing to do here ?
+ // addInsertion(start-cursor);
+ // }
+ return deleted;
+ }
+
+ /**
+ * Deleted regions mean that there will be discontinuous sequence numbering in
+ * the sequence returned by getSeq(char).
+ *
+ * @return true if there deletions
+ */
+ public boolean hasDeletedRegions()
+ {
+ for (int i = 0; i < length; i++)
+ {
+ if (operation[i] == D)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * enumerate the ranges on seq that are marked as deleted in this cigar
+ *
+ * @return int[] { vis_start, sym_start, length }
+ */
+ public int[] getDeletedRegions()
+ {
+ if (length == 0)
+ {
+ return null;
+ }
+ Vector dr = new Vector();
+ int cursor = 0, vcursor = 0;
+ for (int i = 0; i < length; i++)
+ {
+ switch (operation[i])
+ {
+ case M:
+ cursor += range[i];
+ break;
+ case I:
+ vcursor += range[i];
+ break;
+ case D:
+ dr.addElement(new int[] { vcursor, cursor, range[i] });
+ cursor += range[i];
+ }
+ }
+ if (dr.size() == 0)
+ {
+ return null;
+ }
+ int[] delregions = new int[dr.size() * 3];
+ for (int i = 0, l = dr.size(); i < l; i++)
+ {
+ int[] reg = (int[]) dr.elementAt(i);
+ delregions[i * 3] = reg[0];
+ delregions[i * 3 + 1] = reg[1];
+ delregions[i * 3 + 2] = reg[2];
+ }
+ return delregions;
+ }
+
+ /**
+ * sum of ranges in cigar string
+ *
+ * @return int number of residues hidden, matched, or gaps inserted into
+ * sequence
+ */
+ public int getFullWidth()
+ {
+ int w = 0;
+ if (range != null)
+ {
+ for (int i = 0; i < length; i++)
+ {
+ w += range[i];
+ }
+ }
+ return w;
+ }
+
+ /**
+ * Visible length of aligned sequence
+ *
+ * @return int length of including gaps and less hidden regions
+ */
+ public int getWidth()
+ {
+ int w = 0;
+ if (range != null)
+ {
+ for (int i = 0; i < length; i++)
+ {
+ if (operation[i] == M || operation[i] == I)
+ {
+ w += range[i];
+ }
+ }
+ }
+ return w;
+ }
+
+ /**
+ * mark a range of inserted residues
+ *
+ * @param range
+ * int
+ */
+ public void addInsertion(int range)
+ {
+ this.addOperation(I, range);
+ }
+
+ /**
+ * mark the next range residues as hidden (not aligned) or deleted
+ *
+ * @param range
+ * int
+ */
+ public void addDeleted(int range)
+ {
+ this.addOperation(D, range);
+ }
+
+ /**
+ * Modifies operation list to delete columns from start to end (inclusive)
+ * editing will remove insertion operations, and convert matches to deletions
+ *
+ * @param start
+ * alignment column
+ * @param end
+ * alignment column
+ * @return boolean true if residues were marked as deleted. public boolean
+ * deleteRange(int start, int end) { boolean deleted = false; int op =
+ * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =
+ * 0; // zero'th column if (length > 0) { // find operation bracketing
+ * start of the range do { if (operation[op] != D) { width +=
+ * range[prevop = op]; } op++; } while (op < length && width < start);
+ * } if (width < start) { // run off end - add more operations up to
+ * deletion. addInsertion(start - width); } else { // edit existing
+ * operations. op = prevop; width -= range[prevop]; int[] oldrange =
+ * range; char[] oldops = operation; range = new int[oldrange.length];
+ * operation = new char[oldops.length]; if (op < length) { do { if
+ * (operation[op] != D) { width += range[postop = op]; } op++; } while
+ * (op < length && width <= end); } } if (deleted == true) {
+ * addDeleted(end - start + 1); } return deleted; }
+ */
+ /**
+ * Return an ENSEMBL style cigar string where D may indicates excluded parts
+ * of seq
+ *
+ * @return String of form ([0-9]+[IMD])+
+ */
+ public String getCigarstring()
+ {
+ StringBuffer cigarString = new StringBuffer();
+ for (int i = 0; i < length; i++)
+ {
+ cigarString.append("" + range[i]);
+ cigarString.append(operation[i]);
+ }
+ return cigarString.toString();
+ }
+}