X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FCigarBase.java;h=b0efe6c1ae3ec0d0f1a267ef3dcb5e64a7355e39;hb=506d60f0e188723ddc91c26824b41ac7034df3fe;hp=d44d5993a881a4f546eaa3ac356dc76e2abcd8c2;hpb=60f2d6c034560415fd0139c8bc7df0c19cae1186;p=jalview.git diff --git a/src/jalview/datamodel/CigarBase.java b/src/jalview/datamodel/CigarBase.java index d44d599..b0efe6c 100644 --- a/src/jalview/datamodel/CigarBase.java +++ b/src/jalview/datamodel/CigarBase.java @@ -1,17 +1,17 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) + * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -23,9 +23,8 @@ import java.util.*; public abstract class CigarBase { /** - * Base class for compact idiosyncratic representation of gaps and aligned residues - * Regards to Tom Oldfield for his DynamicArray class. - * 17th July 2006 + * Base class for compact idiosyncratic representation of gaps and aligned + * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006 * Not thread safe. */ public CigarBase() @@ -34,27 +33,44 @@ public abstract class CigarBase } protected int length = 0; - protected int _inc_length = 10; // extension range for addition of new operations + + protected int _inc_length = 10; // extension range for addition of new + // operations + protected char[] operation = null; + protected int[] range = null; + /** * Range of Hidden residues in seq (translated as deleted in seq) */ - public static final char D = 'D'; /** - * Range of insertions to seq - */ - public static final char I = 'I'; /** - * Range of aligned residues - */ - public static final char M = 'M'; + public static final char D = 'D'; + + /** + * Range of insertions to seq + */ + public static final char I = 'I'; + + /** + * Range of aligned residues + */ + public static final char M = 'M'; + static protected final char _case_shift = 'a' - 'A'; + /** - * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs - * May return null for an empty cigar string. - * May return null for deletion ranges if there are none. - * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence) - * @param GapChar - the symbol to use for Insert operations - * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence + * Ugly function to get edited sequence string, start and end symbol positions + * and the deletion regions as an array of int pairs May return null for an + * empty cigar string. May return null for deletion ranges if there are none. + * + * @param reference - + * the symbol sequence to apply the cigar operations to (or + * null if no sequence) + * @param GapChar - + * the symbol to use for Insert operations + * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] + * {start, end, col} or null} the gapped sequence, first and last + * residue index, and the deletion ranges on the reference sequence */ public Object[] getSequenceAndDeletions(String reference, char GapChar) { @@ -62,8 +78,7 @@ public abstract class CigarBase int[][] deletions = new int[length][]; int[][] trunc_deletions = null; StringBuffer sq = new StringBuffer(); - int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0, endpos = 0, - delcount = -1; + int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0, endpos = 0, delcount = -1; boolean consecutive_del = false; if (length == 0) { @@ -78,56 +93,55 @@ public abstract class CigarBase { switch (operation[i]) { - case D: - if (!consecutive_del) - { - deletions[++delcount] = new int[] - { - cursor, 0, alcursor}; - } - cursor += range[i]; - deletions[delcount][1] = cursor - 1; - consecutive_del = true; - break; - case I: - consecutive_del = false; - for (int r = 0; r < range[i]; r++) - { - sq.append(GapChar); - alcursor++; - } - break; - case M: - consecutive_del = false; - if (modstart) - { - start = cursor; - startpos = alcursor; - modstart = false; - } - if (reference != null) + case D: + if (!consecutive_del) + { + deletions[++delcount] = new int[] + { cursor, 0, alcursor }; + } + cursor += range[i]; + deletions[delcount][1] = cursor - 1; + consecutive_del = true; + break; + case I: + consecutive_del = false; + for (int r = 0; r < range[i]; r++) + { + sq.append(GapChar); + alcursor++; + } + break; + case M: + consecutive_del = false; + if (modstart) + { + start = cursor; + startpos = alcursor; + modstart = false; + } + if (reference != null) + { + int sbend = cursor + range[i]; + if (sbend > rlength) { - int sbend = cursor + range[i]; - if (sbend > rlength) + sq.append(reference.substring(cursor, rlength)); + while (sbend-- >= rlength) { - sq.append(reference.substring(cursor, rlength)); - while (sbend-- >= rlength) - { - sq.append(GapChar); - } - } - else - { - sq.append(reference.substring(cursor, sbend)); + sq.append(GapChar); } } - alcursor += range[i]; - cursor += range[i]; - end = cursor - 1; - endpos = alcursor; - break; - default: - throw new Error("Unknown SeqCigar operation '" + operation[i] + "'"); + else + { + sq.append(reference.substring(cursor, sbend)); + } + } + alcursor += range[i]; + cursor += range[i]; + end = cursor - 1; + endpos = alcursor; + break; + default: + throw new Error("Unknown SeqCigar operation '" + operation[i] + "'"); } } if (++delcount > 0) @@ -137,11 +151,8 @@ public abstract class CigarBase } deletions = null; return new Object[] - { - ( (reference != null) ? sq.toString() : null), - new int[] - { - start, startpos, end, endpos}, trunc_deletions}; + { ((reference != null) ? sq.toString() : null), new int[] + { start, startpos, end, endpos }, trunc_deletions }; } protected void compact_operations() @@ -174,19 +185,23 @@ public abstract class CigarBase /** * turn a cigar string into a series of operation range pairs - * @param cigarString String + * + * @param cigarString + * String * @return object[] {char[] operation, int[] range} - * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations + * @throws java.lang.Exception + * for improperly formated cigar strings or ones with unknown + * operations */ public static Object[] parseCigarString(String cigarString) - throws Exception + throws Exception { int ops = 0; for (int i = 0, l = cigarString.length(); i < l; i++) { char c = cigarString.charAt(i); - if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) || - c == D || c == (D - _case_shift)) + if (c == M || c == (M - _case_shift) || c == I + || c == (I - _case_shift) || c == D || c == (D - _case_shift)) { ops++; } @@ -202,8 +217,7 @@ public abstract class CigarBase do { c = cigarString.charAt(j++); - } - while (c >= '0' && c <= '9' && j < l); + } while (c >= '0' && c <= '9' && j < l); if (j >= l && c >= '0' && c <= '9') { throw new Exception("Unterminated cigar string."); @@ -213,8 +227,7 @@ public abstract class CigarBase String rangeint = cigarString.substring(i, j - 1); range[op] = Integer.parseInt(rangeint); i = j; - } - catch (Exception e) + } catch (Exception e) { throw new Error("Implementation bug in parseCigarString"); } @@ -222,26 +235,28 @@ public abstract class CigarBase { c -= _case_shift; } - if ( (c == M || c == I || c == D)) + if ((c == M || c == I || c == D)) { operation[op++] = c; } else { - throw new Exception("Unexpected operation '" + c + - "' in cigar string (position " + i + " in '" + - cigarString + "'"); + throw new Exception("Unexpected operation '" + c + + "' in cigar string (position " + i + " in '" + + cigarString + "'"); } } return new Object[] - { - operation, range}; + { operation, range }; } /** * add an operation to cigar string - * @param op char - * @param range int + * + * @param op + * char + * @param range + * int */ public void addOperation(char op, int range) { @@ -255,7 +270,8 @@ public abstract class CigarBase } if (range <= 0) { - throw new Error("Invalid range string (must be non-zero positive number)"); + throw new Error( + "Invalid range string (must be non-zero positive number)"); } int lngth = 0; if (operation == null) @@ -274,7 +290,7 @@ public abstract class CigarBase System.arraycopy(rng, 0, this.range, 0, length); rng = null; } - if ( (length > 0) && (operation[length - 1] == op)) + if ((length > 0) && (operation[length - 1] == op)) { length--; // modify existing operation. } @@ -287,130 +303,73 @@ public abstract class CigarBase } /** - * semi-efficient insert an operation on the current cigar string set at column pos (from 1) - * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment - * Deletion ops will shorten length of result - and affect registration of alignment - * Match ops will also affect length of result - affecting registration of alignment - * (ie "10M".insert(4,I,3)->"4M3I3M") - (replace?) - * (ie "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) - * (ie "5I5M".insert(4,I,3)->"8I5M") - real insertion - * (ie "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms changed to Ds - * (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to M, Ds changed to M. - * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3 - * ( "10D5M".insert(-1,M,3)->"3M7D5M") - * ( "10D5M".insert(0,M,3)->"7D8M") - * ( "10D5M".insert(1,M,3)->"10D8M") - * - * ( "1M10D5M".insert(0,M,3)->"1M10D8M") - * ( "1M10D5M".insert(1,M,3)->" - * + * semi-efficient insert an operation on the current cigar string set at + * column pos (from 1) NOTE: Insertion operations simply extend width of cigar + * result - affecting registration of alignment Deletion ops will shorten + * length of result - and affect registration of alignment Match ops will also + * affect length of result - affecting registration of alignment (ie + * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie + * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie + * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie + * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms + * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to + * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts + * sequence left by 1 residue and extends it by 3 ( + * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") ( + * "10D5M".insert(1,M,3)->"10D8M") + * ( "1M10D5M".insert(0,M,3)->"1M10D8M") ( "1M10D5M".insert(1,M,3)->" + * * if pos is beyond width - I operations are added before the operation - * @param pos int -1, 0-length of visible region, or greater to append new ops (with insertions in between) - * @param op char - * @param range int - public void addOperationAt(int pos, char op, int range) - { - int cursor = -1; // mark the position for the current operation being edited. - int o = 0; - boolean last_d = false; // previous op was a deletion. - if (pos < -1) - throw new Error("pos<-1 is not supported."); - while (o0) - addInsertion(pos-cursor); - // then just add the new operation. Regardless of what it is. - addOperation(op, range); - } else { - int diff = pos - cursor; - - int e_length = length-o; // new edit operation array length. - // diff<0 - can only happen before first insertion or match. - affects op and all following - // dif==0 - only when at first position of existing op - - // diff>0 - must preserve some existing operations - int[] e_range = new int[e_length]; - System.arraycopy(this.range, o, e_range, 0, e_length); - char[] e_op = new char[e_length]; - System.arraycopy(this.operation, o, e_op, 0, e_length); - length = o; // can now use add_operation to extend list. - int e_o=0; // current operation being edited. - switch (op) { - case M: - switch (e_op[e_o]) - { - case M: - if (last_d && diff <= 0) - { - // reduce D's, if possible - if (range<=this.range[o-1]) { - this.range[o - 1] -= range; - } else { - this.range[o-1]=0; - } - if (this.range[o-1]==0) - o--; // lose this op. - } - e_range[e_o] += range; // just add more matched residues - break; - case I: - // change from insertion to match - if (last_d && diff<=0) - { - // reduce D's, if possible - if (range<=this.range[o-1]) { - this.range[o - 1] -= range; - } else { - this.range[o-1]=0; - } - if (this.range[o-1]==0) - o--; // lose this op. - } - e_range[e_o] - break; - default: - throw new Inp - } - - break; - case I: - break; - case D: - } - break; - default: - throw new Error("Implementation Error: Unknown operation in addOperation!"); - } - // finally, add remaining ops. - while (e_o0) + * addInsertion(pos-cursor); // then just add the new + * operation. Regardless of what it is. addOperation(op, + * range); } else { int diff = pos - cursor; + * + * int e_length = length-o; // new edit operation array length. // diff<0 - + * can only happen before first insertion or match. - affects op and all + * following // dif==0 - only when at first position of existing op - // + * diff>0 - must preserve some existing operations int[] e_range = new + * int[e_length]; System.arraycopy(this.range, o, e_range, 0, e_length); + * char[] e_op = new char[e_length]; System.arraycopy(this.operation, o, e_op, + * 0, e_length); length = o; // can now use add_operation to extend list. int + * e_o=0; // current operation being edited. switch (op) { case M: switch + * (e_op[e_o]) { case M: if (last_d && diff <= 0) { // reduce D's, if possible + * if (range<=this.range[o-1]) { this.range[o - 1] -= range; } else { + * this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose this op. } + * e_range[e_o] += range; // just add more matched residues break; case I: // + * change from insertion to match if (last_d && diff<=0) { // reduce D's, if + * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range; } else { + * this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose this op. } + * e_range[e_o] break; default: throw new Inp } + * + * break; case I: break; case D: } break; default: throw new + * Error("Implementation Error: Unknown operation in addOperation!"); } // + * finally, add remaining ops. while (e_o end) { - throw new Error("Implementation Error: deleteRange out of bounds: start must be non-negative and less than end."); + throw new Error( + "Implementation Error: deleteRange out of bounds: start must be non-negative and less than end."); } // find beginning int cursor = 0; // mark the position for the current operation being edited. @@ -449,7 +409,7 @@ public abstract class CigarBase int remain = oldrange[o]; // number of op characters left to edit if (!editing) { - if ( (cursor + remain) <= start) + if ((cursor + remain) <= start) { addOperation(oldops[o], oldrange[o]); cursor += oldrange[o++]; @@ -468,37 +428,37 @@ public abstract class CigarBase { switch (oldops[o]) { - case M: - if (rlength > remain) - { - addDeleted(remain); - deleted += remain; - } - else - { - deleted += rlength; - addDeleted(rlength); - if (remain - rlength > 0) - { - this.addOperation(M, remain - rlength); // add remaining back. - } - rlength = 0; - remain = 0; - } - break; - case I: + case M: + if (rlength > remain) + { + addDeleted(remain); + deleted += remain; + } + else + { + deleted += rlength; + addDeleted(rlength); if (remain - rlength > 0) { - // only remove some gaps - addInsertion(remain - rlength); - rlength = 0; + this.addOperation(M, remain - rlength); // add remaining back. } - break; - case D: - throw new Error("Implementation error."); // do nothing; - default: - throw new Error("Implementation Error! Unknown operation '" + - oldops[o] + "'"); + rlength = 0; + remain = 0; + } + break; + case I: + if (remain - rlength > 0) + { + // only remove some gaps + addInsertion(remain - rlength); + rlength = 0; + } + break; + case D: + throw new Error("Implementation error."); // do nothing; + default: + throw new Error("Implementation Error! Unknown operation '" + + oldops[o] + "'"); } rlength -= remain; remain = oldrange[++o]; // number of op characters left to edit @@ -509,16 +469,17 @@ public abstract class CigarBase { addOperation(oldops[o], oldrange[o++]); } - //if (cursor<(start+1)) { + // if (cursor<(start+1)) { // ran out of ops - nothing to do here ? // addInsertion(start-cursor); - //} + // } return deleted; } /** - * Deleted regions mean that there will be discontinuous sequence numbering in the - * sequence returned by getSeq(char). + * Deleted regions mean that there will be discontinuous sequence numbering in + * the sequence returned by getSeq(char). + * * @return true if there deletions */ public boolean hasDeletedRegions() @@ -535,6 +496,7 @@ public abstract class CigarBase /** * enumerate the ranges on seq that are marked as deleted in this cigar + * * @return int[] { vis_start, sym_start, length } */ public int[] getDeletedRegions() @@ -549,15 +511,15 @@ public abstract class CigarBase { switch (operation[i]) { - case M: - cursor += range[i]; - case I: - vcursor += range[i]; - break; - case D: - dr.addElement(new int[] - {vcursor, cursor, range[i]}); - cursor += range[i]; + case M: + cursor += range[i]; + case I: + vcursor += range[i]; + break; + case D: + dr.addElement(new int[] + { vcursor, cursor, range[i] }); + cursor += range[i]; } } if (dr.size() == 0) @@ -577,7 +539,9 @@ public abstract class CigarBase /** * sum of ranges in cigar string - * @return int number of residues hidden, matched, or gaps inserted into sequence + * + * @return int number of residues hidden, matched, or gaps inserted into + * sequence */ public int getFullWidth() { @@ -594,6 +558,7 @@ public abstract class CigarBase /** * Visible length of aligned sequence + * * @return int length of including gaps and less hidden regions */ public int getWidth() @@ -614,7 +579,9 @@ public abstract class CigarBase /** * mark a range of inserted residues - * @param range int + * + * @param range + * int */ public void addInsertion(int range) { @@ -623,7 +590,9 @@ public abstract class CigarBase /** * mark the next range residues as hidden (not aligned) or deleted - * @param range int + * + * @param range + * int */ public void addDeleted(int range) { @@ -633,64 +602,30 @@ public abstract class CigarBase /** * Modifies operation list to delete columns from start to end (inclusive) * editing will remove insertion operations, and convert matches to deletions - * @param start alignment column - * @param end alignment column - * @return boolean true if residues were marked as deleted. - public boolean deleteRange(int start, int end) - { - boolean deleted = false; - int op = 0, prevop = -1, firstm = -1, - lastm = -1, postop = -1; - int width = 0; // zero'th column - if (length > 0) - { - // find operation bracketing start of the range - do - { - if (operation[op] != D) - { - width += range[prevop = op]; - } - op++; - } - while (op < length && width < start); - } - if (width < start) - { - // run off end - add more operations up to deletion. - addInsertion(start - width); - } - else - { - // edit existing operations. - op = prevop; - width -= range[prevop]; - int[] oldrange = range; - char[] oldops = operation; - range = new int[oldrange.length]; - operation = new char[oldops.length]; - if (op < length) - { - do - { - if (operation[op] != D) - { - width += range[postop = op]; - } - op++; - } - while (op < length && width <= end); - } - } - if (deleted == true) - { - addDeleted(end - start + 1); - } - return deleted; - } + * + * @param start + * alignment column + * @param end + * alignment column + * @return boolean true if residues were marked as deleted. public boolean + * deleteRange(int start, int end) { boolean deleted = false; int op = + * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width = + * 0; // zero'th column if (length > 0) { // find operation bracketing + * start of the range do { if (operation[op] != D) { width += + * range[prevop = op]; } op++; } while (op < length && width < start); } + * if (width < start) { // run off end - add more operations up to + * deletion. addInsertion(start - width); } else { // edit existing + * operations. op = prevop; width -= range[prevop]; int[] oldrange = + * range; char[] oldops = operation; range = new int[oldrange.length]; + * operation = new char[oldops.length]; if (op < length) { do { if + * (operation[op] != D) { width += range[postop = op]; } op++; } while + * (op < length && width <= end); } } if (deleted == true) { + * addDeleted(end - start + 1); } return deleted; } */ /** - * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq + * Return an ENSEMBL style cigar string where D may indicates excluded parts + * of seq + * * @return String of form ([0-9]+[IMD])+ */ public String getCigarstring()