X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fdatamodel%2FCigarBase.java;h=0dfa84d2d177767ec994b795b09e3ef2a246a2f6;hb=17e77c3f2949a0729322b4a8d907f3f34b6a9914;hp=1e0705acc43dda0e01fcccac8c30ead4a9bdd13e;hpb=108d9a136d1794445f2169c04d80b7662085390d;p=jalview.git
diff --git a/src/jalview/datamodel/CigarBase.java b/src/jalview/datamodel/CigarBase.java
index 1e0705a..0dfa84d 100644
--- a/src/jalview/datamodel/CigarBase.java
+++ b/src/jalview/datamodel/CigarBase.java
@@ -1,13 +1,34 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9)
+ * Copyright (C) 2015 The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see .
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.datamodel;
+import jalview.util.MessageManager;
+
import java.util.Vector;
public abstract class CigarBase
{
/**
- * Base class for compact idiosyncratic representation of gaps and aligned residues
- * Regards to Tom Oldfield for his DynamicArray class.
- * 17th July 2006
+ * Base class for compact idiosyncratic representation of gaps and aligned
+ * residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006
* Not thread safe.
*/
public CigarBase()
@@ -16,93 +37,117 @@ public abstract class CigarBase
}
protected int length = 0;
- protected int _inc_length = 10; // extension range for addition of new operations
+
+ protected int _inc_length = 10; // extension range for addition of new
+
+ // operations
+
protected char[] operation = null;
+
protected int[] range = null;
+
/**
* Range of Hidden residues in seq (translated as deleted in seq)
*/
- public static final char D = 'D'; /**
- * Range of insertions to seq
- */
- public static final char I = 'I'; /**
- * Range of aligned residues
- */
- public static final char M = 'M';
+ public static final char D = 'D';
+
+ /**
+ * Range of insertions to seq
+ */
+ public static final char I = 'I';
+
+ /**
+ * Range of aligned residues
+ */
+ public static final char M = 'M';
+
static protected final char _case_shift = 'a' - 'A';
+
/**
- * Ugly function to get edited sequence string, start and end symbol positions and the deletion regions as an array of int pairs
- * May return null for an empty cigar string.
- * May return null for deletion ranges if there are none.
- * @param reference - the symbol sequence to apply the cigar operations to (or null if no sequence)
- * @param GapChar - the symbol to use for Insert operations
- * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3] {start, end, col} or null} the gapped sequence, first and last residue index, and the deletion ranges on the reference sequence
+ * Ugly function to get edited sequence string, start and end symbol positions
+ * and the deletion regions as an array of int pairs May return null for an
+ * empty cigar string. May return null for deletion ranges if there are none.
+ *
+ * @param reference
+ * - the symbol sequence to apply the cigar operations to (or null if
+ * no sequence)
+ * @param GapChar
+ * - the symbol to use for Insert operations
+ * @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]
+ * {start, end, col} or null} the gapped sequence, first and last
+ * residue index, and the deletion ranges on the reference sequence
*/
public Object[] getSequenceAndDeletions(String reference, char GapChar)
{
- int rlength=0;
+ int rlength = 0;
int[][] deletions = new int[length][];
int[][] trunc_deletions = null;
StringBuffer sq = new StringBuffer();
- int cursor = 0, alcursor=0,start = 0, startpos=0, end = 0, endpos=0, delcount = -1;
+ int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0, endpos = 0, delcount = -1;
boolean consecutive_del = false;
if (length == 0)
{
return null;
}
- if (reference!=null)
- rlength=reference.length();
+ if (reference != null)
+ {
+ rlength = reference.length();
+ }
boolean modstart = true;
for (int i = 0; i < length; i++)
{
switch (operation[i])
{
- case D:
- if (!consecutive_del)
- {
- deletions[++delcount] = new int[]
- {
- cursor, 0, alcursor};
- }
- cursor += range[i];
- deletions[delcount][1] = cursor - 1;
- consecutive_del = true;
- break;
- case I:
- consecutive_del = false;
- for (int r = 0; r < range[i]; r++)
+ case D:
+ if (!consecutive_del)
+ {
+ deletions[++delcount] = new int[] { cursor, 0, alcursor };
+ }
+ cursor += range[i];
+ deletions[delcount][1] = cursor - 1;
+ consecutive_del = true;
+ break;
+ case I:
+ consecutive_del = false;
+ for (int r = 0; r < range[i]; r++)
+ {
+ sq.append(GapChar);
+ alcursor++;
+ }
+ break;
+ case M:
+ consecutive_del = false;
+ if (modstart)
+ {
+ start = cursor;
+ startpos = alcursor;
+ modstart = false;
+ }
+ if (reference != null)
+ {
+ int sbend = cursor + range[i];
+ if (sbend > rlength)
{
- sq.append(GapChar);
- alcursor++;
+ sq.append(reference.substring(cursor, rlength));
+ while (sbend-- >= rlength)
+ {
+ sq.append(GapChar);
+ }
}
- break;
- case M:
- consecutive_del = false;
- if (modstart)
+ else
{
- start = cursor;
- startpos=alcursor;
- modstart = false;
+ sq.append(reference.substring(cursor, sbend));
}
- if (reference!=null) {
- int sbend = cursor+range[i];
- if (sbend>rlength) {
- sq.append(reference.substring(cursor, rlength));
- while (sbend-- >= rlength)
- {
- sq.append(GapChar);
- }
- } else {
- sq.append(reference.substring(cursor, sbend));
- }
- }
- alcursor+=range[i];
- cursor += range[i];
- end = cursor - 1;
- endpos = alcursor;
- break;
- default:
- throw new Error("Unknown SeqCigar operation '" + operation[i] + "'");
+ }
+ alcursor += range[i];
+ cursor += range[i];
+ end = cursor - 1;
+ endpos = alcursor;
+ break;
+ default:
+ throw new Error(MessageManager.formatMessage(
+ "error.unknown_seq_cigar_operation",
+ new String[] { new StringBuffer(operation[i]).toString() }));
}
}
if (++delcount > 0)
@@ -111,46 +156,57 @@ public abstract class CigarBase
System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
}
deletions = null;
- return new Object[]
- {
- ((reference!=null) ? sq.toString() : null),
- new int[] {
- start, startpos, end, endpos}, trunc_deletions};
+ return new Object[] { ((reference != null) ? sq.toString() : null),
+ new int[] { start, startpos, end, endpos }, trunc_deletions };
}
- protected void compact_operations() {
- int i=1;
- if (operation==null)
+
+ protected void compact_operations()
+ {
+ int i = 1;
+ if (operation == null)
+ {
return;
+ }
char last = operation[0];
- while (i0) {
+ while (i < length)
+ {
+ if (last == operation[i])
+ {
+ range[i - 1] += range[i];
+ int r = length - i;
+ if (r > 0)
+ {
System.arraycopy(range, i + 1, range, i, r);
System.arraycopy(operation, i + 1, operation, i, r);
}
length--;
- } else {
+ }
+ else
+ {
last = operation[i++];
}
}
}
+
/**
* turn a cigar string into a series of operation range pairs
- * @param cigarString String
+ *
+ * @param cigarString
+ * String
* @return object[] {char[] operation, int[] range}
- * @throws java.lang.Exception for improperly formated cigar strings or ones with unknown operations
+ * @throws java.lang.Exception
+ * for improperly formated cigar strings or ones with unknown
+ * operations
*/
public static Object[] parseCigarString(String cigarString)
- throws Exception
+ throws Exception
{
int ops = 0;
for (int i = 0, l = cigarString.length(); i < l; i++)
{
char c = cigarString.charAt(i);
- if (c == M || c == (M - _case_shift) || c == I || c == (I - _case_shift) ||
- c == D || c == (D - _case_shift))
+ if (c == M || c == (M - _case_shift) || c == I
+ || c == (I - _case_shift) || c == D || c == (D - _case_shift))
{
ops++;
}
@@ -166,46 +222,50 @@ public abstract class CigarBase
do
{
c = cigarString.charAt(j++);
- }
- while (c >= '0' && c <= '9' && j < l);
+ } while (c >= '0' && c <= '9' && j < l);
if (j >= l && c >= '0' && c <= '9')
{
- throw new Exception("Unterminated cigar string.");
+ throw new Exception(
+ MessageManager
+ .getString("exception.unterminated_cigar_string"));
}
try
{
String rangeint = cigarString.substring(i, j - 1);
range[op] = Integer.parseInt(rangeint);
i = j;
- }
- catch (Exception e)
+ } catch (Exception e)
{
- throw new Error("Implementation bug in parseCigarString");
+ throw new Error(
+ MessageManager
+ .getString("error.implementation_bug_parse_cigar_string"));
}
if (c >= 'a' && c <= 'z')
{
c -= _case_shift;
}
- if ( (c == M || c == I || c == D))
+ if ((c == M || c == I || c == D))
{
operation[op++] = c;
}
else
{
- throw new Exception("Unexpected operation '" + c +
- "' in cigar string (position " + i + " in '" +
- cigarString + "'");
+ throw new Exception(MessageManager.formatMessage(
+ "exception.unexpected_operation_cigar_string_pos",
+ new String[] { new StringBuffer(c).toString(),
+ Integer.valueOf(i).toString(), cigarString }));
}
}
- return new Object[]
- {
- operation, range};
+ return new Object[] { operation, range };
}
/**
* add an operation to cigar string
- * @param op char
- * @param range int
+ *
+ * @param op
+ * char
+ * @param range
+ * int
*/
public void addOperation(char op, int range)
{
@@ -215,10 +275,19 @@ public abstract class CigarBase
}
if (op != M && op != D && op != I)
{
- throw new Error("Implementation error. Invalid operation string.");
+ throw new Error(
+ MessageManager
+ .getString("error.implementation_error_invalid_operation_string"));
+ }
+ if (range == 0)
+ {
+ return; // No Operation to add.
+ }
+ if (range < 0)
+ {
+ throw new Error(
+ MessageManager.getString("error.invalid_range_string"));
}
- if (range<=0)
- throw new Error("Invalid range string (must be non-zero positive number)");
int lngth = 0;
if (operation == null)
{
@@ -236,234 +305,206 @@ public abstract class CigarBase
System.arraycopy(rng, 0, this.range, 0, length);
rng = null;
}
- if ((length>0) && (operation[length-1]==op))
+ if ((length > 0) && (operation[length - 1] == op))
+ {
length--; // modify existing operation.
+ }
else
- this.range[length]=0; // reset range
+ {
+ this.range[length] = 0; // reset range
+ }
this.operation[length] = op;
this.range[length++] += range;
}
/**
- * semi-efficient insert an operation on the current cigar string set at column pos (from 1)
- * NOTE: Insertion operations simply extend width of cigar result - affecting registration of alignment
- * Deletion ops will shorten length of result - and affect registration of alignment
- * Match ops will also affect length of result - affecting registration of alignment
- * (ie "10M".insert(4,I,3)->"4M3I3M") - (replace?)
- * (ie "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment)
- * (ie "5I5M".insert(4,I,3)->"8I5M") - real insertion
- * (ie "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms changed to Ds
- * (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to M, Ds changed to M.
- * (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts sequence left by 1 residue and extends it by 3
- * ( "10D5M".insert(-1,M,3)->"3M7D5M")
- * ( "10D5M".insert(0,M,3)->"7D8M")
- * ( "10D5M".insert(1,M,3)->"10D8M")
- *
- * ( "1M10D5M".insert(0,M,3)->"1M10D8M")
- * ( "1M10D5M".insert(1,M,3)->"
- *
+ * semi-efficient insert an operation on the current cigar string set at
+ * column pos (from 1) NOTE: Insertion operations simply extend width of cigar
+ * result - affecting registration of alignment Deletion ops will shorten
+ * length of result - and affect registration of alignment Match ops will also
+ * affect length of result - affecting registration of alignment (ie
+ * "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie
+ * "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie
+ * "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie
+ * "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms
+ * changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to
+ * M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts
+ * sequence left by 1 residue and extends it by 3 (
+ * "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (
+ * "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (
+ * "1M10D5M".insert(1,M,3)->"
+ *
* if pos is beyond width - I operations are added before the operation
- * @param pos int -1, 0-length of visible region, or greater to append new ops (with insertions in between)
- * @param op char
- * @param range int
- public void addOperationAt(int pos, char op, int range)
- {
- int cursor = -1; // mark the position for the current operation being edited.
- int o = 0;
- boolean last_d = false; // previous op was a deletion.
- if (pos < -1)
- throw new Error("pos<-1 is not supported.");
- while (o0)
- addInsertion(pos-cursor);
- // then just add the new operation. Regardless of what it is.
- addOperation(op, range);
- } else {
- int diff = pos - cursor;
-
- int e_length = length-o; // new edit operation array length.
- // diff<0 - can only happen before first insertion or match. - affects op and all following
- // dif==0 - only when at first position of existing op -
- // diff>0 - must preserve some existing operations
- int[] e_range = new int[e_length];
- System.arraycopy(this.range, o, e_range, 0, e_length);
- char[] e_op = new char[e_length];
- System.arraycopy(this.operation, o, e_op, 0, e_length);
- length = o; // can now use add_operation to extend list.
- int e_o=0; // current operation being edited.
- switch (op) {
- case M:
- switch (e_op[e_o])
- {
- case M:
- if (last_d && diff <= 0)
- {
- // reduce D's, if possible
- if (range<=this.range[o-1]) {
- this.range[o - 1] -= range;
- } else {
- this.range[o-1]=0;
- }
- if (this.range[o-1]==0)
- o--; // lose this op.
- }
- e_range[e_o] += range; // just add more matched residues
- break;
- case I:
- // change from insertion to match
- if (last_d && diff<=0)
- {
- // reduce D's, if possible
- if (range<=this.range[o-1]) {
- this.range[o - 1] -= range;
- } else {
- this.range[o-1]=0;
- }
- if (this.range[o-1]==0)
- o--; // lose this op.
- }
- e_range[e_o]
- break;
- default:
- throw new Inp
- }
-
- break;
- case I:
- break;
- case D:
- }
- break;
- default:
- throw new Error("Implementation Error: Unknown operation in addOperation!");
- }
- // finally, add remaining ops.
- while (e_o0) addInsertion(pos-cursor);
+ * // then just add the new operation. Regardless of what it is.
+ * addOperation(op, range); } else { int diff = pos - cursor;
+ *
+ * int e_length = length-o; // new edit operation array length. //
+ * diff<0 - can only happen before first insertion or match. -
+ * affects op and all following // dif==0 - only when at first
+ * position of existing op - // diff>0 - must preserve some existing
+ * operations int[] e_range = new int[e_length];
+ * System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op
+ * = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,
+ * e_length); length = o; // can now use add_operation to extend
+ * list. int e_o=0; // current operation being edited. switch (op) {
+ * case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //
+ * reduce D's, if possible if (range<=this.range[o-1]) { this.range[o
+ * - 1] -= range; } else { this.range[o-1]=0; } if
+ * (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=
+ * range; // just add more matched residues break; case I: // change
+ * from insertion to match if (last_d && diff<=0) { // reduce D's, if
+ * possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;
+ * } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose
+ * this op. } e_range[e_o] break; default: throw new Inp }
+ *
+ * break; case I: break; case D: } break; default: throw new
+ * Error("Implementation Error: Unknown operation in addOperation!");
+ * } // finally, add remaining ops. while (e_oend)
- throw new Error("Implementation Error: deleteRange out of bounds: start must be non-negative and less than end.");
+ if (start < 0 || start > end)
+ {
+ throw new Error(
+ MessageManager
+ .getString("error.implementation_error_delete_range_out_of_bounds"));
+ }
// find beginning
int cursor = 0; // mark the position for the current operation being edited.
- int rlength=1+end-start; // number of positions to delete
- int oldlen=length;
+ int rlength = 1 + end - start; // number of positions to delete
+ int oldlen = length;
int o = 0;
- boolean editing=false;
+ boolean editing = false;
char[] oldops = operation;
int[] oldrange = range;
- length=0;
+ length = 0;
operation = null;
range = null;
compact_operations();
- while (o0) {
- if (oldops[o] == D) {
+ while (o < oldlen && cursor <= end && rlength > 0)
+ {
+ if (oldops[o] == D)
+ {
// absorbed into new deleted region.
addDeleted(oldrange[o++]);
continue;
}
int remain = oldrange[o]; // number of op characters left to edit
- if (!editing) {
- if ( (cursor + remain) <= start)
+ if (!editing)
+ {
+ if ((cursor + remain) <= start)
{
- addOperation(oldops[o],oldrange[o]);
- cursor+=oldrange[o++];
+ addOperation(oldops[o], oldrange[o]);
+ cursor += oldrange[o++];
continue; // next operation
}
- editing=true;
+ editing = true;
// add operations before hidden region
- if (start-cursor>0) {
- addOperation(oldops[o], start- cursor);
+ if (start - cursor > 0)
+ {
+ addOperation(oldops[o], start - cursor);
remain -= start - cursor;
}
}
// start inserting new ops
- if (o0 && remain>0) {
- switch (oldops[o]) {
- case M:
- if (rlength>remain) {
- addDeleted(remain);
- deleted+=remain;
- } else {
- deleted+=rlength;
- addDeleted(rlength);
- if (remain-rlength>0)
- this.addOperation(M,remain-rlength); // add remaining back.
- rlength=0;
- remain=0;
- }
- break;
- case I:
- if (remain-rlength>0) {
- // only remove some gaps
- addInsertion(remain-rlength);
- rlength=0;
+ if (o < oldlen && editing && rlength > 0 && remain > 0)
+ {
+ switch (oldops[o])
+ {
+ case M:
+ if (rlength > remain)
+ {
+ addDeleted(remain);
+ deleted += remain;
+ }
+ else
+ {
+ deleted += rlength;
+ addDeleted(rlength);
+ if (remain - rlength > 0)
+ {
+ this.addOperation(M, remain - rlength); // add remaining back.
}
- break;
- case D:
- throw new Error("Implementation error."); // do nothing;
- default:
- throw new Error("Implementation Error! Unknown operation '"+oldops[o]+"'");
+ rlength = 0;
+ remain = 0;
+ }
+ break;
+ case I:
+ if (remain - rlength > 0)
+ {
+ // only remove some gaps
+ addInsertion(remain - rlength);
+ rlength = 0;
+ }
+ break;
+ case D:
+ throw new Error(
+ MessageManager.getString("error.implementation_error")); // do
+ // nothing;
+ default:
+ throw new Error(MessageManager.formatMessage(
+ "error.implementation_error_unknown_operation",
+ new String[] { new StringBuffer(oldops[o]).toString() }));
}
- rlength-=remain;
+ rlength -= remain;
remain = oldrange[++o]; // number of op characters left to edit
}
}
// add remaining
- while (o 0)
- {
- // find operation bracketing start of the range
- do
- {
- if (operation[op] != D)
- {
- width += range[prevop = op];
- }
- op++;
- }
- while (op < length && width < start);
- }
- if (width < start)
- {
- // run off end - add more operations up to deletion.
- addInsertion(start - width);
- }
- else
- {
- // edit existing operations.
- op = prevop;
- width -= range[prevop];
- int[] oldrange = range;
- char[] oldops = operation;
- range = new int[oldrange.length];
- operation = new char[oldops.length];
- if (op < length)
- {
- do
- {
- if (operation[op] != D)
- {
- width += range[postop = op];
- }
- op++;
- }
- while (op < length && width <= end);
- }
- }
- if (deleted == true)
- {
- addDeleted(end - start + 1);
- }
- return deleted;
- }
-*/
+ *
+ * @param start
+ * alignment column
+ * @param end
+ * alignment column
+ * @return boolean true if residues were marked as deleted. public boolean
+ * deleteRange(int start, int end) { boolean deleted = false; int op =
+ * 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =
+ * 0; // zero'th column if (length > 0) { // find operation bracketing
+ * start of the range do { if (operation[op] != D) { width +=
+ * range[prevop = op]; } op++; } while (op < length && width < start);
+ * } if (width < start) { // run off end - add more operations up to
+ * deletion. addInsertion(start - width); } else { // edit existing
+ * operations. op = prevop; width -= range[prevop]; int[] oldrange =
+ * range; char[] oldops = operation; range = new int[oldrange.length];
+ * operation = new char[oldops.length]; if (op < length) { do { if
+ * (operation[op] != D) { width += range[postop = op]; } op++; } while
+ * (op < length && width <= end); } } if (deleted == true) {
+ * addDeleted(end - start + 1); } return deleted; }
+ */
/**
- * Return an ENSEMBL style cigar string where D may indicates excluded parts of seq
+ * Return an ENSEMBL style cigar string where D may indicates excluded parts
+ * of seq
+ *
* @return String of form ([0-9]+[IMD])+
*/
public String getCigarstring()