/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel;
import jalview.util.MessageManager;
import java.util.Vector;
public abstract class CigarBase
{
/**
* Base class for compact idiosyncratic representation of gaps and aligned
* residues Regards to Tom Oldfield for his DynamicArray class. 17th July 2006
* Not thread safe.
*/
public CigarBase()
{
// nothing to be done (probably)
}
protected int length = 0;
protected int _inc_length = 10; // extension range for addition of new
// operations
protected char[] operation = null;
protected int[] range = null;
/**
* Range of Hidden residues in seq (translated as deleted in seq)
*/
public static final char D = 'D';
/**
* Range of insertions to seq
*/
public static final char I = 'I';
/**
* Range of aligned residues
*/
public static final char M = 'M';
static protected final char _case_shift = 'a' - 'A';
/**
* Ugly function to get edited sequence string, start and end symbol positions
* and the deletion regions as an array of int pairs May return null for an
* empty cigar string. May return null for deletion ranges if there are none.
*
* @param reference
* - the symbol sequence to apply the cigar operations to (or null if
* no sequence)
* @param GapChar
* - the symbol to use for Insert operations
* @return Object[] { String, int[] {start, startcol, end, endcol}, int[][3]
* {start, end, col} or null} the gapped sequence, first and last
* residue index, and the deletion ranges on the reference sequence
*/
public Object[] getSequenceAndDeletions(String reference, char GapChar)
{
int rlength = 0;
int[][] deletions = new int[length][];
int[][] trunc_deletions = null;
StringBuffer sq = new StringBuffer();
int cursor = 0, alcursor = 0, start = 0, startpos = 0, end = 0, endpos = 0, delcount = -1;
boolean consecutive_del = false;
if (length == 0)
{
return null;
}
if (reference != null)
{
rlength = reference.length();
}
boolean modstart = true;
for (int i = 0; i < length; i++)
{
switch (operation[i])
{
case D:
if (!consecutive_del)
{
deletions[++delcount] = new int[] { cursor, 0, alcursor };
}
cursor += range[i];
deletions[delcount][1] = cursor - 1;
consecutive_del = true;
break;
case I:
consecutive_del = false;
for (int r = 0; r < range[i]; r++)
{
sq.append(GapChar);
alcursor++;
}
break;
case M:
consecutive_del = false;
if (modstart)
{
start = cursor;
startpos = alcursor;
modstart = false;
}
if (reference != null)
{
int sbend = cursor + range[i];
if (sbend > rlength)
{
sq.append(reference.substring(cursor, rlength));
while (sbend-- >= rlength)
{
sq.append(GapChar);
}
}
else
{
sq.append(reference.substring(cursor, sbend));
}
}
alcursor += range[i];
cursor += range[i];
end = cursor - 1;
endpos = alcursor;
break;
default:
throw new Error(MessageManager.formatMessage(
"error.unknown_seq_cigar_operation",
new String[] { new StringBuffer(operation[i]).toString() }));
}
}
if (++delcount > 0)
{
trunc_deletions = new int[delcount][];
System.arraycopy(deletions, 0, trunc_deletions, 0, delcount);
}
deletions = null;
return new Object[] { ((reference != null) ? sq.toString() : null),
new int[] { start, startpos, end, endpos }, trunc_deletions };
}
protected void compact_operations()
{
int i = 1;
if (operation == null)
{
return;
}
char last = operation[0];
while (i < length)
{
if (last == operation[i])
{
range[i - 1] += range[i];
int r = length - i;
if (r > 0)
{
System.arraycopy(range, i + 1, range, i, r);
System.arraycopy(operation, i + 1, operation, i, r);
}
length--;
}
else
{
last = operation[i++];
}
}
}
/**
* turn a cigar string into a series of operation range pairs
*
* @param cigarString
* String
* @return object[] {char[] operation, int[] range}
* @throws java.lang.Exception
* for improperly formated cigar strings or ones with unknown
* operations
*/
public static Object[] parseCigarString(String cigarString)
throws Exception
{
int ops = 0;
for (int i = 0, l = cigarString.length(); i < l; i++)
{
char c = cigarString.charAt(i);
if (c == M || c == (M - _case_shift) || c == I
|| c == (I - _case_shift) || c == D || c == (D - _case_shift))
{
ops++;
}
}
char[] operation = new char[ops];
int[] range = new int[ops];
int op = 0;
int i = 0, l = cigarString.length();
while (i < l)
{
char c;
int j = i;
do
{
c = cigarString.charAt(j++);
} while (c >= '0' && c <= '9' && j < l);
if (j >= l && c >= '0' && c <= '9')
{
throw new Exception(
MessageManager
.getString("exception.unterminated_cigar_string"));
}
try
{
String rangeint = cigarString.substring(i, j - 1);
range[op] = Integer.parseInt(rangeint);
i = j;
} catch (Exception e)
{
throw new Error(
MessageManager
.getString("error.implementation_bug_parse_cigar_string"));
}
if (c >= 'a' && c <= 'z')
{
c -= _case_shift;
}
if ((c == M || c == I || c == D))
{
operation[op++] = c;
}
else
{
throw new Exception(MessageManager.formatMessage(
"exception.unexpected_operation_cigar_string_pos",
new String[] { new StringBuffer(c).toString(),
Integer.valueOf(i).toString(), cigarString }));
}
}
return new Object[] { operation, range };
}
/**
* add an operation to cigar string
*
* @param op
* char
* @param range
* int
*/
public void addOperation(char op, int range)
{
if (op >= 'a' && op <= 'z')
{
op -= _case_shift;
}
if (op != M && op != D && op != I)
{
throw new Error(
MessageManager
.getString("error.implementation_error_invalid_operation_string"));
}
if (range == 0)
{
return; // No Operation to add.
}
if (range < 0)
{
throw new Error(
MessageManager.getString("error.invalid_range_string"));
}
int lngth = 0;
if (operation == null)
{
this.operation = new char[_inc_length];
this.range = new int[_inc_length];
}
if (length + 1 == operation.length)
{
char[] ops = this.operation;
this.operation = new char[length + 1 + _inc_length];
System.arraycopy(ops, 0, this.operation, 0, length);
ops = null;
int[] rng = this.range;
this.range = new int[length + 1 + _inc_length];
System.arraycopy(rng, 0, this.range, 0, length);
rng = null;
}
if ((length > 0) && (operation[length - 1] == op))
{
length--; // modify existing operation.
}
else
{
this.range[length] = 0; // reset range
}
this.operation[length] = op;
this.range[length++] += range;
}
/**
* semi-efficient insert an operation on the current cigar string set at
* column pos (from 1) NOTE: Insertion operations simply extend width of cigar
* result - affecting registration of alignment Deletion ops will shorten
* length of result - and affect registration of alignment Match ops will also
* affect length of result - affecting registration of alignment (ie
* "10M".insert(4,I,3)->"4M3I3M") - (replace?) (ie
* "10M".insert(4,D,3)->"4M3D3M") - (shortens alignment) (ie
* "5I5M".insert(4,I,3)->"8I5M") - real insertion (ie
* "5I5M".insert(4,D,3)->"4I2D3M") - shortens aligment - I's are removed, Ms
* changed to Ds (ie "10M".insert(4,M,3)->"13M") - lengthens - Is changed to
* M, Ds changed to M. (ie "5I5M".insert(4,M,3)->"4I8M") - effectively shifts
* sequence left by 1 residue and extends it by 3 (
* "10D5M".insert(-1,M,3)->"3M7D5M") ( "10D5M".insert(0,M,3)->"7D8M") (
* "10D5M".insert(1,M,3)->"10D8M") ( "1M10D5M".insert(0,M,3)->"1M10D8M") (
* "1M10D5M".insert(1,M,3)->"
*
* if pos is beyond width - I operations are added before the operation
*
* @param pos
* int -1, 0-length of visible region, or greater to append new ops
* (with insertions in between)
* @param op
* char
* @param range
* int public void addOperationAt(int pos, char op, int range) { int
* cursor = -1; // mark the position for the current operation being
* edited. int o = 0; boolean last_d = false; // previous op was a
* deletion. if (pos < -1) throw new
* Error("pos<-1 is not supported."); while (o0) addInsertion(pos-cursor);
* // then just add the new operation. Regardless of what it is.
* addOperation(op, range); } else { int diff = pos - cursor;
*
* int e_length = length-o; // new edit operation array length. //
* diff<0 - can only happen before first insertion or match. -
* affects op and all following // dif==0 - only when at first
* position of existing op - // diff>0 - must preserve some existing
* operations int[] e_range = new int[e_length];
* System.arraycopy(this.range, o, e_range, 0, e_length); char[] e_op
* = new char[e_length]; System.arraycopy(this.operation, o, e_op, 0,
* e_length); length = o; // can now use add_operation to extend
* list. int e_o=0; // current operation being edited. switch (op) {
* case M: switch (e_op[e_o]) { case M: if (last_d && diff <= 0) { //
* reduce D's, if possible if (range<=this.range[o-1]) { this.range[o
* - 1] -= range; } else { this.range[o-1]=0; } if
* (this.range[o-1]==0) o--; // lose this op. } e_range[e_o] +=
* range; // just add more matched residues break; case I: // change
* from insertion to match if (last_d && diff<=0) { // reduce D's, if
* possible if (range<=this.range[o-1]) { this.range[o - 1] -= range;
* } else { this.range[o-1]=0; } if (this.range[o-1]==0) o--; // lose
* this op. } e_range[e_o] break; default: throw new Inp }
*
* break; case I: break; case D: } break; default: throw new
* Error("Implementation Error: Unknown operation in addOperation!");
* } // finally, add remaining ops. while (e_o end)
{
throw new Error(
MessageManager
.getString("error.implementation_error_delete_range_out_of_bounds"));
}
// find beginning
int cursor = 0; // mark the position for the current operation being edited.
int rlength = 1 + end - start; // number of positions to delete
int oldlen = length;
int o = 0;
boolean editing = false;
char[] oldops = operation;
int[] oldrange = range;
length = 0;
operation = null;
range = null;
compact_operations();
while (o < oldlen && cursor <= end && rlength > 0)
{
if (oldops[o] == D)
{
// absorbed into new deleted region.
addDeleted(oldrange[o++]);
continue;
}
int remain = oldrange[o]; // number of op characters left to edit
if (!editing)
{
if ((cursor + remain) <= start)
{
addOperation(oldops[o], oldrange[o]);
cursor += oldrange[o++];
continue; // next operation
}
editing = true;
// add operations before hidden region
if (start - cursor > 0)
{
addOperation(oldops[o], start - cursor);
remain -= start - cursor;
}
}
// start inserting new ops
if (o < oldlen && editing && rlength > 0 && remain > 0)
{
switch (oldops[o])
{
case M:
if (rlength > remain)
{
addDeleted(remain);
deleted += remain;
}
else
{
deleted += rlength;
addDeleted(rlength);
if (remain - rlength > 0)
{
this.addOperation(M, remain - rlength); // add remaining back.
}
rlength = 0;
remain = 0;
}
break;
case I:
if (remain - rlength > 0)
{
// only remove some gaps
addInsertion(remain - rlength);
rlength = 0;
}
break;
case D:
throw new Error(
MessageManager.getString("error.implementation_error")); // do
// nothing;
default:
throw new Error(MessageManager.formatMessage(
"error.implementation_error_unknown_operation",
new String[] { new StringBuffer(oldops[o]).toString() }));
}
rlength -= remain;
remain = oldrange[++o]; // number of op characters left to edit
}
}
// add remaining
while (o < oldlen)
{
addOperation(oldops[o], oldrange[o++]);
}
// if (cursor<(start+1)) {
// ran out of ops - nothing to do here ?
// addInsertion(start-cursor);
// }
return deleted;
}
/**
* Deleted regions mean that there will be discontinuous sequence numbering in
* the sequence returned by getSeq(char).
*
* @return true if there deletions
*/
public boolean hasDeletedRegions()
{
for (int i = 0; i < length; i++)
{
if (operation[i] == D)
{
return true;
}
}
return false;
}
/**
* enumerate the ranges on seq that are marked as deleted in this cigar
*
* @return int[] { vis_start, sym_start, length }
*/
public int[] getDeletedRegions()
{
if (length == 0)
{
return null;
}
Vector dr = new Vector();
int cursor = 0, vcursor = 0;
for (int i = 0; i < length; i++)
{
switch (operation[i])
{
case M:
cursor += range[i];
case I:
vcursor += range[i];
break;
case D:
dr.addElement(new int[] { vcursor, cursor, range[i] });
cursor += range[i];
}
}
if (dr.size() == 0)
{
return null;
}
int[] delregions = new int[dr.size() * 3];
for (int i = 0, l = dr.size(); i < l; i++)
{
int[] reg = (int[]) dr.elementAt(i);
delregions[i * 3] = reg[0];
delregions[i * 3 + 1] = reg[1];
delregions[i * 3 + 2] = reg[2];
}
return delregions;
}
/**
* sum of ranges in cigar string
*
* @return int number of residues hidden, matched, or gaps inserted into
* sequence
*/
public int getFullWidth()
{
int w = 0;
if (range != null)
{
for (int i = 0; i < length; i++)
{
w += range[i];
}
}
return w;
}
/**
* Visible length of aligned sequence
*
* @return int length of including gaps and less hidden regions
*/
public int getWidth()
{
int w = 0;
if (range != null)
{
for (int i = 0; i < length; i++)
{
if (operation[i] == M || operation[i] == I)
{
w += range[i];
}
}
}
return w;
}
/**
* mark a range of inserted residues
*
* @param range
* int
*/
public void addInsertion(int range)
{
this.addOperation(I, range);
}
/**
* mark the next range residues as hidden (not aligned) or deleted
*
* @param range
* int
*/
public void addDeleted(int range)
{
this.addOperation(D, range);
}
/**
* Modifies operation list to delete columns from start to end (inclusive)
* editing will remove insertion operations, and convert matches to deletions
*
* @param start
* alignment column
* @param end
* alignment column
* @return boolean true if residues were marked as deleted. public boolean
* deleteRange(int start, int end) { boolean deleted = false; int op =
* 0, prevop = -1, firstm = -1, lastm = -1, postop = -1; int width =
* 0; // zero'th column if (length > 0) { // find operation bracketing
* start of the range do { if (operation[op] != D) { width +=
* range[prevop = op]; } op++; } while (op < length && width < start);
* } if (width < start) { // run off end - add more operations up to
* deletion. addInsertion(start - width); } else { // edit existing
* operations. op = prevop; width -= range[prevop]; int[] oldrange =
* range; char[] oldops = operation; range = new int[oldrange.length];
* operation = new char[oldops.length]; if (op < length) { do { if
* (operation[op] != D) { width += range[postop = op]; } op++; } while
* (op < length && width <= end); } } if (deleted == true) {
* addDeleted(end - start + 1); } return deleted; }
*/
/**
* Return an ENSEMBL style cigar string where D may indicates excluded parts
* of seq
*
* @return String of form ([0-9]+[IMD])+
*/
public String getCigarstring()
{
StringBuffer cigarString = new StringBuffer();
for (int i = 0; i < length; i++)
{
cigarString.append("" + range[i]);
cigarString.append(operation[i]);
}
return cigarString.toString();
}
}