+/*
+ * Jalview - A Sequence Alignment Editor and Viewer
+ * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
package jalview.datamodel;
+import java.util.Hashtable;
+
import jalview.analysis.*;
-import jalview.util.ShiftList;
-import java.util.Vector;
+import jalview.util.*;
public class SeqCigar
extends CigarSimple
{
- /**
- * start(inclusive) and end(exclusive) of subsequence on refseq
- */
- private int start, end;
+ /**
+ * start(inclusive) and end(exclusive) of subsequence on refseq
+ */
+ private int start, end;
private SequenceI refseq = null;
+ private Hashtable seqProps;
/**
* Reference dataset sequence for the cigar string
* @return SequenceI
{
return refseq;
}
+
/**
* Returns the inclusive start index of this cigar's subsequence on the
* reference sequence.
*
* @return int start index of cigar ops on refSeq
*/
- public int getStart() {
+ public int getStart()
+ {
return start;
}
+
/**
* Returns the exclusive end index of this cigar's subsequence on the
* reference sequence.
*
* @return int end index (exclusive) of cigar ops on refSeq
*/
- public int getEnd() {
+ public int getEnd()
+ {
return end;
}
+
/**
* Returns sequence as a string with cigar operations applied to it
* @return String
public String getSequenceString(char GapChar)
{
return (length == 0) ? "" :
- (String) getSequenceAndDeletions(refseq.getSequence().substring(start, end), GapChar)[0];
+ (String) getSequenceAndDeletions(refseq.getSequenceAsString(start, end),
+ GapChar)[0];
}
/**
{
return null;
}
- Object[] edit_result = getSequenceAndDeletions(refseq.getSequence().substring(start,end),
+ Object[] edit_result = getSequenceAndDeletions(refseq.getSequenceAsString(
+ start, end),
GapChar);
if (edit_result == null)
{
}
int bounds[] = (int[]) edit_result[1];
seq = new Sequence(refseq.getName(), (String) edit_result[0],
- refseq.getStart() + start+bounds[0],
- refseq.getStart() + start+((bounds[2]==0) ? -1 : bounds[2]));
+ refseq.getStart() + start + bounds[0],
+ refseq.getStart() + start +
+ ( (bounds[2] == 0) ? -1 : bounds[2]));
+ seq.setDescription(refseq.getDescription());
+ int sstart = seq.getStart(),
+ send = seq.getEnd();
// seq.checkValidRange(); probably not needed
+ // recover local properties if present
+ if (seqProps!=null)
+ {
+ // this recovers dataset sequence reference as well as local features, names, start/end settings.
+ SeqsetUtils.SeqCharacterUnhash(seq, seqProps);
+ }
+ // ensure dataset sequence is up to date from local reference
seq.setDatasetSequence(refseq);
+ seq.setStart(sstart);
+ seq.setEnd(send);
return seq;
}
* @param _e index after last position in (possibly gapped) seq
* @return true if gaps are present in seq
*/
- private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, int _e)
+ private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s,
+ int _e)
{
boolean hasgaps = false;
if (seq == null)
{
throw new Error("Implementation Error - _setSeq(null,...)");
}
- if (_s<0)
- throw new Error("Implementation Error: _s="+_s);
- String seq_string = seq.getSequence();
- if (_e==0 || _e<_s || _e>seq_string.length())
- _e=seq_string.length();
+ if (_s < 0)
+ {
+ throw new Error("Implementation Error: _s=" + _s);
+ }
+ String seq_string = seq.getSequenceAsString();
+ if (_e == 0 || _e < _s || _e > seq_string.length())
+ {
+ _e = seq_string.length();
+ }
// resolve start and end positions relative to ungapped reference sequence
- start = seq.findPosition(_s)-seq.getStart();
- end = seq.findPosition(_e)-seq.getStart();
- int l_ungapped = end-start;
+ start = seq.findPosition(_s) - seq.getStart();
+ end = seq.findPosition(_e) - seq.getStart();
+ int l_ungapped = end - start;
// Find correct sequence to reference and correct start and end - if necessary
SequenceI ds = seq.getDatasetSequence();
if (ds == null)
{
// make a new dataset sequence
String ungapped = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
- new String(seq_string));
- l_ungapped=ungapped.length();
+ new String(seq_string));
+ l_ungapped = ungapped.length();
// check that we haven't just duplicated an ungapped sequence.
if (l_ungapped == seq.getLength())
{
{
ds = new Sequence(seq.getName(), ungapped,
seq.getStart(),
- seq.getStart()+ungapped.length()-1);
+ seq.getStart() + ungapped.length() - 1);
// JBPNote: this would be consistent but may not be useful
// seq.setDatasetSequence(ds);
}
// add in offset between seq and the dataset sequence
if (ds.getStart() < seq.getStart())
{
- int offset=seq.getStart()-ds.getStart();
- if (initialDeletion) {
- // absolute cigar string
- addDeleted(_s+offset);
- start=0;
- end+=offset;
- } else {
- // normal behaviour - just mark start and end subsequence
- start+=offset;
- end+=offset;
+ int offset = seq.getStart() - ds.getStart();
+ if (initialDeletion)
+ {
+ // absolute cigar string
+ addDeleted(_s + offset);
+ start = 0;
+ end += offset;
+ }
+ else
+ {
+ // normal behaviour - just mark start and end subsequence
+ start += offset;
+ end += offset;
}
}
// any gaps to process ?
- if (l_ungapped!=(_e-_s))
- hasgaps=true;
-
- this.refseq = ds;
+ if (l_ungapped != (_e - _s))
+ {
+ hasgaps = true;
+ }
+ refseq = ds;
+ // copy over local properties for the sequence instance of the refseq
+ seqProps = SeqsetUtils.SeqCharacterHash(seq);
// Check offsets
- if (end>ds.getLength()) {
+ if (end > ds.getLength())
+ {
throw new Error("SeqCigar: Possible implementation error: sequence is longer than dataset sequence");
// end = ds.getLength();
}
this.operation = null;
this.range = null;
this.length = 0;
- if (_setSeq(seq, false,0, 0))
+ if (_setSeq(seq, false, 0, 0))
{
throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence.");
}
{
this.addOperation(M, range);
}
+
/**
* Adds
* insertion and match operations based on seq to the cigar up to
* @param initialDeletions if true then initial deletions will be added from start of seq to startpos
*/
protected static void addSequenceOps(CigarBase cigar, SequenceI seq,
- int startpos, int endpos, boolean initialDeletions)
+ int startpos, int endpos,
+ boolean initialDeletions)
{
char op = '\0';
int range = 0;
int p = 0, res = seq.getLength();
if (!initialDeletions)
- p=startpos;
-
+ {
+ p = startpos;
+ }
while (p <= endpos)
{
}
_setSeq(seq, false, 0, 0);
// there is still work to do
- addSequenceOps(this, seq, 0, seq.getLength()-1, false);
+ addSequenceOps(this, seq, 0, seq.getLength() - 1, false);
}
/**
{
throw new Error("Implementation error for new Cigar(SequenceI)");
}
- _setSeq(seq, false, start, end+1);
+ _setSeq(seq, false, start, end + 1);
// there is still work to do
addSequenceOps(this, seq, start, end, false);
}
{
SequenceI[] seqs = new SequenceI[alseqs.length];
StringBuffer[] g_seqs = new StringBuffer[alseqs.length];
- String[] alseqs_string=new String[alseqs.length];
+ String[] alseqs_string = new String[alseqs.length];
Object[] gs_regions = new Object[alseqs.length];
for (int i = 0; i < alseqs.length; i++)
{
- alseqs_string[i]=alseqs[i].getRefSeq().
- getSequence().substring(alseqs[i].start,alseqs[i].end);
- gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], gapCharacter); // gapped sequence, {start, start col, end. endcol}, hidden regions {{start, end, col}})
+ alseqs_string[i] = alseqs[i].getRefSeq().
+ getSequenceAsString(alseqs[i].start, alseqs[i].end);
+ gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i],
+ gapCharacter); // gapped sequence, {start, start col, end. endcol}, hidden regions {{start, end, col}})
if (gs_regions[i] == null)
{
throw new Error("Implementation error: " + i +
else
{
g_seqs[s].insert(inspos,
- alseqs_string[i].substring(region[0], region[1] + 1));
+ alseqs_string[i].substring(region[0],
+ region[1] + 1));
}
}
shifts.addShift(region[2], insert.length); // update shift in alignment frame of reference
- if (segments==null)
+ if (segments == null)
+ {
// add a hidden column for this deletion
- colsel.hideColumns(inspos, inspos+insert.length-1);
+ colsel.hideColumns(inspos, inspos + insert.length - 1);
+ }
}
}
}
int[] bounds = ( (int[]) ( (Object[]) gs_regions[i])[1]);
SequenceI ref = alseqs[i].getRefSeq();
seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(),
- ref.getStart() + alseqs[i].start+bounds[0],
- ref.getStart() + alseqs[i].start+(bounds[2]==0 ? -1 : bounds[2]));
+ ref.getStart() + alseqs[i].start + bounds[0],
+ ref.getStart() + alseqs[i].start +
+ (bounds[2] == 0 ? -1 : bounds[2]));
seqs[i].setDatasetSequence(ref);
+ seqs[i].setDescription(ref.getDescription());
}
- if (segments!=null) {
- for (int i=0; i<segments.length; i+=3) {
+ if (segments != null)
+ {
+ for (int i = 0; i < segments.length; i += 3)
+ {
//int start=shifts.shift(segments[i]-1)+1;
//int end=shifts.shift(segments[i]+segments[i+1]-1)-1;
- colsel.hideColumns(segments[i+1], segments[i+1]+segments[i+2]-1);
+ colsel.hideColumns(segments[i + 1],
+ segments[i + 1] + segments[i + 2] - 1);
}
}
return seqs;
public static boolean testSeqRecovery(SeqCigar gen_sgapped,
SequenceI s_gapped)
{
- // this is non-rigorous - start and end recovery is not tested.
+ // this is non-rigorous - start and end recovery is not tested.
SequenceI gen_sgapped_s = gen_sgapped.getSeq('-');
if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence()))
{
System.err.println("Couldn't reconstruct sequence.\n" +
- gen_sgapped_s.getSequence() + "\n" +
- s_gapped.getSequence());
+ gen_sgapped_s.getSequenceAsString() + "\n" +
+ s_gapped.getSequenceAsString());
return false;
}
return true;
{
String o_seq;
Sequence s = new Sequence("MySeq",
- o_seq = "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt",
+ o_seq =
+ "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt",
39, 80);
String orig_gapped;
Sequence s_gapped = new Sequence("MySeq",
- orig_gapped = "----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt",
+ orig_gapped =
+ "----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt",
39, 80);
String ex_cs_gapped = "4I4M6I6M3I11M4I12M4I9M";
s_gapped.setDatasetSequence(s);
String sub_gapped_s;
Sequence s_subsequence_gapped = new Sequence("MySeq",
- sub_gapped_s = "------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh",
+ sub_gapped_s =
+ "------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh",
43, 77);
s_subsequence_gapped.setDatasetSequence(s);
+ "\nCigar String:" + sub_se_gp.getCigarstring() + "\n"
);
SequenceI ssgp = sub_se_gp.getSeq('-');
- System.out.println("\t " + ssgp.getSequence());
+ System.out.println("\t " + ssgp.getSequenceAsString());
for (int r = 0; r < 10; r++)
{
sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48);
{
int e = st + rs;
sub_se_gp.deleteRange(st, e);
- String ssgapedseq = sub_se_gp.getSeq('-').getSequence();
+ String ssgapedseq = sub_se_gp.getSeq('-').getSequenceAsString();
System.out.println(st + "," + e + "\t:" + ssgapedseq);
- st -=3;
+ st -= 3;
}
}
{
System.out.println("" + al.getSequenceAt(i).getName() + "\t" +
al.getSequenceAt(i).getStart() + "\t" +
al.getSequenceAt(i).getEnd() + "\t" +
- al.getSequenceAt(i).getSequence());
+ al.getSequenceAt(i).getSequenceAsString());
}
}
{
System.out.println("" + al.getSequenceAt(i).getName() + "\t" +
al.getSequenceAt(i).getStart() + "\t" +
al.getSequenceAt(i).getEnd() + "\t" +
- al.getSequenceAt(i).getSequence());
+ al.getSequenceAt(i).getSequenceAsString());
}
}
// if (!ssgapedseq.equals("ryas---dtqqwa----slchvh"))