1 package jalview.datamodel;
3 import jalview.analysis.AlignSeq;
9 private SequenceI refseq=null;
11 * Reference dataset sequence for the cigar string
14 public SequenceI getRefSeq() {
18 * Returns sequence as a string with cigar operations applied to it
21 public String getSequenceString(char GapChar)
23 return (length==0) ? "" : (String) getSequenceAndDeletions(refseq.getSequence(), GapChar)[0];
27 * recreates a gapped and edited version of RefSeq or null for an empty cigar string
30 public SequenceI getSeq(char GapChar) {
32 if (refseq==null || length==0)
34 Object[] edit_result=getSequenceAndDeletions(refseq.getSequence(), GapChar);
35 if (edit_result==null)
36 throw new Error("Implementation Error - unexpected null from getSequenceAndDeletions");
38 seq = new Sequence(refseq.getName(), (String) edit_result[0], refseq.getStart()+((int[]) edit_result[1])[0], refseq.getStart()+((int[]) edit_result[1])[2]);
39 seq.setDatasetSequence(refseq);
43 We don't allow this - refseq is given at construction time only
44 public void setSeq(SequenceI seq) {
49 * internal constructor - sets seq to a gapless sequence derived from seq
50 * and prepends any 'D' operations needed to get to the first residue of seq.
51 * @param seq SequenceI
52 * @return true if gaps are present in seq
54 private boolean _setSeq(SequenceI seq) {
55 boolean hasgaps=false;
58 throw new Error("Implementation Error - _setSeq(null)");
60 // Find correct sequence to reference and add initial hidden offset
61 SequenceI ds = seq.getDatasetSequence();
63 ds = new Sequence(seq.getName(),
64 AlignSeq.extractGaps(jalview.util.Comparison.GapChars, new String(seq.getSequence())),
68 // check that we haven't just duplicated an ungapped sequence.
69 if (ds.getLength()==seq.getLength()) {
76 if (ds.getStart()<seq.getStart()) {
77 addDeleted(seq.getStart()-ds.getStart());
82 * directly initialise a cigar object with a sequence of range, operation pairs and a sequence to apply it to.
83 * operation and range should be relative to the seq.getStart()'th residue of the dataset seq resolved from seq.
84 * @param seq SequenceI
85 * @param operation char[]
88 public SeqCigar(SequenceI seq, char operation[], int range[]) {
91 throw new Error("Implementation Bug. Null seq !");
92 if (operation.length!=range.length) {
93 throw new Error("Implementation Bug. Cigar Operation list!= range list");
96 if (operation!=null) {
97 this.operation = new char[operation.length+_inc_length];
98 this.range = new int[operation.length+_inc_length];
101 throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence.");
103 for (int i = this.length, j=0; j < operation.length; i++,j++)
105 char op = operation[j];
106 if (op != M && op != I && op != D)
109 "Implementation Bug. Cigar Operation '"+j+"' '"+op+"' not one of '"+M+"', '"+I+"', or '"+D+"'.");
111 this.operation[i] = op;
112 this.range[i] = range[j];
114 this.length+=operation.length;
116 this.operation = null;
120 throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence.");
125 * add range matched residues to cigar string
128 public void addMatch(int range) {
129 this.addOperation(M, range);
132 * Deleted regions mean that there will be discontinuous sequence numbering in the
133 * sequence returned by getSeq(char).
134 * @return true if there are non-terminal deletions
136 public boolean hasDeletedRegions() {
137 for (int i=1, l=length-1; i<l; i++) {
143 protected static void addSequenceOps(CigarBase cigar, SequenceI seq, int startpos, int endpos) {
146 int p = 0, res = seq.getLength();
151 boolean isGap = jalview.util.Comparison.isGap(seq.getCharAt(p++));
152 if ( (startpos <= p) && (p < endpos))
156 if (range > 0 && op != I)
158 cigar.addOperation(op, range);
166 if (range > 0 && op != M)
168 cigar.addOperation(op, range);
179 if (range > 0 && op != D)
181 cigar.addOperation(op, range);
189 // do nothing - insertions are not recorded in flanking regions.
195 cigar.addOperation(op, range);
199 * create a cigar string for given sequence
200 * @param seq SequenceI
202 public SeqCigar(SequenceI seq) {
205 throw new Error("Implementation error for new Cigar(SequenceI)");
208 // there is still work to do
209 addSequenceOps(this, seq, 0, seq.getLength());
212 public SeqCigar(SequenceI seq, int start, int end) {
215 throw new Error("Implementation error for new Cigar(SequenceI)");
218 // there is still work to do
219 addSequenceOps(this, seq, start, end);
224 * Create a cigar object from a cigar string like '[<I|D|M><range>]+'
225 * Will fail if the given seq already contains gaps (JBPNote: future implementation will fix)
226 * @param seq SequenceI object resolvable to a dataset sequence
227 * @param cigarString String
230 public static SeqCigar parseCigar(SequenceI seq, String cigarString)
233 Object[] opsandrange = parseCigarString(cigarString);
234 return new SeqCigar(seq, (char[]) opsandrange[0], (int[]) opsandrange[1]);
237 * non rigorous testing
241 * @param seq Sequence
242 * @param ex_cs_gapped String
245 public static String testCigar_string(Sequence seq, String ex_cs_gapped) {
246 SeqCigar c_sgapped = new SeqCigar(seq);
247 String cs_gapped = c_sgapped.getCigarstring();
248 if (!cs_gapped.equals(ex_cs_gapped))
249 System.err.println("Failed getCigarstring: incorect string '"+cs_gapped+"' != "+ex_cs_gapped);
252 public static boolean testSeqRecovery(SeqCigar gen_sgapped, SequenceI s_gapped) {
253 SequenceI gen_sgapped_s = gen_sgapped.getSeq('-');
254 if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence())) {
255 System.err.println("Couldn't reconstruct sequence.\n" +
256 gen_sgapped_s.getSequence() + "\n" +
257 s_gapped.getSequence());
262 public static void main(String argv[]) throws Exception {
263 Sequence s=new Sequence("MySeq", "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt",39,80);
265 Sequence s_gapped=new Sequence("MySeq", orig_gapped="----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt", 39,80);
266 String ex_cs_gapped="4I4M6I6M3I11M4I12M4I9M";
267 s_gapped.setDatasetSequence(s);
269 Sequence s_subsequence_gapped=new Sequence("MySeq", sub_gapped_s="------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh", 43,77);
271 s_subsequence_gapped.setDatasetSequence(s);
272 SeqCigar c_null = new SeqCigar(s);
273 String cs_null = c_null.getCigarstring();
274 if (cs_null.length()>0)
275 System.err.println("Failed getCigarstring: Unexpected cigar operations:"+cs_null);
276 testCigar_string(s_gapped, ex_cs_gapped);
277 SeqCigar gen_sgapped = SeqCigar.parseCigar(s, ex_cs_gapped);
278 if (!gen_sgapped.getCigarstring().equals(ex_cs_gapped))
279 System.err.println("Failed parseCigar("+ex_cs_gapped+")->getCigarString()->'"+gen_sgapped.getCigarstring()+"'");
280 testSeqRecovery(gen_sgapped, s_gapped);
281 // Test dataset resolution
282 SeqCigar sub_gapped = new SeqCigar(s_subsequence_gapped);
283 if (!testSeqRecovery(sub_gapped, s_subsequence_gapped))
284 System.err.println("Failed recovery for subsequence of dataset sequence");
286 if (sub_gapped.getWidth()!=sub_gapped_s.length())
287 System.err.println("Failed getWidth()");
289 sub_gapped.getFullWidth();
290 if (sub_gapped.hasDeletedRegions())
291 System.err.println("hasDeletedRegions is incorrect.");
292 // Test start-end region SeqCigar
293 SeqCigar sub_se_gp= new SeqCigar(s_subsequence_gapped, 8, 48);
294 if (sub_se_gp.getWidth()!=40)
295 System.err.println("SeqCigar(seq, start, end) not properly clipped alignsequence.");
296 System.out.println("Original sequence align:\n"+sub_gapped_s+"\nReconstructed window from 8 to 48\n"+"XXXXXXXX"+sub_se_gp.getSequenceString('-')+"...."+"\nCigar String:"+sub_se_gp.getCigarstring()+"");