2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.analysis.AlignSeq;
24 import jalview.analysis.SeqsetUtils;
25 import jalview.analysis.SeqsetUtils.SequenceInfo;
26 import jalview.util.MessageManager;
27 import jalview.util.ShiftList;
29 import java.util.Enumeration;
30 import java.util.Hashtable;
// Cigar-style description of a sub-range of a reference sequence, built on CigarSimple.
32 public class SeqCigar extends CigarSimple
35 * start(inclusive) and end(exclusive) of subsequence on refseq
// Both bounds are assigned in _setSeq as seq.findPosition(...) - seq.getStart().
37 private int start, end;
// Sequence the cigar is expressed against: getSeq reads its name, description,
// start and residues, and installs it as the dataset sequence of the result.
39 private SequenceI refseq = null;
// Local properties of the source sequence, captured with
// SeqsetUtils.SeqCharacterHash in _setSeq and re-applied with
// SeqsetUtils.SeqCharacterUnhash in getSeq.
41 private SequenceInfo seqProps;
44 * Reference dataset sequence for the cigar string
48 public SequenceI getRefSeq()
55 * @return int start index of cigar ops on refSeq
64 * @return int end index (exclusive) of cigar ops on refSeq
74 * @return position in sequence for column (or -1 if no match state exists)
// Maps an alignment column to a sequence position by walking the cigar
// operations in order. The doc comment for this method states that -1 is
// returned when no match state exists for the column; that branch is not
// visible in this view.
76 public int findPosition(int column)
// p starts at the reference position of the first residue covered by the cigar.
// NOTE(review): w / ew look like the running start / end column of the current
// operation - their updates are elided here, confirm against the full source.
78 int w = 0, ew, p = refseq.findPosition(start);
85 for (int i = 0; i < length; i++)
// Separate tests for M-or-D, M-or-I and I operations; the statements guarded
// by each test are not visible in this view.
87 if (operation[i] == M || operation[i] == D)
91 if (operation[i] == M || operation[i] == I)
96 if (operation[i] == I)
// Steps back from p by the distance between ew and the requested column.
100 return p - (ew - column);
110 * Returns sequence as a string with cigar operations applied to it
// Returns the refseq sub-range [start, end) with the cigar operations applied,
// using GapChar as the gap symbol. An empty cigar (length == 0) yields "".
115 public String getSequenceString(char GapChar)
117 return (length == 0) ? ""
// Element 0 of the getSequenceAndDeletions result is the edited sequence string.
118 : (String) getSequenceAndDeletions(
119 refseq.getSequenceAsString(start, end), GapChar)[0];
123 * recreates a gapped and edited version of RefSeq or null for an empty cigar
// Rebuilds a gapped SequenceI from refseq and the cigar operations, using
// GapChar as the gap symbol.
// Throws Error if getSequenceAndDeletions returns null.
128 public SequenceI getSeq(char GapChar)
// Nothing to build without a reference sequence or with an empty cigar; the
// doc comment for this method says null is returned in that case (the return
// statement itself is elided in this view).
131 if (refseq == null || length == 0)
135 Object[] edit_result = getSequenceAndDeletions(
136 refseq.getSequenceAsString(start, end), GapChar);
137 if (edit_result == null)
139 throw new Error(MessageManager.getString(
140 "error.implementation_error_unexpected_null_from_get_sequence_and_deletions"));
// edit_result[0] is the edited sequence string, edit_result[1] an int[] of bounds.
142 int bounds[] = (int[]) edit_result[1];
// Start and end are offsets from refseq.getStart() + start; a zero bounds[2]
// is replaced by -1, which puts the end one before that base position.
143 seq = new Sequence(refseq.getName(), (String) edit_result[0],
144 refseq.getStart() + start + bounds[0], refseq.getStart() + start
145 + ((bounds[2] == 0) ? -1 : bounds[2]));
146 seq.setDescription(refseq.getDescription());
// Remember the computed range: the property restore below may overwrite
// start/end (see the existing comment on SeqCharacterUnhash).
147 int sstart = seq.getStart(), send = seq.getEnd();
148 // seq.checkValidRange(); probably not needed
149 // recover local properties if present
150 if (seqProps != null)
152 // this recovers dataset sequence reference as well as local features,
153 // names, start/end settings.
154 SeqsetUtils.SeqCharacterUnhash(seq, seqProps);
156 // ensure dataset sequence is up to date from local reference
157 seq.setDatasetSequence(refseq);
// Re-apply the range computed above. NOTE(review): send is presumably
// re-applied on a line elided from this view - confirm.
158 seq.setStart(sstart);
164 * We don't allow this - refseq is given at construction time only:
165 * public void setSeq(SequenceI seq) { this.seq = seq; }
168 * internal constructor - sets seq to a gapless sequence derived from seq and
169 * prepends any 'D' operations needed to get to the first residue of seq.
173 * @param initialDeletion
174 * true to mark initial dataset sequence residues as deleted in
177 * index of first position in seq
179 * index after last position in (possibly gapped) seq
180 * @return true if gaps are present in seq
// Internal initialiser: resolves the [start, end) range for seq, captures its
// local properties in seqProps, and reports whether gaps were found.
// _s is the first position in seq and _e the position after the last one,
// per the doc comment for this method.
// Throws Error for the conditions flagged below and when end exceeds the
// dataset sequence length.
182 private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s,
185 boolean hasgaps = false;
// The guards for the next two throws are elided in this view; going by the
// message keys they reject a null seq and an invalid _s respectively - confirm.
188 throw new Error(MessageManager
189 .getString("error.implementation_error_set_seq_null"));
193 throw new Error(MessageManager
194 .formatMessage("error.implementation_error_s", new String[]
195 { Integer.valueOf(_s).toString() }));
197 String seq_string = seq.getSequenceAsString();
// An _e of 0, before _s, or past the end of the string means "to the end".
198 if (_e == 0 || _e < _s || _e > seq_string.length())
200 _e = seq_string.length();
202 // resolve start and end positions relative to ungapped reference sequence
203 start = seq.findPosition(_s) - seq.getStart();
204 end = seq.findPosition(_e) - seq.getStart();
205 int l_ungapped = end - start;
206 // Find correct sequence to reference and correct start and end - if
208 SequenceI ds = seq.getDatasetSequence();
// NOTE(review): the following presumably runs only when seq has no dataset
// sequence (ds == null) - the condition is elided here, confirm.
211 // make a new dataset sequence
212 String ungapped = AlignSeq.extractGaps(
213 jalview.util.Comparison.GapChars, new String(seq_string));
214 l_ungapped = ungapped.length();
215 // check that we haven't just duplicated an ungapped sequence.
216 if (l_ungapped == seq.getLength())
// New sequence holding the ungapped residues, numbered from seq.getStart().
222 ds = new Sequence(seq.getName(), ungapped, seq.getStart(),
223 seq.getStart() + ungapped.length() - 1);
224 // JBPNote: this would be consistent but may not be useful
225 // seq.setDatasetSequence(ds);
228 // add in offset between seq and the dataset sequence
229 if (ds.getStart() < seq.getStart())
231 int offset = seq.getStart() - ds.getStart();
234 // absolute cigar string
// NOTE(review): presumably reached only when initialDeletion is true - the
// selecting condition is elided here, confirm.
235 addDeleted(_s + offset);
241 // normal behaviour - just mark start and end subsequence
249 // any gaps to process ?
// The ungapped length differs from the column span only when gaps are present.
250 if (l_ungapped != (_e - _s))
256 // copy over local properties for the sequence instance of the refseq
257 seqProps = SeqsetUtils.SeqCharacterHash(seq);
// The resolved range must fit inside the dataset sequence.
259 if (end > ds.getLength())
261 throw new Error(MessageManager
262 .getString("error.implementation_error_seqcigar_possible"));
263 // end = ds.getLength();
270 * directly initialise a cigar object with a sequence of range, operation
271 * pairs and a sequence to apply it to. operation and range should be relative
272 * to the seq.getStart()'th residue of the dataset seq resolved from seq.
// Builds a cigar directly from parallel operation / range arrays applied to seq.
// seq       - sequence the operations are applied to.
// operation - cigar operation codes (each must be M, I or D), or null for an
//             empty cigar.
// range     - length of each operation; must have the same length as operation.
// Throws Error when the two arrays differ in length, when an operation code
// is not M, I or D, or when _setSeq reports that seq contains gaps. The first
// throw below is presumably the null-seq check (its guard is elided in this view).
281 public SeqCigar(SequenceI seq, char operation[], int range[])
287 MessageManager.getString("error.implmentation_bug_seq_null"));
// Compare lengths only when operation is non-null. The original dereferenced
// operation here unconditionally, so a null operation threw
// NullPointerException and the null-handling path at the end of this
// constructor could never run.
289 if (operation != null && operation.length != range.length)
291 throw new Error(MessageManager.getString(
292 "error.implementation_bug_cigar_operation_list_range_list"));
295 if (operation != null)
// Allocate room for the supplied operations plus _inc_length spare slots.
297 this.operation = new char[operation.length + _inc_length];
298 this.range = new int[operation.length + _inc_length];
// _setSeq returns true when seq contains gaps, which this constructor rejects.
300 if (_setSeq(seq, false, 0, 0))
302 throw new Error(MessageManager.getString(
303 "error.not_yet_implemented_cigar_object_from_cigar_string"));
// Append after whatever _setSeq already recorded (i starts at this.length).
305 for (int i = this.length, j = 0; j < operation.length; i++, j++)
307 char op = operation[j];
308 if (op != M && op != I && op != D)
310 throw new Error(MessageManager.formatMessage(
311 "error.implementation_bug_cigar_operation", new String[]
312 { Integer.valueOf(j).toString(),
313 Integer.valueOf(op).toString(),
314 Integer.valueOf(M).toString(),
315 Integer.valueOf(I).toString(),
316 Integer.valueOf(D).toString() }));
318 this.operation[i] = op;
319 this.range[i] = range[j];
321 this.length += operation.length;
// No operations supplied: clear the operation list and only resolve the sequence.
325 this.operation = null;
328 if (_setSeq(seq, false, 0, 0))
330 throw new Error(MessageManager.getString(
331 "error.not_yet_implemented_cigar_object_from_cigar_string"));
337 * add range matched residues to cigar string
// Appends a match (M) operation covering range residues to this cigar.
342 public void addMatch(int range)
344 this.addOperation(M, range);
348 * Adds insertion and match operations based on seq to the cigar up to the
349 * endpos column of seq.
359 * @param initialDeletions
360 * if true then initial deletions will be added from start of seq to
// Scans seq and appends the corresponding operations to cigar via
// cigar.addOperation. Positions with startpos <= p <= endpos (both inclusive)
// are tested separately from the rest of the sequence.
// Much of the loop and branch structure is elided in this view.
363 protected static void addSequenceOps(CigarBase cigar, SequenceI seq,
364 int startpos, int endpos, boolean initialDeletions)
// p is the current position in seq, res the total sequence length.
368 int p = 0, res = seq.getLength();
370 if (!initialDeletions)
// Gap test is only made for positions inside the sequence; the value used
// past the end (p >= res) is on an elided line.
377 boolean isGap = (p < res)
378 ? jalview.util.Comparison.isGap(seq.getCharAt(p))
380 if ((startpos <= p) && (p <= endpos))
// NOTE(review): each "range > 0 && op != X" test below appears to flush the
// run accumulated so far before switching to operation X - confirm against
// the elided assignments.
384 if (range > 0 && op != I)
386 cigar.addOperation(op, range);
394 if (range > 0 && op != M)
396 cigar.addOperation(op, range);
407 if (range > 0 && op != D)
409 cigar.addOperation(op, range);
417 // do nothing - insertions are not made in flanking regions
// NOTE(review): presumably the final flush of the last pending run - confirm.
424 cigar.addOperation(op, range);
429 * create a cigar string for given sequence
// Builds a cigar covering the whole of seq.
// Throws Error with key error.implementation_error_for_new_cigar; the guard
// for that throw is elided in this view (presumably seq == null - confirm).
434 public SeqCigar(SequenceI seq)
439 throw new Error(MessageManager
440 .getString("error.implementation_error_for_new_cigar"));
// 0, 0 makes _setSeq use the full sequence (an _e of 0 is reset to its length).
442 _setSeq(seq, false, 0, 0);
443 // there is still work to do
// Generate operations for columns 0 .. length-1 (inclusive end).
444 addSequenceOps(this, seq, 0, seq.getLength() - 1, false);
448 * Create Cigar from a range of gaps and residues on a sequence object
453 * int - first column in range
455 * int - last column in range
// Builds a cigar for columns start..end (both inclusive) of seq.
// Throws Error with key error.implementation_error_for_new_cigar; the guard
// for that throw is elided in this view (presumably seq == null - confirm).
457 public SeqCigar(SequenceI seq, int start, int end)
462 throw new Error(MessageManager
463 .getString("error.implementation_error_for_new_cigar"));
// _setSeq takes an exclusive end, hence end + 1.
465 _setSeq(seq, false, start, end + 1);
466 // there is still work to do
// addSequenceOps takes the inclusive end column unchanged.
467 addSequenceOps(this, seq, start, end, false);
471 * Create a cigar object from a cigar string like '[<I|D|M><range>]+'. Will
472 * fail if the given seq already contains gaps (JBPNote: future implementation
476 * SequenceI object resolvable to a dataset sequence
// Parses cigarString and builds a SeqCigar applying the parsed operations to seq.
481 public static SeqCigar parseCigar(SequenceI seq, String cigarString)
// parseCigarString returns { char[] operations, int[] ranges }, as the casts
// below rely on.
484 Object[] opsandrange = parseCigarString(cigarString);
485 return new SeqCigar(seq, (char[]) opsandrange[0],
486 (int[]) opsandrange[1]);
490 * create an alignment from the given array of cigar sequences and gap
491 * character, and marking the given segments as visible in the given
495 * @param gapCharacter
497 * - hiddenColumns where hidden regions are marked
499 * - visible regions of alignment
500 * @return SequenceI[]
// Rebuilds aligned sequences from an array of SeqCigar objects and records
// hidden columns on the supplied HiddenColumns object.
// alseqs       - cigars to expand, one per output sequence.
// gapCharacter - gap symbol used in the generated strings.
// hidden       - receives hideColumns calls; it is dereferenced without a null check.
// segments     - when null, every deleted region is hidden as it is inserted;
//                otherwise read in steps of three, hiding from segments[i + 1]
//                for segments[i + 2] columns.
// Returns one Sequence per cigar, with the cigar's refseq as its dataset sequence.
// Throws Error if a cigar yields no result from getSequenceAndDeletions.
502 public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs,
503 char gapCharacter, HiddenColumns hidden, int[] segments)
505 SequenceI[] seqs = new SequenceI[alseqs.length];
506 StringBuffer[] g_seqs = new StringBuffer[alseqs.length];
507 String[] alseqs_string = new String[alseqs.length];
508 Object[] gs_regions = new Object[alseqs.length];
// Pass 1: expand each cigar into its gapped string plus bounds and deleted regions.
509 for (int i = 0; i < alseqs.length; i++)
511 alseqs_string[i] = alseqs[i].getRefSeq()
512 .getSequenceAsString(alseqs[i].start, alseqs[i].end);
513 gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i],
514 gapCharacter); // gapped sequence, {start, start col, end,
515 // endcol}, hidden regions {{start, end, col}})
516 if (gs_regions[i] == null)
518 throw new Error(MessageManager.formatMessage(
519 "error.implementation_error_cigar_seq_no_operations",
521 { Integer.valueOf(i).toString() }));
523 g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the
528 // Now account for insertions. (well - deletions)
529 // this is complicated because we must keep track of shifted positions in
// Pass 2: re-insert each sequence's deleted regions, shifting all sequences
// so that columns stay aligned.
531 ShiftList shifts = new ShiftList();
532 for (int i = 0; i < alseqs.length; i++)
// Element 2 of the per-sequence result is the array of deleted regions (may be null).
534 Object[] gs_region = ((Object[]) ((Object[]) gs_regions[i])[2]);
535 if (gs_region != null)
538 for (int hr = 0; hr < gs_region.length; hr++)
// region = {start, end, col}: inclusive residue range and its column.
540 int[] region = (int[]) gs_region[hr];
// Gap filler of the same width as the deleted region.
541 char[] insert = new char[region[1] - region[0] + 1];
542 for (int s = 0; s < insert.length; s++)
544 insert[s] = gapCharacter;
546 int inspos = shifts.shift(region[2]); // resolve insertion position in
547 // current alignment frame of
549 for (int s = 0; s < alseqs.length; s++)
// NOTE(review): the condition choosing between the gap insert and the residue
// insert below is elided here; presumably sequence i receives its own deleted
// residues and every other sequence receives gaps - confirm.
553 if (g_seqs[s].length() <= inspos)
555 // prefix insertion with more gaps.
556 for (int l = inspos - g_seqs[s].length(); l > 0; l--)
558 g_seqs[s].append(gapCharacter); // to debug - use a different
559 // gap character here
562 g_seqs[s].insert(inspos, insert);
566 g_seqs[s].insert(inspos,
567 alseqs_string[i].substring(region[0], region[1] + 1));
570 shifts.addShift(region[2], insert.length); // update shift in
571 // alignment frame of
573 if (segments == null)
575 // add a hidden column for this deletion
576 hidden.hideColumns(inspos, inspos + insert.length - 1);
// Pass 3: wrap each rebuilt string in a Sequence tied to its reference.
581 for (int i = 0; i < alseqs.length; i++)
// Element 1 of the per-sequence result is the bounds array.
583 int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]);
584 SequenceI ref = alseqs[i].getRefSeq();
// Same start / end arithmetic as getSeq: a zero bounds[2] is replaced by -1.
585 seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(),
586 ref.getStart() + alseqs[i].start + bounds[0],
587 ref.getStart() + alseqs[i].start
588 + (bounds[2] == 0 ? -1 : bounds[2]));
589 seqs[i].setDatasetSequence(ref);
590 seqs[i].setDescription(ref.getDescription());
// Caller-supplied segments: hide each one using its second and third entries.
592 if (segments != null)
594 for (int i = 0; i < segments.length; i += 3)
596 // int start=shifts.shift(segments[i]-1)+1;
597 // int end=shifts.shift(segments[i]+segments[i+1]-1)-1;
598 hidden.hideColumns(segments[i + 1],
599 segments[i + 1] + segments[i + 2] - 1);
606 * references to entities that this sequence cigar is associated with.
// Created on first use by setGroupMembership. Keys are the associated
// objects; each value is an empty int[] placeholder.
608 private Hashtable selGroups = null;
// Records an association between this cigar and group.
610 public void setGroupMembership(Object group)
// Create the table on first use.
612 if (selGroups == null)
614 selGroups = new Hashtable();
// Only the key matters; the empty int[] is a placeholder value.
616 selGroups.put(group, new int[0]);
620 * Test for and if present remove association to group.
623 * @return true if group was associated and it was removed
// Removes the association with group if one is recorded. Per the doc comment
// for this method, it returns true when an association existed and was
// removed (the return statements are elided in this view).
625 public boolean removeGroupMembership(Object group)
627 if (selGroups != null && selGroups.containsKey(group))
629 selGroups.remove(group);
636 * forget all associations for this sequence.
// Forgets all recorded associations. The statements guarded by the null
// check are elided in this view.
638 public void clearMemberships()
640 if (selGroups != null)
649 * @return null or array of all associated entities
// Collects every associated object into an array. Per the doc comment for
// this method, null is returned when nothing has been associated (the return
// statements are elided in this view).
651 public Object[] getAllMemberships()
653 if (selGroups == null)
// Copy the table's keys into a freshly sized array.
657 Object[] mmbs = new Object[selGroups.size()];
658 Enumeration en = selGroups.keys();
659 for (int i = 0; en.hasMoreElements(); i++)
661 mmbs[i] = en.nextElement();
667 * Test for group membership
670 * - a selection group or some other object that may be associated
672 * @return true if sgr is associated with this seqCigar
// Returns true if sgr has been associated with this cigar, and false when no
// association table exists or sgr is not a key in it.
674 public boolean isMemberOf(Object sgr)
676 return (selGroups != null) && selGroups.get(sgr) != null;