X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSeqCigar.java;h=c8965558c091ecd51888de14d18e3a124d700e61;hb=a96d51f58325f8429c5792ae554243de5628d62a;hp=08c66d28540597a30f57a213b9e76d4db948d1c8;hpb=b31d6ded1992d8d8ea16f4321672b432b425693c;p=jalview.git diff --git a/src/jalview/datamodel/SeqCigar.java b/src/jalview/datamodel/SeqCigar.java index 08c66d2..c896555 100644 --- a/src/jalview/datamodel/SeqCigar.java +++ b/src/jalview/datamodel/SeqCigar.java @@ -1,16 +1,33 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.datamodel; import jalview.analysis.*; -import jalview.util.ShiftList; -import java.util.Vector; +import jalview.util.*; public class SeqCigar extends CigarSimple { - /** - * start(inclusive) and end(exclusive) of subsequence on refseq - */ - private int start, end; + /** + * start(inclusive) and end(exclusive) of subsequence on refseq + */ + private int start, end; private SequenceI refseq = null; /** * Reference dataset sequence for the cigar string @@ -20,20 +37,25 @@ public class SeqCigar { return refseq; } + /** * * @return int start index of cigar ops on refSeq */ - public int getStart() { + public int getStart() + { return start; } + /** * * @return int end index (exclusive) of cigar ops on refSeq */ - public int getEnd() { + public int getEnd() + { return end; } + /** * Returns sequence as a string with cigar operations applied to it * @return String @@ -41,7 +63,8 @@ public class SeqCigar public String getSequenceString(char GapChar) { return (length == 0) ? "" : - (String) getSequenceAndDeletions(refseq.getSequence().substring(start, end), GapChar)[0]; + (String) getSequenceAndDeletions(refseq.getSequenceAsString(start, end), + GapChar)[0]; } /** @@ -55,7 +78,8 @@ public class SeqCigar { return null; } - Object[] edit_result = getSequenceAndDeletions(refseq.getSequence().substring(start,end), + Object[] edit_result = getSequenceAndDeletions(refseq.getSequenceAsString( + start, end), GapChar); if (edit_result == null) { @@ -64,10 +88,12 @@ public class SeqCigar } int bounds[] = (int[]) edit_result[1]; seq = new Sequence(refseq.getName(), (String) edit_result[0], - refseq.getStart() + start+bounds[0], - refseq.getStart() + start+((bounds[2]==0) ? -1 : bounds[2])); + refseq.getStart() + start + bounds[0], + refseq.getStart() + start + + ( (bounds[2] == 0) ? -1 : bounds[2])); // seq.checkValidRange(); probably not needed seq.setDatasetSequence(refseq); + seq.setDescription(refseq.getDescription()); return seq; } @@ -86,30 +112,35 @@ public class SeqCigar * @param _e index after last position in (possibly gapped) seq * @return true if gaps are present in seq */ - private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, int _e) + private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, + int _e) { boolean hasgaps = false; if (seq == null) { throw new Error("Implementation Error - _setSeq(null,...)"); } - if (_s<0) - throw new Error("Implementation Error: _s="+_s); - String seq_string = seq.getSequence(); - if (_e==0 || _e<_s || _e>seq_string.length()) - _e=seq_string.length(); + if (_s < 0) + { + throw new Error("Implementation Error: _s=" + _s); + } + String seq_string = seq.getSequenceAsString(); + if (_e == 0 || _e < _s || _e > seq_string.length()) + { + _e = seq_string.length(); + } // resolve start and end positions relative to ungapped reference sequence - start = seq.findPosition(_s)-seq.getStart(); - end = seq.findPosition(_e)-seq.getStart(); - int l_ungapped = end-start; + start = seq.findPosition(_s) - seq.getStart(); + end = seq.findPosition(_e) - seq.getStart(); + int l_ungapped = end - start; // Find correct sequence to reference and correct start and end - if necessary SequenceI ds = seq.getDatasetSequence(); if (ds == null) { // make a new dataset sequence String ungapped = AlignSeq.extractGaps(jalview.util.Comparison.GapChars, - new String(seq_string)); - l_ungapped=ungapped.length(); + new String(seq_string)); + l_ungapped = ungapped.length(); // check that we haven't just duplicated an ungapped sequence. if (l_ungapped == seq.getLength()) { @@ -119,7 +150,7 @@ public class SeqCigar { ds = new Sequence(seq.getName(), ungapped, seq.getStart(), - seq.getStart()+ungapped.length()-1); + seq.getStart() + ungapped.length() - 1); // JBPNote: this would be consistent but may not be useful // seq.setDatasetSequence(ds); } @@ -127,29 +158,35 @@ public class SeqCigar // add in offset between seq and the dataset sequence if (ds.getStart() < seq.getStart()) { - int offset=seq.getStart()-ds.getStart(); - if (initialDeletion) { - // absolute cigar string - addDeleted(_s+offset); - start=0; - end+=offset; - } else { - // normal behaviour - just mark start and end subsequence - start+=offset; - end+=offset; + int offset = seq.getStart() - ds.getStart(); + if (initialDeletion) + { + // absolute cigar string + addDeleted(_s + offset); + start = 0; + end += offset; + } + else + { + // normal behaviour - just mark start and end subsequence + start += offset; + end += offset; } } // any gaps to process ? - if (l_ungapped!=(_e-_s)) - hasgaps=true; + if (l_ungapped != (_e - _s)) + { + hasgaps = true; + } this.refseq = ds; // Check offsets - if (end>ds.getLength()) { + if (end > ds.getLength()) + { throw new Error("SeqCigar: Possible implementation error: sequence is longer than dataset sequence"); // end = ds.getLength(); } @@ -204,7 +241,7 @@ public class SeqCigar this.operation = null; this.range = null; this.length = 0; - if (_setSeq(seq, false,0, 0)) + if (_setSeq(seq, false, 0, 0)) { throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence."); } @@ -219,6 +256,7 @@ public class SeqCigar { this.addOperation(M, range); } + /** * Adds * insertion and match operations based on seq to the cigar up to @@ -231,15 +269,17 @@ public class SeqCigar * @param initialDeletions if true then initial deletions will be added from start of seq to startpos */ protected static void addSequenceOps(CigarBase cigar, SequenceI seq, - int startpos, int endpos, boolean initialDeletions) + int startpos, int endpos, + boolean initialDeletions) { char op = '\0'; int range = 0; int p = 0, res = seq.getLength(); if (!initialDeletions) - p=startpos; - + { + p = startpos; + } while (p <= endpos) { @@ -305,7 +345,7 @@ public class SeqCigar } _setSeq(seq, false, 0, 0); // there is still work to do - addSequenceOps(this, seq, 0, seq.getLength()-1, false); + addSequenceOps(this, seq, 0, seq.getLength() - 1, false); } /** @@ -321,7 +361,7 @@ public class SeqCigar { throw new Error("Implementation error for new Cigar(SequenceI)"); } - _setSeq(seq, false, start, end+1); + _setSeq(seq, false, start, end + 1); // there is still work to do addSequenceOps(this, seq, start, end, false); } @@ -352,13 +392,14 @@ public class SeqCigar { SequenceI[] seqs = new SequenceI[alseqs.length]; StringBuffer[] g_seqs = new StringBuffer[alseqs.length]; - String[] alseqs_string=new String[alseqs.length]; + String[] alseqs_string = new String[alseqs.length]; Object[] gs_regions = new Object[alseqs.length]; for (int i = 0; i < alseqs.length; i++) { - alseqs_string[i]=alseqs[i].getRefSeq(). - getSequence().substring(alseqs[i].start,alseqs[i].end); - gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], gapCharacter); // gapped sequence, {start, start col, end. endcol}, hidden regions {{start, end, col}}) + alseqs_string[i] = alseqs[i].getRefSeq(). + getSequenceAsString(alseqs[i].start, alseqs[i].end); + gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], + gapCharacter); // gapped sequence, {start, start col, end. endcol}, hidden regions {{start, end, col}}) if (gs_regions[i] == null) { throw new Error("Implementation error: " + i + @@ -401,13 +442,16 @@ public class SeqCigar else { g_seqs[s].insert(inspos, - alseqs_string[i].substring(region[0], region[1] + 1)); + alseqs_string[i].substring(region[0], + region[1] + 1)); } } shifts.addShift(region[2], insert.length); // update shift in alignment frame of reference - if (segments==null) + if (segments == null) + { // add a hidden column for this deletion - colsel.hideColumns(inspos, inspos+insert.length-1); + colsel.hideColumns(inspos, inspos + insert.length - 1); + } } } } @@ -416,15 +460,20 @@ public class SeqCigar int[] bounds = ( (int[]) ( (Object[]) gs_regions[i])[1]); SequenceI ref = alseqs[i].getRefSeq(); seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(), - ref.getStart() + alseqs[i].start+bounds[0], - ref.getStart() + alseqs[i].start+bounds[2]); + ref.getStart() + alseqs[i].start + bounds[0], + ref.getStart() + alseqs[i].start + + (bounds[2] == 0 ? -1 : bounds[2])); seqs[i].setDatasetSequence(ref); + seqs[i].setDescription(ref.getDescription()); } - if (segments!=null) { - for (int i=0; i