From: Jim Procter Date: Thu, 16 Nov 2017 16:45:35 +0000 (+0000) Subject: Merge branch 'bug/JAL-2829deleteCharsWithGaps' into develop X-Git-Tag: Release_2_10_3b1~30 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=2b4cc5fb6217217761b4a8bd2529e51b4775a57d;hp=-c;p=jalview.git Merge branch 'bug/JAL-2829deleteCharsWithGaps' into develop --- 2b4cc5fb6217217761b4a8bd2529e51b4775a57d diff --combined src/jalview/datamodel/Sequence.java index d254a17,2feb9f6..15d1378 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@@ -77,6 -77,11 +77,6 @@@ public class Sequence extends ASequenc */ Vector annotation; - /** - * The index of the sequence in a MSA - */ - int index = -1; - private SequenceFeaturesI sequenceFeatureStore; /* @@@ -1160,7 -1165,7 +1160,7 @@@ } @Override - public void deleteChars(int i, int j) + public void deleteChars(final int i, final int j) { int newstart = start, newend = end; if (i >= sequence.length || i < 0) @@@ -1172,62 -1177,75 +1172,75 @@@ boolean createNewDs = false; // TODO: take a (second look) at the dataset creation validation method for // the very large sequence case - int eindex = -1, sindex = -1; - boolean ecalc = false, scalc = false; + int startIndex = findIndex(start) - 1; + int endIndex = findIndex(end) - 1; + int startDeleteColumn = -1; // for dataset sequence deletions + int deleteCount = 0; + for (int s = i; s < j; s++) { - if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23) + if (Comparison.isGap(sequence[s])) + { + continue; + } + deleteCount++; + if (startDeleteColumn == -1) + { + startDeleteColumn = findPosition(s) - start; + } + if (createNewDs) + { + newend--; + } + else { - if (createNewDs) + if (startIndex == s) { - newend--; + /* + * deleting characters from start of sequence; new start is the + * sequence position of the next column (position to the right + * if the column position is gapped) + */ + newstart = findPosition(j); + break; } else { - if (!scalc) - { - sindex = findIndex(start) - 1; - scalc = true; - } - if (sindex == s) + if (endIndex < j) { - // delete characters including start of sequence - newstart = findPosition(j); - break; // don't need to search for any more residue characters. + /* + * deleting characters at end of sequence; new end is the sequence + * position of the column before the deletion; subtract 1 if this is + * gapped since findPosition returns the next sequence position + */ + newend = findPosition(i - 1); + if (Comparison.isGap(sequence[i - 1])) + { + newend--; + } + break; } else { - // delete characters after start. - if (!ecalc) - { - eindex = findIndex(end) - 1; - ecalc = true; - } - if (eindex < j) - { - // delete characters at end of sequence - newend = findPosition(i - 1); - break; // don't need to search for any more residue characters. - } - else - { - createNewDs = true; - newend--; // decrease end position by one for the deleted residue - // and search further - } + createNewDs = true; + newend--; } } } } - // deletion occured in the middle of the sequence + if (createNewDs && this.datasetSequence != null) { - // construct a new sequence + /* + * if deletion occured in the middle of the sequence, + * construct a new dataset sequence and delete the residues + * that were deleted from the aligned sequence + */ Sequence ds = new Sequence(datasetSequence); + ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount); + datasetSequence = ds; // TODO: remove any non-inheritable properties ? // TODO: create a sequence mapping (since there is a relation here ?) - ds.deleteChars(i, j); - datasetSequence = ds; } start = newstart; end = newend; @@@ -1677,6 -1695,30 +1690,6 @@@ } } - /** - * @return The index (zero-based) on this sequence in the MSA. It returns - * {@code -1} if this information is not available. - */ - @Override - public int getIndex() - { - return index; - } - - /** - * Defines the position of this sequence in the MSA. Use the value {@code -1} - * if this information is undefined. - * - * @param The - * position for this sequence. This value is zero-based (zero for - * this first sequence) - */ - @Override - public void setIndex(int value) - { - index = value; - } - @Override public void setRNA(RNA r) { diff --combined src/jalview/datamodel/SequenceI.java index 9ad0a61,857f206..2f3e925 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@@ -192,13 -192,14 +192,14 @@@ public interface SequenceI extends ASeq public int findIndex(int pos); /** - * Returns the sequence position for an alignment position. + * Returns the sequence position for an alignment (column) position. If at a + * gap, returns the position of the next residue to the right. If beyond the + * end of the sequence, returns 1 more than the last residue position. * * @param i * column index in alignment (from 0..