From: kiramt Date: Thu, 21 Sep 2017 13:36:07 +0000 (+0100) Subject: JAL-2674 Rewrote propagateInsertions X-Git-Tag: Release_2_10_4~55^2~1^2~116 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=8e640547def8e151db1fafafe93a2a0bb8078369;p=jalview.git JAL-2674 Rewrote propagateInsertions --- diff --git a/src/jalview/datamodel/HiddenColumns.java b/src/jalview/datamodel/HiddenColumns.java index 305a6f1..fe2907d 100644 --- a/src/jalview/datamodel/HiddenColumns.java +++ b/src/jalview/datamodel/HiddenColumns.java @@ -21,7 +21,6 @@ package jalview.datamodel; import jalview.util.Comparison; -import jalview.util.ShiftList; import java.util.ArrayList; import java.util.BitSet; @@ -1230,130 +1229,6 @@ public class HiddenColumns } /** - * removes intersection of position,length ranges in deletions from the - * start,end regions marked in intervals. - * - * @param shifts - * @param intervals - * @return - */ - private boolean pruneIntervalList(final List shifts, - ArrayList intervals) - { - boolean pruned = false; - int i = 0; - int j = intervals.size() - 1; - int s = 0; - int t = shifts.size() - 1; - int[] hr = intervals.get(i); - int[] sr = shifts.get(s); - while (i <= j && s <= t) - { - boolean trailinghn = hr[1] >= sr[0]; - if (!trailinghn) - { - if (i < j) - { - hr = intervals.get(++i); - } - else - { - i++; - } - continue; - } - int endshift = sr[0] + sr[1]; // deletion ranges - -ve means an insert - if (endshift < hr[0] || endshift < sr[0]) - { // leadinghc disjoint or not a deletion - if (s < t) - { - sr = shifts.get(++s); - } - else - { - s++; - } - continue; - } - boolean leadinghn = hr[0] >= sr[0]; - boolean leadinghc = hr[0] < endshift; - boolean trailinghc = hr[1] < endshift; - if (leadinghn) - { - if (trailinghc) - { // deleted hidden region. - intervals.remove(i); - pruned = true; - j--; - if (i <= j) - { - hr = intervals.get(i); - } - continue; - } - if (leadinghc) - { - hr[0] = endshift; // clip c terminal region - leadinghn = !leadinghn; - pruned = true; - } - } - if (!leadinghn) - { - if (trailinghc) - { - if (trailinghn) - { - hr[1] = sr[0] - 1; - pruned = true; - } - } - else - { - // sr contained in hr - if (s < t) - { - sr = shifts.get(++s); - } - else - { - s++; - } - continue; - } - } - } - return pruned; // true if any interval was removed or modified by - // operations. - } - - /** - * remove any hiddenColumns or selected columns and shift remaining based on a - * series of position, range deletions. - * - * @param deletions - */ - public void pruneDeletions(List shifts) - { - try - { - LOCK.writeLock().lock(); - // delete any intervals intersecting. - if (hiddenColumns != null) - { - pruneIntervalList(shifts, hiddenColumns); - if (hiddenColumns != null && hiddenColumns.size() == 0) - { - hiddenColumns = null; - } - } - } finally - { - LOCK.writeLock().unlock(); - } - } - - /** * Add gaps into the sequences aligned to profileseq under the given * AlignmentView * @@ -1393,153 +1268,113 @@ public class HiddenColumns SequenceI origseq) { char gc = al.getGapCharacter(); - // recover mapping between sequence's non-gap positions and positions - // mapping to view. - pruneDeletions(ShiftList.parseMap(origseq.gapMap())); - int[] viscontigs = getVisibleContigs(0, profileseq.getLength()); - int spos = 0; - int offset = 0; - // add profile to visible contigs - for (int v = 0; v < viscontigs.length; v += 2) - { - if (viscontigs[v] > spos) + // take the set of hidden columns, and the set of gaps in origseq, + // and remove all the hidden gaps from hiddenColumns + + // first get the gaps as a Bitset + BitSet gaps = origseq.gapBitset(); + + // now calculate hidden ^ not(gap) + BitSet hidden = new BitSet(); + markHiddenRegions(hidden); + hidden.andNot(gaps); + hiddenColumns = null; + this.hideMarkedBits(hidden); + + // for each sequence in the alignment, except the profile sequence, + // insert gaps corresponding to each hidden region + // but where each hidden column region is shifted backwards by the number of + // preceding visible gaps + // update hidden columns at the same time + ArrayList regions = getHiddenColumnsCopy(); + ArrayList newhidden = new ArrayList<>(); + + int numGapsBefore = 0; + int gapPosition = 0; + for (int[] region : regions) + { + // get region coordinates accounting for gaps + // we can rely on gaps not being *in* hidden regions because we already + // removed those + while (gapPosition < region[0]) { - StringBuffer sb = new StringBuffer(); - for (int s = 0, ns = viscontigs[v] - spos; s < ns; s++) + gapPosition++; + if (gaps.get(gapPosition)) { - sb.append(gc); + numGapsBefore++; } - for (int s = 0, ns = al.getHeight(); s < ns; s++) - { - SequenceI sqobj = al.getSequenceAt(s); - if (sqobj != profileseq) - { - String sq = al.getSequenceAt(s).getSequenceAsString(); - if (sq.length() <= spos + offset) - { - // pad sequence - int diff = spos + offset - sq.length() - 1; - if (diff > 0) - { - // pad gaps - sq = sq + sb; - while ((diff = spos + offset - sq.length() - 1) > 0) - { - // sq = sq - // + ((diff >= sb.length()) ? sb.toString() : sb - // .substring(0, diff)); - if (diff >= sb.length()) - { - sq += sb.toString(); - } - else - { - char[] buf = new char[diff]; - sb.getChars(0, diff, buf, 0); - sq += buf.toString(); - } - } - } - sq += sb.toString(); - } - else - { - al.getSequenceAt(s).setSequence(sq.substring(0, spos + offset) - + sb.toString() + sq.substring(spos + offset)); - } - } - } - // offset+=sb.length(); } - spos = viscontigs[v + 1] + 1; - } - if ((offset + spos) < profileseq.getLength()) - { - // pad the final region with gaps. + + int left = region[0] - numGapsBefore; + int right = region[1] - numGapsBefore; + newhidden.add(new int[] { left, right }); + + // make a string with number of gaps = length of hidden region StringBuffer sb = new StringBuffer(); - for (int s = 0, ns = profileseq.getLength() - spos - - offset; s < ns; s++) + for (int s = 0; s < right - left + 1; s++) { sb.append(gc); } - for (int s = 0, ns = al.getHeight(); s < ns; s++) - { - SequenceI sqobj = al.getSequenceAt(s); - if (sqobj == profileseq) - { - continue; - } - String sq = sqobj.getSequenceAsString(); - // pad sequence - int diff = origseq.getLength() - sq.length(); - while (diff > 0) - { - // sq = sq - // + ((diff >= sb.length()) ? sb.toString() : sb - // .substring(0, diff)); - if (diff >= sb.length()) - { - sq += sb.toString(); - } - else - { - char[] buf = new char[diff]; - sb.getChars(0, diff, buf, 0); - sq += buf.toString(); - } - diff = origseq.getLength() - sq.length(); - } - } - } - } - - /** - * remove any hiddenColumns or selected columns and shift remaining based on a - * series of position, range deletions. - * - * @param deletions - */ - private void pruneDeletions(ShiftList deletions) - { - if (deletions != null) - { - final List shifts = deletions.getShifts(); - if (shifts != null && shifts.size() > 0) - { - pruneDeletions(shifts); + padGaps(sb, left, profileseq, al); - // and shift the rest. - this.compensateForEdits(deletions); - } } + hiddenColumns = newhidden; } /** - * Adjust hidden column boundaries based on a series of column additions or - * deletions in visible regions. + * Pad gaps in all sequences in alignment except profileseq * - * @param shiftrecord - * @return + * @param sb + * gap string to insert + * @param left + * position to insert at + * @param profileseq + * sequence not to pad + * @param al + * alignment to pad sequences in */ - private ShiftList compensateForEdits(ShiftList shiftrecord) + private void padGaps(StringBuffer sb, int pos, SequenceI profileseq, + AlignmentI al) { - if (shiftrecord != null) + // loop over the sequences and pad with gaps where required + for (int s = 0, ns = al.getHeight(); s < ns; s++) { - final List shifts = shiftrecord.getShifts(); - if (shifts != null && shifts.size() > 0) + SequenceI sqobj = al.getSequenceAt(s); + if (sqobj != profileseq) { - int shifted = 0; - for (int i = 0, j = shifts.size(); i < j; i++) + String sq = al.getSequenceAt(s).getSequenceAsString(); + if (sq.length() <= pos) + { + // pad sequence + int diff = pos - sq.length() - 1; + if (diff > 0) + { + // pad gaps + sq = sq + sb; + while ((diff = pos - sq.length() - 1) > 0) + { + if (diff >= sb.length()) + { + sq += sb.toString(); + } + else + { + char[] buf = new char[diff]; + sb.getChars(0, diff, buf, 0); + sq += buf.toString(); + } + } + } + sq += sb.toString(); + } + else { - int[] sh = shifts.get(i); - compensateForDelEdits(shifted + sh[0], sh[1]); - shifted -= sh[1]; + al.getSequenceAt(s).setSequence( + sq.substring(0, pos) + sb.toString() + sq.substring(pos)); } } - return shiftrecord.getInverse(); } - return null; } /** diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 2f1da7f..ee91a4a 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -396,7 +396,7 @@ public class Sequence extends ASequence implements SequenceI { if (pdbIds == null) { - pdbIds = new Vector(); + pdbIds = new Vector<>(); pdbIds.add(entry); return true; } @@ -1059,6 +1059,27 @@ public class Sequence extends ASequence implements SequenceI return map; } + /** + * Build a bitset corresponding to sequence gaps + * + * @return a BitSet where set values correspond to gaps in the sequence + */ + @Override + public BitSet gapBitset() + { + BitSet gaps = new BitSet(sequence.length); + int j = 0; + while (j < sequence.length) + { + if (jalview.util.Comparison.isGap(sequence[j])) + { + gaps.set(j); + } + j++; + } + return gaps; + } + @Override public int[] findPositionMap() { @@ -1082,7 +1103,7 @@ public class Sequence extends ASequence implements SequenceI @Override public List getInsertions() { - ArrayList map = new ArrayList(); + ArrayList map = new ArrayList<>(); int lastj = -1, j = 0; int pos = start; int seqlen = sequence.length; @@ -1380,7 +1401,7 @@ public class Sequence extends ASequence implements SequenceI { if (this.annotation == null) { - this.annotation = new Vector(); + this.annotation = new Vector<>(); } if (!this.annotation.contains(annotation)) { @@ -1547,7 +1568,7 @@ public class Sequence extends ASequence implements SequenceI return null; } - Vector subset = new Vector(); + Vector subset = new Vector<>(); Enumeration e = annotation.elements(); while (e.hasMoreElements()) { @@ -1705,7 +1726,7 @@ public class Sequence extends ASequence implements SequenceI public List getAlignmentAnnotations(String calcId, String label) { - List result = new ArrayList(); + List result = new ArrayList<>(); if (this.annotation != null) { for (AlignmentAnnotation ann : annotation) @@ -1761,7 +1782,7 @@ public class Sequence extends ASequence implements SequenceI } synchronized (dbrefs) { - List primaries = new ArrayList(); + List primaries = new ArrayList<>(); DBRefEntry[] tmp = new DBRefEntry[1]; for (DBRefEntry ref : dbrefs) { diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 6e6d1aa..c1687fe 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -222,6 +222,13 @@ public interface SequenceI extends ASequenceI public int[] gapMap(); /** + * Build a bitset corresponding to sequence gaps + * + * @return a BitSet where set values correspond to gaps in the sequence + */ + public BitSet gapBitset(); + + /** * Returns an int array where indices correspond to each position in sequence * char array and the element value gives the result of findPosition for that * index in the sequence.