package jalview.ws2.actions.annotation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import jalview.analysis.AlignSeq;
import jalview.analysis.SeqsetUtils;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
import jalview.ws2.actions.BaseJob;

/**
 * A job holding the sequences prepared for submission to an annotation
 * service, together with the bookkeeping recorded while preparing them: the
 * column mask of residue-containing columns, the mapping from generated
 * sequence names to the originating sequences, and the selected region.
 */
public class AnnotationJob extends BaseJob
{
  /**
   * Sequences whose measured length is below this value are skipped by
   * {@link #create}.
   */
  private static final int MIN_SEQUENCE_LENGTH = 10;

  /**
   * Per-column flags, {@code true} where at least one submitted sequence has
   * a residue; {@code null} when gaps were not submitted.
   */
  final boolean[] gapMap;

  /**
   * Generated unique name mapped to the sequence it was created from;
   * {@code null} when the job was not created "by sequence".
   */
  final Map<String, SequenceI> seqNames;

  /** First and last column of the region the input was taken from. */
  final int regionStart, regionEnd;

  /** Minimum number of valid sequences needed for the input to be valid. */
  final int minSize;

  /**
   * Creates a job from already prepared input.
   *
   * @param inputSeqs
   *          sequences to be submitted
   * @param gapMap
   *          column mask of residue-containing columns, may be {@code null}
   * @param seqNames
   *          generated name to original sequence mapping, may be
   *          {@code null}
   * @param start
   *          first column of the region
   * @param end
   *          last column of the region
   * @param minSize
   *          minimum number of valid sequences required
   */
  public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
          Map<String, SequenceI> seqNames, int start, int end, int minSize)
  {
    super(inputSeqs);
    this.gapMap = gapMap;
    this.seqNames = seqNames;
    this.regionStart = start;
    this.regionEnd = end;
    this.minSize = minSize;
  }

  /**
   * Checks that enough input sequences are usable. A sequence counts as
   * usable when its start position does not exceed its end position.
   *
   * @return {@code true} if at least {@code minSize} sequences are usable
   */
  @Override
  public boolean isInputValid()
  {
    int nvalid = 0;
    for (SequenceI sq : getInputSequences())
    {
      if (sq.getStart() <= sq.getEnd())
      {
        nvalid++;
      }
    }
    return nvalid >= minSize;
  }

  /**
   * Builds an annotation job from the selected region of a sequence
   * collection. Each sufficiently long sequence is copied under a generated
   * unique name; depending on the flags its non-standard residues are
   * replaced by gaps and its gaps are either kept or stripped.
   *
   * @param inputSeqs
   *          collection providing the sequences and the start/end columns
   * @param bySequence
   *          if set, the length check uses the residue positions at the
   *          region boundaries and the generated names are recorded against
   *          their original sequences
   * @param submitGaps
   *          if set, gaps are kept, all-gap columns are removed and a column
   *          mask is recorded; otherwise gaps are stripped from each sequence
   * @param requireAligned
   *          if set (and {@code submitGaps} is set), shorter sequences are
   *          padded with gaps to a common width
   * @param filterNonStandardResidues
   *          if set, non-standard residues are replaced with gaps
   * @param minSize
   *          minimum number of valid sequences required by the job
   * @return the new job
   */
  public static AnnotationJob create(SequenceCollectionI inputSeqs,
          boolean bySequence, boolean submitGaps, boolean requireAligned,
          boolean filterNonStandardResidues, int minSize)
  {
    List<SequenceI> sequences = new ArrayList<>();
    int width = 0;
    Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
    BitSet residueMap = new BitSet();
    int start = inputSeqs.getStartRes();
    int end = inputSeqs.getEndRes();
    // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
    // correctly
    // TODO: push attributes into WsJob instance (so they can be safely
    // persisted/restored)
    for (SequenceI sq : inputSeqs.getSequences())
    {
      int sqLen = bySequence
              ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
              : sq.getEnd() - sq.getStart();
      if (sqLen < MIN_SEQUENCE_LENGTH)
      {
        continue;
      }
      width = Math.max(width, sq.getLength());
      String newName = SeqsetUtils.unique_name(sequences.size() + 1);
      if (namesMap != null)
      {
        namesMap.put(newName, sq);
      }
      char[] seqChars = sq.getSequence(start, end + 1);
      if (filterNonStandardResidues)
      {
        replaceNonStandardResidues(seqChars, Comparison.GAP_DASH,
                sq.isProtein());
      }
      Sequence seq;
      if (submitGaps)
      {
        seq = new Sequence(newName, seqChars);
        updateResidueMap(residueMap, seq);
      }
      else
      {
        // TODO: add ability to exclude hidden regions
        seq = new Sequence(newName, AlignSeq
                .extractGaps(Comparison.GapChars, new String(seqChars)));
        // for annotation need to also record map to sequence start/end
        // position in range
        // then transfer back to original sequence on return.
      }
      sequences.add(seq);
    }
    boolean[] gapMapArray = null;
    if (submitGaps)
    {
      adjustColumns(sequences, residueMap, requireAligned);
      // NOTE(review): width is taken from the full length of the source
      // sequences while residueMap is built from the extracted region only -
      // confirm that consumers of gapMap expect this.
      gapMapArray = new boolean[width];
      for (int i = 0; i < width; i++)
      {
        gapMapArray[i] = residueMap.get(i);
      }
    }
    return new AnnotationJob(sequences, gapMapArray, namesMap, start, end,
            minSize);
  }

  /**
   * Replaces, in-place, every character of the array that is not a standard
   * residue. A character is replaced when its entry in the residue index
   * table is at or above the threshold (20 for protein, 5 for nucleotide -
   * presumably the number of standard symbols; verify against
   * {@code ResidueProperties}). Characters that fall outside the index table
   * are also replaced instead of causing an
   * {@link ArrayIndexOutOfBoundsException}.
   *
   * @param seq
   *          characters to be filtered in-place
   * @param replacement
   *          character written in place of non-standard residues
   * @param isProtein
   *          whether the protein or the nucleotide index table is used
   */
  static void replaceNonStandardResidues(char[] seq, char replacement,
          boolean isProtein)
  {
    final int[] index = isProtein ? ResidueProperties.aaIndex
            : ResidueProperties.nucleotideIndex;
    final int firstNonStandard = isProtein ? 20 : 5;
    for (int i = 0; i < seq.length; i++)
    {
      char chr = seq[i];
      // characters beyond the lookup table cannot be standard residues
      if (chr >= index.length || index[chr] >= firstNonStandard)
      {
        seq[i] = replacement;
      }
    }
  }

  /**
   * Add residue positions of the given sequence to the residues map. Perform
   * an "or" operation between the given residue map and the inverse of the
   * gap map of the given sequence.
   *
   * @param residueMap
   *          mapping to be updated in-place
   * @param seq
   *          the sequence whose residue positions are added to the map
   */
  static void updateResidueMap(BitSet residueMap, SequenceI seq)
  {
    BitSet gaps = seq.gapBitset();
    // invert gap positions within the sequence length to get the residues
    gaps.flip(0, seq.getLength());
    residueMap.or(gaps);
  }

  /**
   * Remove columns not included in the mask from the sequences in-place. If
   * {@code padToLength} is set, the shorter sequences are padded with gaps
   * at the end.
   *
   * @param sequences
   *          list of sequences to be modified
   * @param mask
   *          mask of columns that will remain
   * @param padToLength
   *          if gaps should be added to the end of shorter sequences
   */
  static void adjustColumns(List<SequenceI> sequences, BitSet mask,
          boolean padToLength)
  {
    int width = mask.cardinality();
    for (SequenceI seq : sequences)
    {
      char[] chars = SeqsetUtils.filterSequence(seq.getSequence(), mask);
      if (padToLength && chars.length < width)
      {
        int limit = chars.length;
        chars = Arrays.copyOf(chars, width);
        Arrays.fill(chars, limit, chars.length, Comparison.GAP_DASH);
      }
      // the end is reset to the start before the new characters are set;
      // presumably so the sequence recomputes its end - TODO confirm
      seq.setEnd(seq.getStart());
      seq.setSequence(chars);
    }
  }
}