1 package jalview.ws2.actions.annotation;
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.BitSet;
6 import java.util.HashMap;
10 import jalview.analysis.AlignSeq;
11 import jalview.analysis.SeqsetUtils;
12 import jalview.datamodel.Sequence;
13 import jalview.datamodel.SequenceCollectionI;
14 import jalview.datamodel.SequenceI;
15 import jalview.schemes.ResidueProperties;
16 import jalview.util.Comparison;
17 import jalview.ws2.actions.BaseJob;
// A BaseJob specialisation that carries, next to the input sequences handed
// to BaseJob, the extra data assembled by the static create(...) factory:
// a column gap map, a name-to-sequence lookup and the submitted region.
19 public class AnnotationJob extends BaseJob
// Per-column flags copied from the residue bit set in create(): true where a
// column is set in that bit set. create() initialises its value to null, so
// this field may be null.
// NOTE(review): the condition under which create() fills it is not visible
// in this excerpt.
21 final boolean[] gapMap;
// Maps the generated unique sequence names to the original sequences they
// were derived from. Null when create() is called with bySequence == false.
23 final Map<String, SequenceI> seqNames;
// Region bounds supplied to the constructor as start/end; create() passes the
// values of getStartRes()/getEndRes() of the input collection.
25 final int regionStart, regionEnd;
// Creates a job from already prepared data.
//
// inputSeqs - the sequences that make up the job input
// gapMap    - per-column flags as produced by create(); may be null
// seqNames  - generated name -> original sequence lookup; may be null
// start     - start of the region, stored in regionStart
// end       - end of the region
// minSize   - threshold that isInputValid() compares its count against
//
// NOTE(review): the handling of inputSeqs, gapMap and end is on lines that
// are not visible in this excerpt; presumably inputSeqs is passed to the
// BaseJob constructor and the other two are stored in the gapMap and
// regionEnd fields - confirm.
29 public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
30 Map<String, SequenceI> seqNames, int start, int end, int minSize)
34 this.seqNames = seqNames;
35 this.regionStart = start;
37 this.minSize = minSize;
// Reports whether the job has enough usable input: the result is true when
// nvalid is at least minSize.
// NOTE(review): the declaration and update of nvalid are not visible in this
// excerpt; presumably it counts the input sequences that pass the
// start <= end test below - confirm.
41 public boolean isInputValid()
44 for (SequenceI sq : getInputSequences())
// a sequence whose start lies beyond its end fails this test
45 if (sq.getStart() <= sq.getEnd())
47 return nvalid >= minSize;
// Factory that builds an AnnotationJob from a collection of sequences.
//
// inputSeqs - source sequences; its getStartRes()/getEndRes() define the
//             region that is extracted from every sequence
// bySequence - when true a name -> original sequence map is created and the
//             length estimate is measured in residue positions over the
//             region; when false no map is created (null) and the estimate
//             uses the sequence's own start/end
// submitGaps - NOTE(review): not referenced on the visible lines; presumably
//             selects between keeping the gapped characters (and recording
//             residue columns) and stripping the gaps - confirm
// requireAligned - forwarded to adjustColumns(...) as its third argument
// filterNonStandardResidues - when true, residues rejected by
//             replaceNonStandardResidues(...) are replaced with
//             Comparison.GAP_DASH before the sequence is created
// minSize   - NOTE(review): presumably forwarded to the constructor; the end
//             of the return statement is not visible here
50 public static AnnotationJob create(SequenceCollectionI inputSeqs,
51 boolean bySequence, boolean submitGaps, boolean requireAligned,
52 boolean filterNonStandardResidues, int minSize)
// sequences collected for submission
54 List<SequenceI> sequences = new ArrayList<>();
// generated name -> original sequence; only allocated when bySequence is set
57 Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
// columns that hold a residue in at least one submitted sequence
// (filled through updateResidueMap)
58 BitSet residueMap = new BitSet();
// region bounds as reported by the input collection
59 int start = inputSeqs.getStartRes();
60 int end = inputSeqs.getEndRes();
61 // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
63 // TODO: push attributes into WsJob instance (so they can be safely
65 for (SequenceI sq : inputSeqs.getSequences())
// Length estimate for this sequence: difference of the residue positions at
// the region bounds when bySequence, otherwise end minus start of the whole
// sequence.
// NOTE(review): the use of sqLen is not visible in this excerpt. If
// getStart()/getEnd() are both inclusive, end - start is one less than the
// residue count - confirm this is intended.
67 int sqLen = (bySequence)
68 ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
69 : sq.getEnd() - sq.getStart();
// width tracks the longest full sequence length seen (sq.getLength()), not
// the length of the extracted region
72 width = Math.max(width, sq.getLength());
// name derived from the number of sequences collected so far, plus one
73 String newName = SeqsetUtils.unique_name(sequences.size() + 1);
// NOTE(review): namesMap is null when bySequence is false; a guard is
// presumably on a line not shown here - confirm.
75 namesMap.put(newName, sq);
// characters of the region; NOTE(review): the second argument is presumably
// end-exclusive, hence end + 1 - confirm against SequenceI.getSequence
76 char[] seqChars = sq.getSequence(start, end + 1);
77 if (filterNonStandardResidues)
78 replaceNonStandardResidues(seqChars, Comparison.GAP_DASH,
// variant 1: keep the characters as extracted and record which columns hold
// residues
83 seq = new Sequence(newName, seqChars);
84 updateResidueMap(residueMap, seq);
// variant 2: strip every character listed in Comparison.GapChars
87 // TODO: add ability to exclude hidden regions
88 seq = new Sequence(newName,
89 AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
90 // for annotation need to also record map to sequence start/end
92 // then transfer back to original sequence on return.
// stays null unless the lines below run
96 boolean[] gapMapArray = null;
// drop the columns that are not set in residueMap from every collected
// sequence, then copy the first `width` bits of residueMap into a plain
// boolean array
99 adjustColumns(sequences, residueMap, requireAligned);
100 gapMapArray = new boolean[width];
101 for (int i = 0; i < width; i++)
102 gapMapArray[i] = residueMap.get(i);
104 return new AnnotationJob(sequences, gapMapArray, namesMap, start, end,
// Overwrites, in place, every character of seq that the residue lookup tables
// classify as non-standard with the given replacement character.
//
// seq         - characters to scan; modified in place
// replacement - character written over rejected positions
// isProtein   - declared on a signature line not visible here; chooses the
//               amino-acid table (true) or the nucleotide table (false)
108 static void replaceNonStandardResidues(char[] seq, char replacement,
111 for (int i = 0; i < seq.length; i++)
// A table value of 20 or more (amino acids) or 5 or more (nucleotides) marks
// the character as non-standard.
// NOTE(review): chr is declared on a line not shown, presumably seq[i]. It
// is used directly as an array index, so a character beyond the table length
// would throw ArrayIndexOutOfBoundsException - confirm the table sizes.
114 if (isProtein ? ResidueProperties.aaIndex[chr] >= 20
115 : ResidueProperties.nucleotideIndex[chr] >= 5)
117 seq[i] = replacement;
123 * Add residue positions of the given sequence to the residues map. Perform an
124 * "or" operation between the given residue map and the inverse of the gap map
125 * of the given sequence.
128 * mapping to be updated in-place
130 * the sequence whose residue positions are added to the map
// Adds the residue (non-gap) columns of seq to residueMap.
132 static void updateResidueMap(BitSet residueMap, SequenceI seq)
// bit set of the gap positions of the sequence
134 var gaps = seq.gapBitset();
// invert the bits in [0, length) so that set bits now mark residues
// NOTE(review): the statement that merges the result into residueMap is not
// visible in this excerpt; presumably residueMap.or(gaps) - confirm.
135 gaps.flip(0, seq.getLength());
140 * Remove columns not included in the mask from the sequences in-place. If
141 * {@code padToLength} is set, the shorter sequences are padded with gaps at
145 * list of sequences to be modified
147 * mask of columns that will remain
149 * if gaps should be added to the end of shorter sequences
// Filters every sequence through the column mask in place and, when
// padToLength is set, pads shorter results with Comparison.GAP_DASH up to the
// number of columns set in the mask.
// NOTE(review): padToLength is declared on a signature line not visible here.
151 static void adjustColumns(List<SequenceI> sequences, BitSet mask,
// target width = number of columns set in the mask
154 int width = mask.cardinality();
155 for (SequenceI seq : sequences)
// characters of the sequence after filtering by the mask
157 char[] chars = SeqsetUtils.filterSequence(seq.getSequence(), mask);
158 if (padToLength && chars.length < width)
// remember where the real characters end, grow the array to the target
// width and fill the newly added tail with gap characters
160 int limit = chars.length;
161 chars = Arrays.copyOf(chars, width);
162 Arrays.fill(chars, limit, chars.length, Comparison.GAP_DASH);
// The end is reset to the start before the characters are replaced.
// NOTE(review): presumably so that setSequence derives the end from the new
// content - confirm against Sequence.setSequence.
164 seq.setEnd(seq.getStart());
165 seq.setSequence(chars);