1 package jalview.ws2.actions.annotation;
3 import java.util.ArrayList;
4 import java.util.BitSet;
5 import java.util.HashMap;
9 import jalview.analysis.AlignSeq;
10 import jalview.analysis.SeqsetUtils;
11 import jalview.datamodel.Sequence;
12 import jalview.datamodel.SequenceCollectionI;
13 import jalview.datamodel.SequenceI;
14 import jalview.schemes.ResidueProperties;
15 import jalview.util.Comparison;
16 import jalview.ws2.actions.BaseJob;
18 public class AnnotationJob extends BaseJob
// Per-column boolean flags for the job. create() passes an array copied from
// a BitSet of residue columns, and that array may be null.
// NOTE(review): presumably assigned from the constructor's gapMap argument;
// the assignment is not visible in this excerpt - confirm.
20 final boolean[] gapMap;
// Name -> sequence lookup, assigned from the constructor's seqNames argument.
// create() fills it with generated unique names mapped to the original input
// sequences, and passes null when its bySequence flag is false.
22 final Map<String, SequenceI> seqNames;
// regionStart is assigned from the constructor's start argument.
// NOTE(review): regionEnd is presumably assigned from the end argument - confirm.
24 final int regionStart, regionEnd;
/**
 * Creates an annotation job and records the name map, region start and
 * minimum size on the instance.
 *
 * @param inputSeqs
 *          sequences the job operates on (NOTE(review): presumably handed to
 *          the BaseJob superclass; that call is not visible here - confirm)
 * @param gapMap
 *          per-column boolean flags, may be null (NOTE(review): presumably
 *          stored in the gapMap field - confirm)
 * @param seqNames
 *          name -> sequence map stored in the seqNames field, may be null
 * @param start
 *          stored in regionStart
 * @param end
 *          NOTE(review): presumably stored in regionEnd - confirm
 * @param minSize
 *          stored in this.minSize; isInputValid() compares its count of valid
 *          sequences against this value
 */
28 public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
29 Map<String, SequenceI> seqNames, int start, int end, int minSize)
33 this.seqNames = seqNames;
34 this.regionStart = start;
36 this.minSize = minSize;
/**
 * Reports whether the job has enough usable input sequences.
 *
 * Walks the sequences returned by getInputSequences(), tests each one for
 * getStart() <= getEnd(), and returns true when nvalid is at least minSize.
 * NOTE(review): nvalid presumably counts the sequences that pass the test;
 * its declaration and increment are not visible in this excerpt - confirm.
 *
 * @return true if nvalid >= minSize
 */
40 public boolean isInputValid()
43 for (SequenceI sq : getInputSequences())
// a sequence passes this check when its start does not exceed its end
44 if (sq.getStart() <= sq.getEnd())
46 return nvalid >= minSize;
/**
 * Factory that builds an AnnotationJob from a sequence collection.
 *
 * For each input sequence it generates a unique name, extracts the characters
 * of the selected region, optionally replaces non-standard residues with
 * Comparison.GAP_DASH, and wraps the result in a new Sequence. Depending on
 * the flags, the sequences are then refitted to the shared residue map and a
 * boolean gap map is produced for the job.
 *
 * @param inputSeqs
 *          source collection; its getStartRes()/getEndRes() define the region
 * @param bySequence
 *          when true a name -> original sequence map is created and the
 *          length is measured via findPosition(); when false the map is null
 *          and the length is getEnd() - getStart()
 * @param submitGaps
 *          combined with requireAligned to decide whether sequences are
 *          refitted to the residue map (NOTE(review): presumably also selects
 *          between the gapped and gap-stripped Sequence construction and
 *          whether the gap map array is built - confirm)
 * @param requireAligned
 *          together with submitGaps, triggers refitting every sequence with
 *          fitSequenceToResidueMap()
 * @param filterNonStandardResidues
 *          when true non-standard residues are replaced with
 *          Comparison.GAP_DASH; also forwarded to updateResidueMap()
 * @param minSize
 *          NOTE(review): presumably forwarded to the constructor as the
 *          minimum number of valid sequences - confirm
 * @return the new job
 */
49 public static AnnotationJob create(SequenceCollectionI inputSeqs,
50 boolean bySequence, boolean submitGaps, boolean requireAligned,
51 boolean filterNonStandardResidues, int minSize)
// sequences prepared for submission; passed to the constructor at the end
53 List<SequenceI> seqences = new ArrayList<>();
// generated name -> original sequence; only allocated when bySequence is set
56 Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
// column flags updated by updateResidueMap() and copied into the gap map below
57 BitSet residueMap = new BitSet();
// region bounds taken from the input collection
58 int start = inputSeqs.getStartRes();
59 int end = inputSeqs.getEndRes();
60 // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
62 // TODO: push attributes into WsJob instance (so they can be safely
64 for (SequenceI sq : inputSeqs.getSequences())
// length measure: span between the positions found for the region bounds when
// bySequence is set, otherwise the difference of the sequence's end and start
66 int sqLen = (bySequence)
67 ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
68 : sq.getEnd() - sq.getStart();
// name derived from the 1-based index the new sequence will have in the list
71 String newName = SeqsetUtils.unique_name(seqences.size() + 1);
// remember which original sequence the generated name refers to
// NOTE(review): namesMap is null when bySequence is false; presumably guarded
// by a null check not visible here - confirm
73 namesMap.put(newName, sq);
// characters of the sequence for the (start, end + 1) range
74 char[] seqChars = sq.getSequence(start, end + 1);
75 if (filterNonStandardResidues)
// overwrites non-standard residues in seqChars in place
76 replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
// gapped variant: keep the characters as they are and record which columns
// hold residues
80 seq = new Sequence(newName, seqChars);
81 updateResidueMap(residueMap, seq, filterNonStandardResidues);
85 // TODO: add ability to exclude hidden regions
// alternative variant: build the sequence from the output of
// AlignSeq.extractGaps() (presumably the characters with gaps removed - confirm)
86 seq = new Sequence(newName,
87 AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
88 // for annotation need to also record map to sequence start/end
90 // then transfer back to original sequence on return.
// track the longest prepared sequence; used as the gap map length
93 width = Math.max(width, seq.getLength());
96 if (requireAligned && submitGaps)
// replace every prepared sequence with one refitted to the shared residue map,
// keeping its generated name
98 for (int i = 0; i < seqences.size(); i++)
100 SequenceI sq = seqences.get(i);
101 char[] padded = fitSequenceToResidueMap(sq.getSequence(),
103 seqences.set(i, new Sequence(sq.getName(), padded));
// stays null unless the array is built below
106 boolean[] gapMapArray = null;
// copy the first `width` bits of the residue map into a plain boolean array
109 gapMapArray = new boolean[width];
110 for (int i = 0; i < width; i++)
111 gapMapArray[i] = residueMap.get(i);
113 return new AnnotationJob(seqences, gapMapArray, namesMap, start, end,
/**
 * Overwrites entries of the given character array, in place, with the
 * replacement character wherever the residue lookup falls outside the
 * standard range.
 *
 * For proteins the test is ResidueProperties.aaIndex[chr] >= 20; otherwise it
 * is ResidueProperties.nucleotideIndex[chr] >= 5.
 * NOTE(review): chr is presumably seq[i]; its declaration is not visible in
 * this excerpt - confirm.
 * NOTE(review): the lookup tables are indexed directly by the character
 * value; verify that every possible input character is within their bounds.
 *
 * @param seq
 *          characters to scan; modified in place
 * @param replacement
 *          character written over each non-standard entry
 * @param isProtein
 *          selects the amino-acid table (true) or the nucleotide table (false)
 */
117 private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
119 for (int i = 0; i < seq.length; i++)
// index >= 20 (amino acids) or >= 5 (nucleotides) is treated as non-standard
123 ? ResidueProperties.aaIndex[chr] >= 20
124 : ResidueProperties.nucleotideIndex[chr] >= 5)
126 seq[i] = replacement;
/**
 * Visits the positions returned by seq.gapMap() and decides, per position,
 * whether the character there counts as included.
 *
 * A position is included when filtering is off, or when its lookup index is
 * below 20 (protein, ResidueProperties.aaIndex) or below 5 (otherwise,
 * ResidueProperties.nucleotideIndex).
 * NOTE(review): included positions are presumably set in residueMap; the
 * statement that writes to the bit set is not visible in this excerpt -
 * confirm.
 *
 * @param residueMap
 *          bit set to update
 * @param seq
 *          sequence whose gapMap() positions are examined
 * @param filterNonStandardResidues
 *          when false every visited position is included regardless of the
 *          lookup result
 */
131 private static void updateResidueMap(BitSet residueMap, SequenceI seq,
132 boolean filterNonStandardResidues)
134 for (int pos : seq.gapMap())
136 char sqchr = seq.getCharAt(pos);
// start as "include" when no filtering is requested ...
137 boolean include = !filterNonStandardResidues;
// ... and also include when the character is within the standard index range
138 include |= seq.isProtein() ? ResidueProperties.aaIndex[sqchr] < 20
139 : ResidueProperties.nucleotideIndex[sqchr] < 5;
146 * Fits the sequence to the residue map removing empty columns where residue
147 * map is unset and padding the sequence with gaps at the end if needed.
149 private static char[] fitSequenceToResidueMap(char[] sequence,
152 int width = residueMap.cardinality();
153 char[] padded = new char[width];
154 for (int op = 0, pp = 0; pp < width; op++)
156 if (residueMap.get(op))
158 if (sequence.length > op)
159 padded[pp++] = sequence[op];