1 package jalview.ws2.actions.annotation;
3 import java.util.ArrayList;
4 import java.util.BitSet;
5 import java.util.HashMap;
9 import jalview.analysis.AlignSeq;
10 import jalview.analysis.SeqsetUtils;
11 import jalview.datamodel.Sequence;
12 import jalview.datamodel.SequenceCollectionI;
13 import jalview.datamodel.SequenceI;
14 import jalview.schemes.ResidueProperties;
15 import jalview.util.Comparison;
16 import jalview.ws2.actions.BaseJob;
/**
 * A {@link BaseJob} subclass that keeps, next to its input sequences, a
 * per-column flag array, a name-to-sequence lookup and the bounds of the
 * region the sequences were taken from.
 */
18 public class AnnotationJob extends BaseJob
// Per-column flags. create() passes either null or an array copied from its
// residue BitSet.
20 final boolean[] gapMap;
// Lookup from the generated sequence names to the original sequences.
// create() passes null here unless bySequence is set.
22 final Map<String, SequenceI> seqNames;
// Region bounds. create() supplies inputSeqs.getStartRes() and
// inputSeqs.getEndRes() as the constructor's start and end arguments.
24 final int regionStart, regionEnd;
/**
 * Builds a job from already prepared input.
 *
 * NOTE(review): only some statements of this constructor are visible in the
 * reviewed excerpt; the handling of inputSeqs, gapMap and end is presumed,
 * not confirmed.
 *
 * @param inputSeqs
 *          sequences submitted with this job (presumably handed to the
 *          BaseJob constructor - confirm)
 * @param gapMap
 *          per-column flags, may be null (presumably stored in the gapMap
 *          field - confirm)
 * @param seqNames
 *          lookup from generated names to original sequences, may be null;
 *          stored in the seqNames field
 * @param start
 *          stored in regionStart
 * @param end
 *          presumably stored in regionEnd - confirm
 * @param minSize
 *          stored in minSize; isInputValid() compares its count against it
 */
28 public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
29 Map<String, SequenceI> seqNames, int start, int end, int minSize)
33 this.seqNames = seqNames;
34 this.regionStart = start;
36 this.minSize = minSize;
/**
 * Reports whether the job holds enough usable input sequences.
 *
 * Each sequence from getInputSequences() is tested with
 * getStart() <= getEnd(); the job is valid when nvalid reaches minSize.
 *
 * NOTE(review): the declaration and increment of nvalid are not visible in
 * the reviewed excerpt; presumably it counts the sequences passing the test.
 *
 * @return true if nvalid >= minSize
 */
40 public boolean isInputValid()
43 for (SequenceI sq : getInputSequences())
44 if (sq.getStart() <= sq.getEnd())
46 return nvalid >= minSize;
/**
 * Factory that prepares the sequences of a collection for submission and
 * wraps them in an AnnotationJob.
 *
 * NOTE(review): several statements (branch conditions, the list insertion,
 * the last constructor argument) are not visible in the reviewed excerpt;
 * the comments below describe only what the visible lines do.
 *
 * @param inputSeqs
 *          source collection; its getStartRes()/getEndRes() give the region
 *          and its getSequences() are iterated
 * @param bySequence
 *          when true a name lookup is built and the length check uses
 *          residue positions of the region; when false the lookup is null
 *          and the check uses sq.getEnd() - sq.getStart()
 * @param submitGaps
 *          together with requireAligned, enables re-fitting every prepared
 *          sequence to the residue map (other uses presumed - confirm)
 * @param requireAligned
 *          see submitGaps
 * @param filterNonStandardResidues
 *          forwarded to updateResidueMap
 * @param minSize
 *          presumably forwarded to the AnnotationJob constructor - confirm
 * @return the new job, built from the prepared sequences, the gap map
 *         array (or null), the name lookup and the region bounds
 */
49 public static AnnotationJob create(SequenceCollectionI inputSeqs,
50 boolean bySequence, boolean submitGaps, boolean requireAligned,
51 boolean filterNonStandardResidues, int minSize)
53 List<SequenceI> seqences = new ArrayList<>();
// The name lookup is only built when bySequence is set; otherwise it stays null.
56 Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
// Collects column flags via updateResidueMap; copied into the gap map array below.
57 BitSet residueMap = new BitSet();
58 int start = inputSeqs.getStartRes();
59 int end = inputSeqs.getEndRes();
60 // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
62 // TODO: push attributes into WsJob instance (so they can be safely
64 for (SequenceI sq : inputSeqs.getSequences())
// Length measure for this sequence: difference of the residue positions at
// the region edges (bySequence) or of the sequence's own end and start.
// NOTE(review): if getStart()/getEnd() are inclusive, the second form is one
// less than the residue count - confirm this is intended.
66 int sqLen = (bySequence)
67 ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
68 : sq.getEnd() - sq.getStart();
// Generated name is derived from the number of sequences prepared so far.
71 String newName = SeqsetUtils.unique_name(seqences.size() + 1);
// Remember which original sequence the generated name stands for.
73 namesMap.put(newName, sq);
// Variant keeping the full sequence string; its columns feed the residue map.
77 seq = new Sequence(newName, sq.getSequenceAsString());
78 updateResidueMap(residueMap, seq, filterNonStandardResidues);
82 // TODO: add ability to exclude hidden regions
// Variant using only the region [start, end + 1) with gap characters extracted.
83 seq = new Sequence(newName,
84 AlignSeq.extractGaps(Comparison.GapChars,
85 sq.getSequenceAsString(start, end + 1)));
86 // for annotation need to also record map to sequence start/end
88 // then transfer back to original sequence on return.
// Track the longest prepared sequence; used as the gap map array length.
91 width = Math.max(width, seq.getLength());
94 if (requireAligned && submitGaps)
// Replace every prepared sequence by its residue-map-fitted version, keeping its name.
96 for (int i = 0; i < seqences.size(); i++)
98 SequenceI sq = seqences.get(i);
99 char[] padded = fitSequenceToResidueMap(sq.getSequence(),
101 seqences.set(i, new Sequence(sq.getName(), padded));
// Stays null unless the array is allocated below (guarding condition not
// visible in the reviewed excerpt).
104 boolean[] gapMapArray = null;
// Copy the first `width` bits of the residue map into a plain boolean array.
107 gapMapArray = new boolean[width];
108 for (int i = 0; i < width; i++)
109 gapMapArray[i] = residueMap.get(i);
111 return new AnnotationJob(seqences, gapMapArray, namesMap, start, end,
/**
 * Visits the positions returned by seq.gapMap() and decides, per position,
 * whether its residue qualifies for the residue map.
 *
 * A position qualifies when filterNonStandardResidues is false, or when the
 * residue's index is below 20 in ResidueProperties.aaIndex (protein
 * sequences) or below 5 in ResidueProperties.nucleotideIndex (otherwise).
 *
 * NOTE(review): the statement that applies the decision to residueMap is not
 * visible in the reviewed excerpt; presumably the bit at pos is set when
 * include is true - confirm.
 *
 * @param residueMap
 *          bit set to update
 * @param seq
 *          sequence whose gapMap() positions are examined
 * @param filterNonStandardResidues
 *          when false every visited position qualifies
 */
115 private static void updateResidueMap(BitSet residueMap, SequenceI seq,
116 boolean filterNonStandardResidues)
118 for (int pos : seq.gapMap())
120 char sqchr = seq.getCharAt(pos);
// Start from "include everything" when filtering is off ...
121 boolean include = !filterNonStandardResidues;
// ... and otherwise include only residues whose table index is below the threshold.
122 include |= seq.isProtein() ? ResidueProperties.aaIndex[sqchr] < 20
123 : ResidueProperties.nucleotideIndex[sqchr] < 5;
130 * Fits the sequence to the residue map, removing empty columns where the
131 * residue map is unset, and padding the sequence with gaps at the end if needed.
133 private static char[] fitSequenceToResidueMap(char[] sequence,
136 int width = residueMap.cardinality();
137 char[] padded = new char[width];
138 for (int op = 0, pp = 0; pp < width; op++)
140 if (residueMap.get(op))
142 if (sequence.length > op)
143 padded[pp++] = sequence[op];