JAL-3878 Add action and task for annotation services
[jalview.git] / src / jalview / ws2 / actions / annotation / AnnotationJob.java
diff --git a/src/jalview/ws2/actions/annotation/AnnotationJob.java b/src/jalview/ws2/actions/annotation/AnnotationJob.java
new file mode 100644 (file)
index 0000000..23e462b
--- /dev/null
@@ -0,0 +1,144 @@
+package jalview.ws2.actions.annotation;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import jalview.analysis.AlignSeq;
+import jalview.analysis.SeqsetUtils;
+import jalview.api.FeatureColourI;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AnnotatedCollectionI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSetI;
+import jalview.schemes.ResidueProperties;
+import jalview.util.Comparison;
+import jalview.ws2.actions.BaseJob;
+
+public class AnnotationJob extends BaseJob // input sequences plus bookkeeping for one annotation job
+{
+  final boolean[] gapMap; // from create(): flagged columns; length 0 unless submitGaps
+  /** From create(): generated unique name -> source sequence; null unless bySequence. */
+  final Map<String, SequenceI> seqNames;
+  /** From create(): getStartRes() / getEndRes() of the submitted collection. */
+  final int start, end;
+  /** Number of sequences with getStart() <= getEnd() that isInputValid() requires. */
+  final int minSize;
+  /** Starts empty; never assigned in this class (presumably set by the caller - confirm). */
+  List<AlignmentAnnotation> returnedAnnotations = Collections.emptyList();
+  /** Starts empty; never assigned in this class (presumably set by the caller - confirm). */
+  Map<String, FeatureColourI> featureColours = Collections.emptyMap();
+  /** Starts empty; never assigned in this class (presumably set by the caller - confirm). */
+  Map<String, FeatureMatcherSetI> featureFilters = Collections.emptyMap();
+  
+  /** Stores each argument as given (array and map are not copied); inputSeqs goes to BaseJob. */
+  public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
+      Map<String, SequenceI> seqNames, int start, int end, int minSize)
+  {
+    super(inputSeqs);
+    this.gapMap = gapMap;
+    this.seqNames = seqNames;
+    this.start = start;
+    this.end = end;
+    this.minSize = minSize;
+  }
+  /** @return true when at least minSize input sequences satisfy getStart() <= getEnd() */
+  @Override
+  public boolean isInputValid()
+  {
+    int nvalid = 0;
+    for (SequenceI sq : getInputSequences())
+      if (sq.getStart() <= sq.getEnd())
+        nvalid++;
+    return nvalid >= minSize;
+  }
+  /** Builds a job from inputSeqs: drops short sequences, renames the rest, records gap/name maps. */
+  public static AnnotationJob create(AnnotatedCollectionI inputSeqs, 
+      boolean bySequence, boolean submitGaps, boolean requireAligned, 
+      boolean filterNonStandardResidues, int minSize)
+  {
+    List<SequenceI> seqs = new ArrayList<>();
+    int minlen = 10; // sequences whose computed sqlen is below this are skipped
+    int ln = -1; // NOTE(review): longest submitted length; written below but never read
+    Map<String, SequenceI> seqNames = bySequence ? new HashMap<>() : null;
+    BitSet gapMap = new BitSet(); // flagged columns; only written when submitGaps
+    int gapMapSize = 0; // length of the longest sequence submitted with gaps
+    int start = inputSeqs.getStartRes();
+    int end = inputSeqs.getEndRes();
+    // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
+    // correctly
+    // TODO: push attributes into WsJob instance (so they can be safely
+    // persisted/restored)
+    for (SequenceI sq : inputSeqs.getSequences())
+    {
+      int sqlen;
+      if (bySequence) // presumably the residue span of columns start..end - confirm findPosition
+        sqlen = sq.findPosition(end + 1) - sq.findPosition(start + 1);
+      else // NOTE(review): if start/end are inclusive this is residue count - 1 - confirm intended
+        sqlen = sq.getEnd() - sq.getStart();
+      if (sqlen >= minlen)
+      {
+        String newName = SeqsetUtils.unique_name(seqs.size() + 1);
+        if (seqNames != null)
+          seqNames.put(newName, sq);
+        Sequence seq;
+        if (submitGaps) // submit the full string as-is and flag its usable columns
+        {
+          seq = new Sequence(newName, sq.getSequenceAsString());
+          gapMapSize = Math.max(gapMapSize, seq.getLength());
+          for (int pos : sq.gapMap()) // NOTE(review): presumably non-gap column indices - confirm
+          {
+            char sqchr = sq.getCharAt(pos);
+            boolean include = !filterNonStandardResidues; // no filtering: flag every position
+            include |= sq.isProtein() ? ResidueProperties.aaIndex[sqchr] < 20
+                : ResidueProperties.nucleotideIndex[sqchr] < 5;
+            if (include) // when filtering: only symbols with index < 20 (protein) / < 5 (nucleotide)
+              gapMap.set(pos);
+          }
+        }
+        else
+        {
+          // TODO: add ability to exclude hidden regions
+          seq = new Sequence(newName, AlignSeq.extractGaps(Comparison.GapChars,
+              sq.getSequenceAsString(start, end + 1)));
+          // For annotation we also need to record the map to the sequence
+          // start/end position in range, then transfer back to the original
+          // sequence on return.
+        }
+        seqs.add(seq);
+        ln = Math.max(ln, seq.getLength());
+      }
+    }
+    // rebuild each sequence from the flagged columns only, padding short ones with '-'
+    if (requireAligned && submitGaps)
+    {
+      int realWidth = gapMap.cardinality(); // number of flagged columns
+      for (int i = 0; i < seqs.size(); i++)
+      {
+        SequenceI sq = seqs.get(i);
+        char[] padded = new char[realWidth];
+        char[] original = sq.getSequence();
+        for (int op = 0, pp = 0; pp < realWidth; op++) // op: source column, pp: output index
+        {
+          if (gapMap.get(op))
+          {
+            if (original.length > op)
+              padded[pp++] = original[op];
+            else
+              padded[pp++] = '-';
+          }
+        }
+        seqs.set(i, new Sequence(sq.getName(), padded));
+      }
+    }
+    boolean[] gapMapArray = new boolean[gapMapSize]; // BitSet -> fixed-length array
+    for (int i = 0; i < gapMapSize; i++)
+      gapMapArray[i] = gapMap.get(i);
+    return new AnnotationJob(seqs, gapMapArray, seqNames, start, end, minSize);
+  }
+}