src/jalview/ws2/actions/annotation/AnnotationJob.java

   1 package jalview.ws2.actions.annotation;
   2
   3 import java.util.ArrayList;
   4 import java.util.BitSet;
   5 import java.util.HashMap;
   6 import java.util.List;
   7 import java.util.Map;
   8
   9 import jalview.analysis.AlignSeq;
  10 import jalview.analysis.SeqsetUtils;
  11 import jalview.datamodel.Sequence;
  12 import jalview.datamodel.SequenceCollectionI;
  13 import jalview.datamodel.SequenceI;
  14 import jalview.schemes.ResidueProperties;
  15 import jalview.util.Comparison;
  16 import jalview.ws2.actions.BaseJob;
  17
  18 public class AnnotationJob extends BaseJob
  19 {
  20   final boolean[] gapMap;
  21
  22   final Map<String, SequenceI> seqNames;
  23
  24   final int regionStart, regionEnd;
  25
  26   final int minSize;
  27
  28   public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
  29           Map<String, SequenceI> seqNames, int start, int end, int minSize)
  30   {
  31     super(inputSeqs);
  32     this.gapMap = gapMap;
  33     this.seqNames = seqNames;
  34     this.regionStart = start;
  35     this.regionEnd = end;
  36     this.minSize = minSize;
  37   }
  38
  39   @Override
  40   public boolean isInputValid()
  41   {
  42     int nvalid = 0;
  43     for (SequenceI sq : getInputSequences())
  44       if (sq.getStart() <= sq.getEnd())
  45         nvalid++;
  46     return nvalid >= minSize;
  47   }
  48
  49   public static AnnotationJob create(SequenceCollectionI inputSeqs,
  50           boolean bySequence, boolean submitGaps, boolean requireAligned,
  51           boolean filterNonStandardResidues, int minSize)
  52   {
  53     List<SequenceI> seqences = new ArrayList<>();
  54     int minlen = 10;
  55     int width = 0;
  56     Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
  57     BitSet residueMap = new BitSet();
  58     int start = inputSeqs.getStartRes();
  59     int end = inputSeqs.getEndRes();
  60     // TODO: URGENT! unify with JPred / MSA code to handle hidden regions
  61     // correctly
  62     // TODO: push attributes into WsJob instance (so they can be safely
  63     // persisted/restored
  64     for (SequenceI sq : inputSeqs.getSequences())
  65     {
  66       int sqLen = (bySequence)
  67               ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
  68               : sq.getEnd() - sq.getStart();
  69       if (sqLen < minlen)
  70         continue;
  71       String newName = SeqsetUtils.unique_name(seqences.size() + 1);
  72       if (namesMap != null)
  73         namesMap.put(newName, sq);
  74       char[] seqChars = sq.getSequence(start, end + 1);
  75       if (filterNonStandardResidues)
  76         replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
  77       Sequence seq;
  78       if (submitGaps)
  79       {
  80         seq = new Sequence(newName, seqChars);
  81         updateResidueMap(residueMap, seq, filterNonStandardResidues);
  82       }
  83       else
  84       {
  85         // TODO: add ability to exclude hidden regions
  86         seq = new Sequence(newName,
  87                 AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
  88         // for annotation need to also record map to sequence start/end
  89         // position in range
  90         // then transfer back to original sequence on return.
  91       }
  92       seqences.add(seq);
  93       width = Math.max(width, seq.getLength());
  94     }
  95
  96     if (requireAligned && submitGaps)
  97     {
  98       for (int i = 0; i < seqences.size(); i++)
  99       {
 100         SequenceI sq = seqences.get(i);
 101         char[] padded = fitSequenceToResidueMap(sq.getSequence(),
 102                 residueMap);
 103         seqences.set(i, new Sequence(sq.getName(), padded));
 104       }
 105     }
 106     boolean[] gapMapArray = null;
 107     if (submitGaps)
 108     {
 109       gapMapArray = new boolean[width];
 110       for (int i = 0; i < width; i++)
 111         gapMapArray[i] = residueMap.get(i);
 112     }
 113     return new AnnotationJob(seqences, gapMapArray, namesMap, start, end,
 114             minSize);
 115   }
 116
 117   private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
 118   {
 119     for (int i = 0; i < seq.length; i++)
 120     {
 121       char chr = seq[i];
 122       if (isProtein
 123           ? ResidueProperties.aaIndex[chr] >= 20
 124           : ResidueProperties.nucleotideIndex[chr] >= 5)
 125       {
 126         seq[i] = replacement;
 127       }
 128     }
 129   }
 130
 131   private static void updateResidueMap(BitSet residueMap, SequenceI seq,
 132           boolean filterNonStandardResidues)
 133   {
 134     for (int pos : seq.gapMap())
 135     {
 136       char sqchr = seq.getCharAt(pos);
 137       boolean include = !filterNonStandardResidues;
 138       include |= seq.isProtein() ? ResidueProperties.aaIndex[sqchr] < 20
 139               : ResidueProperties.nucleotideIndex[sqchr] < 5;
 140       if (include)
 141         residueMap.set(pos);
 142     }
 143   }
 144
 145   /**
 146    * Fits the sequence to the residue map removing empty columns where residue
 147    * map is unset and padding the sequence with gaps at the end if needed.
 148    */
 149   private static char[] fitSequenceToResidueMap(char[] sequence,
 150           BitSet residueMap)
 151   {
 152     int width = residueMap.cardinality();
 153     char[] padded = new char[width];
 154     for (int op = 0, pp = 0; pp < width; op++)
 155     {
 156       if (residueMap.get(op))
 157       {
 158         if (sequence.length > op)
 159           padded[pp++] = sequence[op];
 160         else
 161           padded[pp++] = '-';
 162       }
 163     }
 164     return padded;
 165   }
 166 }