forester/archive/RIO/others/hmmer/src/postprob.c

   1 /************************************************************
   2  * Copyright (C) 1998 Ian Holmes (ihh@sanger.ac.uk)
   3  * HMMER - Biological sequence analysis with profile HMMs
   4  * Copyright (C) 1992-1999 Washington University School of Medicine
   5  * All Rights Reserved
   6  *
   7  *     This source code is distributed under the terms of the
   8  *     GNU General Public License. See the files COPYING and LICENSE
   9  *     for details.
  10  ************************************************************/
  11
  12 /* postprob.c
  13  * Author: Ian Holmes (ihh@sanger.ac.uk, Jun 5 1998)
  14  * Derived from core_algorithms.c (SRE, Nov 11 1996)
  15  * Incorporated SRE, Sat Nov  6 09:07:12 1999 [Cold Spring Harbor]
  16  *
  17  * RCS $Id: postprob.c,v 1.1.1.1 2005/03/22 08:34:15 cmzmasek Exp $
  18  *****************************************************************
  19  * IHH's notes:
  20  *
  21  * Functions for working with posterior probabilities,
  22  * including unfussed "backwards" and "optimal accuracy"
  23  * implementations.
  24  *****************************************************************
  25  * SRE's notes:
  26  *
  27  * Simple API example:
  28  *     struct p7trace_s  *tr;
  29  *     struct dpmatrix_s *fwd;
  30  *     struct dpmatrix_s *bck;
  31  *     struct dpmatrix_s *posterior;
  32  *     char *postcode;
  33  *
  34  *     (get a traceback from somewhere: P7Viterbi() or a modelmaker)
  35  *     (get an HMM from somewhere: read file or construct it)
  36  *     P7Forward (dsq, len, hmm, &fwd);
  37  *     P7Backward(dsq, len, hmm, &bck);
  38  *     posterior = bck;              -- can alloc posterior, but also can re-use bck --
  39  *     P7EmitterPosterior(len, hmm, fwd, bck, posterior);
  40  *     postcode = PostalCode(len, posterior, tr);
  41  *
  42  *     MSAAppendGR(msa, "POST", seqidx, postcode);  -- or a similar annotation call --
  43  *
  44  *     free(postcode);
  45  *     FreePlan7Matrix(fwd);
  46  *     FreePlan7Matrix(bck);
  47  *
  48  * P7OptimalAccuracy() - the Durbin/Holmes optimal accuracy
  49  *                       alignment algorithm. Takes a sequence
  50  *                       and an HMM, returns an alignment as
  51  *                       a trace structure.
  52  *
  53  * P7Backward()        - The Backward() algorithm, counterpart
  54  *                       of P7Forward() in core_algorithms.c.
  55  *
  56  * P7EmitterPosterior()- The heart of postprob.c: given a Forward
  57  *                       and a Backward matrix, calculate a new matrix
  58  *                       that contains the posterior probabilities
  59  *                       for each symbol i being emitted by
  60  *                       state k (so, \sum_k p(k | x_i) = 1.0).
  61  *
  62  * P7FillOptimalAccuracy() - The core DP algorithm called by
  63  *                       P7OptimalAccuracy().
  64  *
  65  * P7OptimalAccuracyTrace() - the traceback algorithm called by
  66  *                       P7FillOptimalAccuracy().
  67  *
  68  * PostalCode()        - Create a character string for annotating
  69  *                       an alignment.
  70  *
  71  * No small memory variants of these algorithms are available
  72  * right now.
  73  */
  74
  75 #include "structs.h"
  76 #include "config.h"
  77 #include "funcs.h"
  78 #include "squid.h"
  79
  80
  81 /* Function: P7OptimalAccuracy()
  82  *
  83  * Purpose:  The optimal accuracy dynamic programming algorithm.
  84  *           Identical to Viterbi() except that posterior residue
  85  *           label probabilities are used as scores.
  86  *
  87  * Args:     dsq    - sequence in digitized form
  88  *           L      - length of dsq
  89  *           hmm    - the model
  90  *           ret_tr - RETURN: traceback; pass NULL if it's not wanted
  91  *
  92  * Return:   log ( sum_{residues} P(label|M,D) ), as a bit score
  93  *           (i.e. log of expected accuracy)
  94  */
  95 float
  96 P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr)
  97 {
  98   double sc;
  99   struct dpmatrix_s *forward;
 100   struct dpmatrix_s *backward;
 101
 102   (void) P7Forward(dsq, L, hmm, &forward);
 103   (void) P7Backward(dsq, L, hmm, &backward);
 104
 105   P7EmitterPosterior(L, hmm, forward, backward, backward);           /* Re-use backward matrix for posterior scores */
 106
 107   sc = P7FillOptimalAccuracy(L, hmm->M, backward, forward, ret_tr);  /* Re-use forward matrix for optimal accuracy scores */
 108
 109   FreePlan7Matrix(forward);
 110   FreePlan7Matrix(backward);
 111
 112   return sc;
 113 }
 114
 115
 116
 117 /* Function: P7Backward()
 118  *
 119  * Purpose:  The Backward dynamic programming algorithm.
 120  *           The scaling issue is dealt with by working in log space
 121  *           and calling ILogsum(); this is a slow but robust approach.
 122  *
 123  * Args:     dsq    - sequence in digitized form
 124  *           L      - length of dsq
 125  *           hmm    - the model
 126  *           ret_mx - RETURN: dp matrix; pass NULL if it's not wanted
 127  *
 128  * Return:   log P(S|M)/P(S|R), as a bit score.
 129  */
 130 float
 131 P7Backward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx)
 132 {
 133   struct dpmatrix_s *mx;
 134   int **xmx;
 135   int **mmx;
 136   int **imx;
 137   int **dmx;
 138   int   i,k;
 139   int   sc;
 140
 141   /* Allocate a DP matrix with 0..L rows, 0..M-1 columns.
 142    */
 143   mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx);
 144
 145   /* Initialization of the L row.
 146    * Note that xmx[i][stS] = xmx[i][stN] by definition for all i,
 147    *    so stS need not be calculated in backward DP matrices.
 148    */
 149   xmx[L][XMC] = hmm->xsc[XTC][MOVE];                 /* C<-T                          */
 150   xmx[L][XME] = xmx[L][XMC] + hmm->xsc[XTE][MOVE];   /* E<-C, no C-tail               */
 151   xmx[L][XMJ] = xmx[L][XMB] = xmx[L][XMN] = -INFTY;  /* need seq to get out from here */
 152   for (k = hmm->M; k >= 1; k--) {
 153     mmx[L][k] = xmx[L][XME] + hmm->esc[k];           /* M<-E ...                      */
 154     mmx[L][k] += hmm->msc[(int) dsq[L]][k];          /* ... + emitted match symbol    */
 155     imx[L][k] = dmx[L][k] = -INFTY;                  /* need seq to get out from here */
 156   }
 157
 158   /* Recursion. Done as a pull.
 159    * Note slightly wasteful boundary conditions:
 160    *    M_M precalculated, D_M set to -INFTY,
 161    *    D_1 wastefully calculated.
 162    * Scores for transitions to D_M also have to be hacked to -INFTY,
 163    * as Plan7Logoddsify does not do this for us (I think? - ihh).
 164    */
 165   hmm->tsc[hmm->M-1][TDD] = hmm->tsc[hmm->M-1][TMD] = -INFTY;    /* no D_M state -- HACK -- should be in Plan7Logoddsify */
 166   for (i = L-1; i >= 0; i--)
 167     {
 168       /* Do the special states first.
 169        * remember, C, N and J emissions are zero score by definition
 170        */
 171       xmx[i][XMC] = xmx[i+1][XMC] + hmm->xsc[XTC][LOOP];
 172
 173       xmx[i][XMB] = -INFTY;
 174       /* The following section has been hacked to fit a bug in core_algorithms.c
 175        * The "correct" code is:
 176        * for (k = hmm->M; k >= 1; k--)
 177        * xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k];
 178        *
 179        * The following code gives the same results as core_algorithms.c:
 180        */
 181       xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][hmm->M] + hmm->bsc[hmm->M-1]);
 182       for (k = hmm->M-1; k >= 1; k--)
 183         xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k]);
 184
 185       xmx[i][XMJ] = ILogsum(xmx[i][XMB] + hmm->xsc[XTJ][MOVE],
 186                             xmx[i+1][XMJ] + hmm->xsc[XTJ][LOOP]);
 187
 188       xmx[i][XME] = ILogsum(xmx[i][XMC] + hmm->xsc[XTE][MOVE],
 189                             xmx[i][XMJ] + hmm->xsc[XTE][LOOP]);
 190
 191       xmx[i][XMN] = ILogsum(xmx[i][XMB] + hmm->xsc[XTN][MOVE],
 192                             xmx[i+1][XMN] + hmm->xsc[XTN][LOOP]);
 193
 194       /* Now the main states. Note the boundary conditions at M.
 195        */
 196
 197       if (i>0) {
 198         mmx[i][hmm->M] = xmx[i][XME] + hmm->esc[hmm->M] + hmm->msc[(int) dsq[i]][hmm->M];
 199         dmx[i][hmm->M] = -INFTY;
 200         for (k = hmm->M-1; k >= 1; k--)
 201           {
 202             mmx[i][k]  = ILogsum(ILogsum(xmx[i][XME] + hmm->esc[k],
 203                                          mmx[i+1][k+1] + hmm->tsc[k][TMM]),
 204                                  ILogsum(imx[i+1][k] + hmm->tsc[k][TMI],
 205                                          dmx[i][k+1] + hmm->tsc[k][TMD]));
 206             mmx[i][k] += hmm->msc[(int) dsq[i]][k];
 207
 208             imx[i][k]  = ILogsum(imx[i+1][k] + hmm->tsc[k][TII],
 209                                  mmx[i+1][k+1] + hmm->tsc[k][TIM]);
 210             imx[i][k] += hmm->isc[(int) dsq[i]][k];
 211
 212             dmx[i][k]  = ILogsum(dmx[i][k+1] + hmm->tsc[k][TDD],
 213                                  mmx[i+1][k+1] + hmm->tsc[k][TDM]);
 214
 215           }
 216       }
 217
 218     }
 219
 220   sc = xmx[0][XMN];
 221
 222   if (ret_mx != NULL) *ret_mx = mx;
 223   else                FreePlan7Matrix(mx);
 224
 225   return Scorify(sc);           /* the total Backward score. */
 226 }
 227
 228
 229 /* Function: P7EmitterPosterior()
 230  *
 231  * Purpose:  Combines Forward and Backward matrices into a posterior
 232  *           probability matrix.
 233  *           The entries in row i of this matrix are the logs of the
 234  *           posterior probabilities of each state emitting symbol i of
 235  *           the sequence, i.e. all entries for non-emitting states are -INFTY.
 236  *           The caller must allocate space for the matrix, although the
 237  *           backward matrix can be used instead (overwriting it will not
 238  *           compromise the algorithm).
 239  *
 240  * Args:     L        - length of sequence
 241  *           hmm      - the model
 242  *           forward  - pre-calculated forward matrix
 243  *           backward - pre-calculated backward matrix
 244  *           mx       - pre-allocated dynamic programming matrix
 245  *
 246  * Return:   void
 247  */
 248 void
 249 P7EmitterPosterior(int L,
 250                  struct plan7_s *hmm,
 251                  struct dpmatrix_s *forward,
 252                  struct dpmatrix_s *backward,
 253                  struct dpmatrix_s *mx)
 254 {
 255   int i;
 256   int k;
 257   int sc;
 258
 259   sc = backward->xmx[0][XMN];
 260
 261   for (i = L; i >= 1; i--)
 262     {
 263       mx->xmx[i][XMC] = forward->xmx[i-1][XMC] + hmm->xsc[XTC][LOOP] + backward->xmx[i][XMC] - sc;
 264
 265       mx->xmx[i][XMJ] = forward->xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP] + backward->xmx[i][XMJ] - sc;
 266
 267       mx->xmx[i][XMN] = forward->xmx[i-1][XMN] + hmm->xsc[XTN][LOOP] + backward->xmx[i][XMN] - sc;
 268
 269       mx->xmx[i][XMB] = mx->xmx[i][XME] = -INFTY;
 270
 271       for (k = 1; k < hmm->M; k++) {
 272         mx->mmx[i][k]  = backward->mmx[i][k];
 273         mx->mmx[i][k] += ILogsum(ILogsum(forward->mmx[i-1][k-1] + hmm->tsc[k-1][TMM],
 274                                              forward->imx[i-1][k-1] + hmm->tsc[k-1][TIM]),
 275                                      ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[k],
 276                                              forward->dmx[i-1][k-1] + hmm->tsc[k-1][TDM]));
 277         mx->mmx[i][k] -= sc;
 278
 279         mx->imx[i][k]  = backward->imx[i][k];
 280         mx->imx[i][k] += ILogsum(forward->mmx[i-1][k] + hmm->tsc[k][TMI],
 281                                      forward->imx[i-1][k] + hmm->tsc[k][TII]);
 282         mx->imx[i][k] -= sc;
 283
 284         mx->dmx[i][k] = -INFTY;
 285       }
 286       mx->mmx[i][hmm->M]  = backward->mmx[i][hmm->M];
 287       mx->mmx[i][hmm->M] += ILogsum(ILogsum(forward->mmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TMM],
 288                                            forward->imx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TIM]),
 289                                    ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[hmm->M],
 290                                            forward->dmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TDM]));
 291       mx->mmx[i][hmm->M] -= sc;
 292
 293       mx->imx[i][hmm->M] = mx->dmx[i][hmm->M] = mx->dmx[i][0] = -INFTY;
 294
 295     }
 296 }
 297
 298
 299 /* Function: P7FillOptimalAccuracy()
 300  *
 301  * Purpose:  The core of the optimal accuracy dynamic programming algorithm.
 302  *           Identical to Viterbi() except that scores are given by a
 303  *           posterior matrix (that the caller must pre-calculate).
 304  *           Also, the caller must pre-allocate the optimal accuracy matrix
 305  *           (this allows the forward matrix to be re-used).
 306  *           P7OptimalAccuracy() does all this for you and cleans up.
 307  *
 308  *
 309  * Args:     L         - length of sequence
 310  *           M         - length of model
 311  *           posterior - pre-calculated emitter posterior matrix
 312  *           mx        - pre-allocated dynamic programming matrix
 313  *           ret_tr    - RETURN: traceback; pass NULL if it's not wanted
 314  *
 315  * Return:   log ( sum_{residues} P(label|M,D) ), as a bit score
 316  *           (i.e. log of expected accuracy)
 317  */
 318 float P7FillOptimalAccuracy(int L,
 319                             int M,
 320                             struct dpmatrix_s *posterior,
 321                             struct dpmatrix_s *mx,
 322                             struct p7trace_s **ret_tr)
 323 {
 324   struct p7trace_s  *tr;
 325   int **xmx;
 326   int **mmx;
 327   int **imx;
 328   int **dmx;
 329   int   i,k;
 330   int   sc;
 331
 332   xmx = mx->xmx;
 333   mmx = mx->mmx;
 334   imx = mx->imx;
 335   dmx = mx->dmx;
 336
 337   /* Initialization of the zero row.
 338    * Each cell in the optimal accuracy matrix holds the log of the expected
 339    * of correctly assigned symbols up to that point.
 340    * To begin with, everything is log(0) = -INFTY.
 341    */
 342   xmx[0][XMN] = xmx[0][XMB] = xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;
 343   for (k = 0; k <= M; k++)
 344     mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;
 345
 346   /* Recursion. Done as a pull.
 347    * Note some slightly wasteful boundary conditions:
 348    *    D_M and I_M are wastefully calculated (they don't exist)
 349    */
 350   for (i = 1; i <= L; i++)
 351     {
 352       mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY;
 353
 354       for (k = 1; k <= M; k++)
 355         {
 356           /* match state */
 357           mmx[i][k]  = -INFTY;
 358           if ((sc = mmx[i-1][k-1]) > mmx[i][k])
 359             mmx[i][k] = sc;
 360           if ((sc = imx[i-1][k-1]) > mmx[i][k])
 361             mmx[i][k] = sc;
 362           if ((sc = dmx[i-1][k-1]) > mmx[i][k])
 363             mmx[i][k] = sc;
 364           if ((sc = xmx[i-1][XMB]) > mmx[i][k])
 365             mmx[i][k] = sc;
 366           mmx[i][k] = ILogsum(mmx[i][k], posterior->mmx[i][k]);
 367
 368           /* delete state */
 369           dmx[i][k] = -INFTY;
 370           if ((sc = mmx[i][k-1]) > dmx[i][k])
 371             dmx[i][k] = sc;
 372           if ((sc = dmx[i][k-1]) > dmx[i][k])
 373             dmx[i][k] = sc;
 374
 375           /* insert state */
 376           imx[i][k] = -INFTY;
 377           if ((sc = mmx[i-1][k]) > imx[i][k])
 378             imx[i][k] = sc;
 379           if ((sc = imx[i-1][k]) > imx[i][k])
 380             imx[i][k] = sc;
 381           imx[i][k] = ILogsum(imx[i][k], posterior->imx[i][k]);
 382         }
 383
 384       /* Now the special states. Order is important here.
 385        * remember, C and J emissions are zero score by definition,
 386        */
 387
 388       /* N state */
 389       xmx[i][XMN] = -INFTY;
 390       if ((sc = ILogsum(xmx[i-1][XMN], posterior->xmx[i][XMN])) > -INFTY)
 391         xmx[i][XMN] = sc;
 392
 393       /* E state */
 394       xmx[i][XME] = -INFTY;
 395       for (k = 1; k <= M; k++)
 396         if ((sc =  mmx[i][k]) > xmx[i][XME])
 397           xmx[i][XME] = sc;
 398
 399       /* J state */
 400       xmx[i][XMJ] = -INFTY;
 401       if ((sc = ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ])) > -INFTY)
 402         xmx[i][XMJ] = sc;
 403       if ((sc = xmx[i][XME]) > xmx[i][XMJ])      /* no E->J emission */
 404         xmx[i][XMJ] = sc;
 405
 406       /* B state */
 407       xmx[i][XMB] = -INFTY;
 408       if ((sc = xmx[i][XMN]) > -INFTY)
 409         xmx[i][XMB] = sc;
 410       if ((sc = xmx[i][XMJ]) > xmx[i][XMB])
 411         xmx[i][XMB] = sc;
 412
 413       /* C state */
 414       xmx[i][XMC] = -INFTY;
 415       if ((sc = ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC])) > -INFTY)
 416         xmx[i][XMC] = sc;
 417       if ((sc = xmx[i][XME]) > xmx[i][XMC])      /* no E->C emission */
 418         xmx[i][XMC] = sc;
 419     }
 420
 421   /* T state (not stored) */
 422   sc = xmx[L][XMC];
 423
 424   if (ret_tr != NULL) {
 425     P7OptimalAccuracyTrace(L, M, posterior, mx, &tr);
 426     *ret_tr = tr;
 427   }
 428
 429   return Score2Prob(sc,1);      /* the log of the expected accuracy. */
 430 }
 431
 432
 433 /* Function: P7OptimalAccuracyTrace()
 434  *
 435  * Purpose:  Traceback of an optimal accuracy matrix: i.e. retrieval
 436  *           of optimum alignment.
 437  *
 438  * Args:     L         - length of sequence
 439  *           M         - length of HMM
 440  *           posterior - the posterior matrix
 441  *           mx        - the matrix to trace back in, (L+1) x M
 442  *           ret_tr    - RETURN: traceback.
 443  *
 444  * Return:   (void)
 445  *           ret_tr is allocated here. Free using P7FreeTrace().
 446  */
 447 void
 448 P7OptimalAccuracyTrace(int L,
 449                        int M,
 450                        struct dpmatrix_s *posterior,
 451                        struct dpmatrix_s *mx,
 452                        struct p7trace_s **ret_tr)
 453 {
 454   struct p7trace_s *tr;
 455   int curralloc;                /* current allocated length of trace */
 456   int tpos;                     /* position in trace */
 457   int i;                        /* position in seq (1..L) */
 458   int k;                        /* position in model (1..M) */
 459   int **xmx, **mmx, **imx, **dmx;
 460   int sc;                       /* temp var for pre-emission score */
 461
 462   /* Overallocate for the trace.
 463    * S-N-B- ... - E-C-T  : 6 states + L is minimum trace;
 464    * add L more as buffer.
 465    */
 466   curralloc = L * 2 + 6;
 467   P7AllocTrace(curralloc, &tr);
 468
 469   xmx = mx->xmx;
 470   mmx = mx->mmx;
 471   imx = mx->imx;
 472   dmx = mx->dmx;
 473
 474   /* Initialization of trace
 475    * We do it back to front; ReverseTrace() is called later.
 476    */
 477   tr->statetype[0] = STT;
 478   tr->nodeidx[0]   = 0;
 479   tr->pos[0]       = 0;
 480   tr->statetype[1] = STC;
 481   tr->nodeidx[1]   = 0;
 482   tr->pos[1]       = 0;
 483   tpos = 2;
 484   i    = L;                     /* current i (seq pos) we're trying to assign */
 485
 486   /* Traceback
 487    */
 488   while (tr->statetype[tpos-1] != STS) {
 489     switch (tr->statetype[tpos-1]) {
 490     case STM:                   /* M connects from i-1,k-1, or B */
 491       sc = mmx[i+1][k+1];
 492       if (sc == ILogsum(mmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0)
 493         {
 494           tr->statetype[tpos] = STM;
 495           tr->nodeidx[tpos]   = k--;
 496           tr->pos[tpos]       = i--;
 497         }
 498       else if (sc == ILogsum(imx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0)
 499         {
 500           tr->statetype[tpos] = STI;
 501           tr->nodeidx[tpos]   = k;
 502           tr->pos[tpos]       = i--;
 503         }
 504       else if (sc == ILogsum(dmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 1)
 505         {
 506           tr->statetype[tpos] = STD;
 507           tr->nodeidx[tpos]   = k--;
 508           tr->pos[tpos]       = 0;
 509         }
 510       else if (sc == ILogsum(xmx[i][XMB], posterior->mmx[i+1][k+1]))
 511         {
 512           tr->statetype[tpos] = STB;
 513           tr->nodeidx[tpos]   = 0;
 514           tr->pos[tpos]       = 0;
 515         }
 516       else Die("traceback failed");
 517       break;
 518
 519     case STD:                   /* D connects from M,D */
 520       if (dmx[i][k+1] == mmx[i][k] && i > 0 && k > 0)
 521         {
 522           tr->statetype[tpos] = STM;
 523           tr->nodeidx[tpos]   = k--;
 524           tr->pos[tpos]       = i--;
 525         }
 526       else if (dmx[i][k+1] == dmx[i][k] && k > 1)
 527         {
 528           tr->statetype[tpos] = STD;
 529           tr->nodeidx[tpos]   = k--;
 530           tr->pos[tpos]       = 0;
 531         }
 532       else Die("traceback failed");
 533       break;
 534
 535     case STI:                   /* I connects from M,I */
 536       sc = imx[i+1][k];
 537       if (sc == ILogsum(mmx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0)
 538         {
 539           tr->statetype[tpos] = STM;
 540           tr->nodeidx[tpos]   = k--;
 541           tr->pos[tpos]       = i--;
 542         }
 543       else if (sc == ILogsum(imx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0)
 544         {
 545           tr->statetype[tpos] = STI;
 546           tr->nodeidx[tpos]   = k;
 547           tr->pos[tpos]       = i--;
 548         }
 549       else Die("traceback failed");
 550       break;
 551
 552     case STN:                   /* N connects from S, N */
 553       if (i == 0 && xmx[i][XMN] == -INFTY)
 554         {
 555           tr->statetype[tpos] = STS;
 556           tr->nodeidx[tpos]   = 0;
 557           tr->pos[tpos]       = 0;
 558         }
 559       else if (i > 0 && xmx[i+1][XMN] == ILogsum(xmx[i][XMN], posterior->xmx[i+1][XMN]) && i > 0)
 560         {
 561           tr->statetype[tpos] = STN;
 562           tr->nodeidx[tpos]   = 0;
 563           tr->pos[tpos]       = 0;    /* note convention adherence:  */
 564           tr->pos[tpos-1]     = i--;  /* first N doesn't emit        */
 565         }
 566       else Die("traceback failed");
 567       break;
 568
 569     case STB:                   /* B connects from N, J */
 570       if (xmx[i][XMB] == xmx[i][XMN])
 571         {
 572           tr->statetype[tpos] = STN;
 573           tr->nodeidx[tpos]   = 0;
 574           tr->pos[tpos]       = 0;
 575         }
 576       else if (xmx[i][XMB] == xmx[i][XMJ])
 577         {
 578           tr->statetype[tpos] = STJ;
 579           tr->nodeidx[tpos]   = 0;
 580           tr->pos[tpos]       = 0;
 581         }
 582       else Die("traceback failed");
 583       break;
 584
 585     case STE:                   /* E connects from any M state. k set here */
 586       for (k = M; k >= 1; k--)
 587         if (xmx[i][XME] == mmx[i][k] && i > 0)
 588           {
 589             tr->statetype[tpos] = STM;
 590             tr->nodeidx[tpos]   = k--;
 591             tr->pos[tpos]       = i--;
 592             break;
 593           }
 594       if (k <= 0) Die("traceback failed");
 595       break;
 596
 597     case STC:                   /* C comes from C, E */
 598       if (xmx[i][XMC] == ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC]) && i > 0)
 599         {
 600           tr->statetype[tpos] = STC;
 601           tr->nodeidx[tpos]   = 0;
 602           tr->pos[tpos]       = 0;    /* note convention adherence: */
 603           tr->pos[tpos-1]     = i--;  /* first C doesn't emit       */
 604         }
 605       else if (xmx[i][XMC] == xmx[i][XME])
 606         {
 607           tr->statetype[tpos] = STE;
 608           tr->nodeidx[tpos]   = 0;
 609           tr->pos[tpos]       = 0; /* E is a nonemitter */
 610         }
 611       else Die("Traceback failed.");
 612       break;
 613
 614     case STJ:                   /* J connects from E, J */
 615       if (xmx[i][XMJ] == ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ]) && i > 0)
 616         {
 617           tr->statetype[tpos] = STJ;
 618           tr->nodeidx[tpos]   = 0;
 619           tr->pos[tpos]       = 0;    /* note convention adherence: */
 620           tr->pos[tpos-1]     = i--;  /* first J doesn't emit       */
 621         }
 622       else if (xmx[i][XMJ] == xmx[i][XME])
 623         {
 624           tr->statetype[tpos] = STE;
 625           tr->nodeidx[tpos]   = 0;
 626           tr->pos[tpos]       = 0; /* E is a nonemitter */
 627         }
 628       else Die("Traceback failed.");
 629       break;
 630
 631     default:
 632       Die("traceback failed");
 633
 634     } /* end switch over statetype[tpos-1] */
 635
 636     tpos++;
 637     if (tpos == curralloc)
 638       {                         /* grow trace if necessary  */
 639         curralloc += L;
 640         P7ReallocTrace(tr, curralloc);
 641       }
 642
 643   } /* end traceback, at S state; tpos == tlen now */
 644   tr->tlen = tpos;
 645   P7ReverseTrace(tr);
 646   *ret_tr = tr;
 647
 648 }
 649
 650
 651 /* Function: PostalCode()
 652  * Date:     SRE, Sun Nov  7 15:31:35 1999 [Cold Spring Harbor]
 653  *
 654  * Purpose:  Given a traceback and one of Ian's posterior
 655  *           probability matrices, calculate a string that
 656  *           represents the confidence values on each
 657  *           residue in the sequence.
 658  *
 659  *           The code string is 0..L-1  (L = len of target seq),
 660  *           so it's in the coordinate system of the sequence string;
 661  *           off by one from dsq; and convertible to the coordinate
 662  *           system of aseq using MakeAlignedString().
 663  *
 664  *           Values are 0-9,*
 665  *           for example, 9 means with >=90% posterior probabiility,
 666  *           residue i is aligned to the state k that it
 667  *           is assigned to in the given trace.
 668  *
 669  * Args:     L  - length of seq
 670  *           mx - posterior prob matrix: see P7EmitterPosterior()
 671  *           tr - a traceback to get a Postal code string for.
 672  *
 673  * Returns:  char * array of codes, 0..L-1
 674  *           Caller is responsible for free'ing it.
 675  */
 676 static char
 677 score2postcode(int sc)
 678 {
 679   char i;
 680   i = (char) (Score2Prob(sc, 1.) * 10.);
 681   return ((i > 9) ? '*' : '0'+i);
 682 }
 683 char *
 684 PostalCode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr)
 685 {
 686   int tpos;
 687   int i;
 688   int k;
 689   char *postcode;
 690
 691   postcode = MallocOrDie((L+1) * sizeof(char));
 692   for (tpos = 0; tpos < tr->tlen; tpos++)
 693     {
 694       i = tr->pos[tpos];
 695       k = tr->nodeidx[tpos];
 696       if (i == 0) continue;
 697
 698       switch (tr->statetype[tpos]) {
 699       case STM: postcode[i-1] = score2postcode(mx->mmx[i][k]);   break;
 700       case STI: postcode[i-1] = score2postcode(mx->imx[i][k]);   break;
 701       case STN: postcode[i-1] = score2postcode(mx->xmx[i][XMN]); break;
 702       case STC: postcode[i-1] = score2postcode(mx->xmx[i][XMC]); break;
 703       case STJ: postcode[i-1] = score2postcode(mx->xmx[i][XMJ]); break;
 704       }
 705     }
 706   postcode[L] = '\0';
 707
 708   return postcode;
 709 }