/************************************************************
 * Copyright (C) 1998 Ian Holmes (ihh@sanger.ac.uk)
 * HMMER - Biological sequence analysis with profile HMMs
 * Copyright (C) 1992-1999 Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ************************************************************/

/* postprob.c
 * Author: Ian Holmes (ihh@sanger.ac.uk, Jun 5 1998)
 * Derived from core_algorithms.c (SRE, Nov 11 1996)
 * Incorporated SRE, Sat Nov  6 09:07:12 1999 [Cold Spring Harbor]
 *
 * RCS $Id: postprob.c,v 1.1.1.1 2005/03/22 08:34:15 cmzmasek Exp $ 
 *****************************************************************
 * IHH's notes:
 * 
 * Functions for working with posterior probabilities,
 * including unfussed "backwards" and "optimal accuracy"
 * implementations.
 *****************************************************************
 * SRE's notes:
 * 
 * Simple API example:
 *     struct p7trace_s  *tr;
 *     struct dpmatrix_s *fwd;
 *     struct dpmatrix_s *bck;
 *     struct dpmatrix_s *posterior;
 *     char *postcode;
 *     
 *     (get a traceback from somewhere: P7Viterbi() or a modelmaker)
 *     (get an HMM from somewhere: read file or construct it)
 *     P7Forward (dsq, len, hmm, &fwd);
 *     P7Backward(dsq, len, hmm, &bck);
 *     posterior = bck;              -- can alloc posterior, but also can re-use bck --
 *     P7EmitterPosterior(len, hmm, fwd, bck, posterior);
 *     postcode = PostalCode(len, posterior, tr);
 *
 *     MSAAppendGR(msa, "POST", seqidx, postcode);  -- or a similar annotation call --
 *     
 *     free(postcode);
 *     FreePlan7Matrix(fwd);
 *     FreePlan7Matrix(bck);
 *     
 * P7OptimalAccuracy() - the Durbin/Holmes optimal accuracy
 *                       alignment algorithm. Takes a sequence
 *                       and an HMM, returns an alignment as
 *                       a trace structure.
 * 
 * P7Backward()        - The Backward() algorithm, counterpart
 *                       of P7Forward() in core_algorithms.c.
 *                       
 * P7EmitterPosterior()- The heart of postprob.c: given a Forward
 *                       and a Backward matrix, calculate a new matrix
 *                       that contains the posterior probabilities
 *                       for each symbol i being emitted by 
 *                       state k (so, \sum_k p(k | x_i) = 1.0).
 *                       
 * P7FillOptimalAccuracy() - The core DP algorithm called by 
 *                       P7OptimalAccuracy().
 *                       
 * P7OptimalAccuracyTrace() - the traceback algorithm called by
 *                       P7FillOptimalAccuracy().                         
 *                       
 * PostalCode()        - Create a character string for annotating
 *                       an alignment.                         
 *                       
 * No small memory variants of these algorithms are available
 * right now.
 */

#include "structs.h"
#include "config.h"
#include "funcs.h"
#include "squid.h"


/* Function: P7OptimalAccuracy()
 * 
 * Purpose:  The optimal accuracy dynamic programming algorithm. 
 *           Identical to Viterbi() except that posterior residue
 *           label probabilities are used as scores.
 *           
 * Args:     dsq    - sequence in digitized form
 *           L      - length of dsq
 *           hmm    - the model
 *           ret_tr - RETURN: traceback; pass NULL if it's not wanted
 *           
 * Return:   log ( sum_{residues} P(label|M,D) ), as a bit score
 *           (i.e. log of expected accuracy)
 */
float
P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr)
{
  double sc;
  struct dpmatrix_s *forward;
  struct dpmatrix_s *backward;

  (void) P7Forward(dsq, L, hmm, &forward);
  (void) P7Backward(dsq, L, hmm, &backward);

  P7EmitterPosterior(L, hmm, forward, backward, backward);           /* Re-use backward matrix for posterior scores */

  sc = P7FillOptimalAccuracy(L, hmm->M, backward, forward, ret_tr);  /* Re-use forward matrix for optimal accuracy scores */

  FreePlan7Matrix(forward);
  FreePlan7Matrix(backward);

  return sc;
}


/* Function: P7Backward()
 * 
 * Purpose:  The Backward dynamic programming algorithm.
 *           The scaling issue is dealt with by working in log space
 *           and calling ILogsum(); this is a slow but robust approach.
 *           
 * Args:     dsq    - sequence in digitized form
 *           L      - length of dsq
 *           hmm    - the model
 *           ret_mx - RETURN: dp matrix; pass NULL if it's not wanted
 *           
 * Return:   log P(S|M)/P(S|R), as a bit score.
 */
float
P7Backward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx)
{
  struct dpmatrix_s *mx;
  int **xmx;
  int **mmx;
  int **imx;
  int **dmx;
  int   i,k;
  int   sc;

  /* Allocate a DP matrix with 0..L rows, 0..M-1 columns.
   */ 
  mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx);

  /* Initialization of the L row.
   * Note that xmx[i][stS] = xmx[i][stN] by definition for all i,
   *    so stS need not be calculated in backward DP matrices.
   */
  xmx[L][XMC] = hmm->xsc[XTC][MOVE];                 /* C<-T                          */
  xmx[L][XME] = xmx[L][XMC] + hmm->xsc[XTE][MOVE];   /* E<-C, no C-tail               */
  xmx[L][XMJ] = xmx[L][XMB] = xmx[L][XMN] = -INFTY;  /* need seq to get out from here */
  for (k = hmm->M; k >= 1; k--) {
    mmx[L][k] = xmx[L][XME] + hmm->esc[k];           /* M<-E ...                      */
    mmx[L][k] += hmm->msc[(int) dsq[L]][k];          /* ... + emitted match symbol    */
    imx[L][k] = dmx[L][k] = -INFTY;                  /* need seq to get out from here */
  }

  /* Recursion. Done as a pull.
   * Note slightly wasteful boundary conditions:
   *    M_M precalculated, D_M set to -INFTY,
   *    D_1 wastefully calculated.
   * Scores for transitions to D_M also have to be hacked to -INFTY,
   * as Plan7Logoddsify does not do this for us (I think? - ihh).
   */
  hmm->tsc[hmm->M-1][TDD] = hmm->tsc[hmm->M-1][TMD] = -INFTY;    /* no D_M state -- HACK -- should be in Plan7Logoddsify */
  for (i = L-1; i >= 0; i--)
    {
      /* Do the special states first.
       * remember, C, N and J emissions are zero score by definition
       */
      xmx[i][XMC] = xmx[i+1][XMC] + hmm->xsc[XTC][LOOP];
      
      xmx[i][XMB] = -INFTY;
      /* The following section has been hacked to fit a bug in core_algorithms.c
       * The "correct" code is:
       * for (k = hmm->M; k >= 1; k--)
       * xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k];
       *
       * The following code gives the same results as core_algorithms.c:
       */
      xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][hmm->M] + hmm->bsc[hmm->M-1]);
      for (k = hmm->M-1; k >= 1; k--)
	xmx[i][XMB] = ILogsum(xmx[i][XMB], mmx[i+1][k] + hmm->bsc[k]);
      
      xmx[i][XMJ] = ILogsum(xmx[i][XMB] + hmm->xsc[XTJ][MOVE],
			    xmx[i+1][XMJ] + hmm->xsc[XTJ][LOOP]);

      xmx[i][XME] = ILogsum(xmx[i][XMC] + hmm->xsc[XTE][MOVE],
			    xmx[i][XMJ] + hmm->xsc[XTE][LOOP]);

      xmx[i][XMN] = ILogsum(xmx[i][XMB] + hmm->xsc[XTN][MOVE],
			    xmx[i+1][XMN] + hmm->xsc[XTN][LOOP]);

      /* Now the main states. Note the boundary conditions at M.
       */

      if (i>0) {
	mmx[i][hmm->M] = xmx[i][XME] + hmm->esc[hmm->M] + hmm->msc[(int) dsq[i]][hmm->M];
	dmx[i][hmm->M] = -INFTY;
	for (k = hmm->M-1; k >= 1; k--)
	  {
	    mmx[i][k]  = ILogsum(ILogsum(xmx[i][XME] + hmm->esc[k],
					 mmx[i+1][k+1] + hmm->tsc[k][TMM]),
				 ILogsum(imx[i+1][k] + hmm->tsc[k][TMI],
					 dmx[i][k+1] + hmm->tsc[k][TMD]));
	    mmx[i][k] += hmm->msc[(int) dsq[i]][k];
	    
	    imx[i][k]  = ILogsum(imx[i+1][k] + hmm->tsc[k][TII],
				 mmx[i+1][k+1] + hmm->tsc[k][TIM]);
	    imx[i][k] += hmm->isc[(int) dsq[i]][k];
	    
	    dmx[i][k]  = ILogsum(dmx[i][k+1] + hmm->tsc[k][TDD],
				 mmx[i+1][k+1] + hmm->tsc[k][TDM]);
	    
	  }
      }
      
    }

  sc = xmx[0][XMN];

  if (ret_mx != NULL) *ret_mx = mx;
  else                FreePlan7Matrix(mx);

  return Scorify(sc);		/* the total Backward score. */
}


/* Function: P7EmitterPosterior()
 *
 * Purpose:  Combines Forward and Backward matrices into a posterior
 *           probability matrix.
 *           The entries in row i of this matrix are the logs of the
 *           posterior probabilities of each state emitting symbol i of
 *           the sequence, i.e. all entries for non-emitting states are -INFTY.
 *           The caller must allocate space for the matrix, although the
 *           backward matrix can be used instead (overwriting it will not
 *           compromise the algorithm).
 *           
 * Args:     L        - length of sequence
 *           hmm      - the model
 *           forward  - pre-calculated forward matrix
 *           backward - pre-calculated backward matrix
 *           mx       - pre-allocated dynamic programming matrix
 *           
 * Return:   void
 */
void
P7EmitterPosterior(int L,
		 struct plan7_s *hmm,
		 struct dpmatrix_s *forward,
		 struct dpmatrix_s *backward,
		 struct dpmatrix_s *mx)
{
  int i;
  int k;
  int sc;

  sc = backward->xmx[0][XMN];
  
  for (i = L; i >= 1; i--)
    {
      mx->xmx[i][XMC] = forward->xmx[i-1][XMC] + hmm->xsc[XTC][LOOP] + backward->xmx[i][XMC] - sc;
      
      mx->xmx[i][XMJ] = forward->xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP] + backward->xmx[i][XMJ] - sc;
 
      mx->xmx[i][XMN] = forward->xmx[i-1][XMN] + hmm->xsc[XTN][LOOP] + backward->xmx[i][XMN] - sc;

      mx->xmx[i][XMB] = mx->xmx[i][XME] = -INFTY;
      
      for (k = 1; k < hmm->M; k++) {
	mx->mmx[i][k]  = backward->mmx[i][k];
	mx->mmx[i][k] += ILogsum(ILogsum(forward->mmx[i-1][k-1] + hmm->tsc[k-1][TMM],
					     forward->imx[i-1][k-1] + hmm->tsc[k-1][TIM]),
				     ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[k],
					     forward->dmx[i-1][k-1] + hmm->tsc[k-1][TDM]));
	mx->mmx[i][k] -= sc;
	
	mx->imx[i][k]  = backward->imx[i][k];
	mx->imx[i][k] += ILogsum(forward->mmx[i-1][k] + hmm->tsc[k][TMI],
				     forward->imx[i-1][k] + hmm->tsc[k][TII]);
	mx->imx[i][k] -= sc;
	
	mx->dmx[i][k] = -INFTY;
      }
      mx->mmx[i][hmm->M]  = backward->mmx[i][hmm->M];
      mx->mmx[i][hmm->M] += ILogsum(ILogsum(forward->mmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TMM],
					   forward->imx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TIM]),
				   ILogsum(forward->xmx[i-1][XMB] + hmm->bsc[hmm->M],
					   forward->dmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TDM]));
      mx->mmx[i][hmm->M] -= sc;

      mx->imx[i][hmm->M] = mx->dmx[i][hmm->M] = mx->dmx[i][0] = -INFTY;
      
    }
}


/* Function: P7FillOptimalAccuracy()
 * 
 * Purpose:  The core of the optimal accuracy dynamic programming algorithm. 
 *           Identical to Viterbi() except that scores are given by a
 *           posterior matrix (that the caller must pre-calculate).
 *           Also, the caller must pre-allocate the optimal accuracy matrix
 *           (this allows the forward matrix to be re-used).
 *           P7OptimalAccuracy() does all this for you and cleans up.
 *  
 *           
 * Args:     L         - length of sequence
 *           M         - length of model
 *           posterior - pre-calculated emitter posterior matrix
 *           mx        - pre-allocated dynamic programming matrix
 *           ret_tr    - RETURN: traceback; pass NULL if it's not wanted
 *           
 * Return:   log ( sum_{residues} P(label|M,D) ), as a bit score
 *           (i.e. log of expected accuracy)
 */
float P7FillOptimalAccuracy(int L,
			    int M,
			    struct dpmatrix_s *posterior,
			    struct dpmatrix_s *mx,
			    struct p7trace_s **ret_tr)
{
  struct p7trace_s  *tr;
  int **xmx;
  int **mmx;
  int **imx;
  int **dmx;
  int   i,k;
  int   sc;

  xmx = mx->xmx;
  mmx = mx->mmx;
  imx = mx->imx;
  dmx = mx->dmx;

  /* Initialization of the zero row.
   * Each cell in the optimal accuracy matrix holds the log of the expected
   * of correctly assigned symbols up to that point.
   * To begin with, everything is log(0) = -INFTY.
   */
  xmx[0][XMN] = xmx[0][XMB] = xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;
  for (k = 0; k <= M; k++)
    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;

  /* Recursion. Done as a pull.
   * Note some slightly wasteful boundary conditions:  
   *    D_M and I_M are wastefully calculated (they don't exist)
   */
  for (i = 1; i <= L; i++)
    {
      mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY;
      
      for (k = 1; k <= M; k++)
	{
	  /* match state */
	  mmx[i][k]  = -INFTY;
	  if ((sc = mmx[i-1][k-1]) > mmx[i][k])
	    mmx[i][k] = sc;
	  if ((sc = imx[i-1][k-1]) > mmx[i][k])
	    mmx[i][k] = sc;
	  if ((sc = dmx[i-1][k-1]) > mmx[i][k])
	    mmx[i][k] = sc;
	  if ((sc = xmx[i-1][XMB]) > mmx[i][k])
	    mmx[i][k] = sc;
	  mmx[i][k] = ILogsum(mmx[i][k], posterior->mmx[i][k]);
	  
	  /* delete state */
	  dmx[i][k] = -INFTY;
	  if ((sc = mmx[i][k-1]) > dmx[i][k])
	    dmx[i][k] = sc;
	  if ((sc = dmx[i][k-1]) > dmx[i][k])
	    dmx[i][k] = sc;

	  /* insert state */
	  imx[i][k] = -INFTY;
	  if ((sc = mmx[i-1][k]) > imx[i][k])
	    imx[i][k] = sc;
	  if ((sc = imx[i-1][k]) > imx[i][k])
	    imx[i][k] = sc;
	  imx[i][k] = ILogsum(imx[i][k], posterior->imx[i][k]);
	}
      
      /* Now the special states. Order is important here.
       * remember, C and J emissions are zero score by definition,
       */

      /* N state */
      xmx[i][XMN] = -INFTY;
      if ((sc = ILogsum(xmx[i-1][XMN], posterior->xmx[i][XMN])) > -INFTY)
	xmx[i][XMN] = sc;
      
      /* E state */
      xmx[i][XME] = -INFTY;
      for (k = 1; k <= M; k++)
	if ((sc =  mmx[i][k]) > xmx[i][XME])
	  xmx[i][XME] = sc;
      
      /* J state */
      xmx[i][XMJ] = -INFTY;
      if ((sc = ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ])) > -INFTY)
	xmx[i][XMJ] = sc;
      if ((sc = xmx[i][XME]) > xmx[i][XMJ])      /* no E->J emission */
	xmx[i][XMJ] = sc;
      
      /* B state */
      xmx[i][XMB] = -INFTY;
      if ((sc = xmx[i][XMN]) > -INFTY)
	xmx[i][XMB] = sc;
      if ((sc = xmx[i][XMJ]) > xmx[i][XMB])
	xmx[i][XMB] = sc;
      
      /* C state */
      xmx[i][XMC] = -INFTY;
      if ((sc = ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC])) > -INFTY)
	xmx[i][XMC] = sc;
      if ((sc = xmx[i][XME]) > xmx[i][XMC])      /* no E->C emission */
	xmx[i][XMC] = sc;
    }

  /* T state (not stored) */
  sc = xmx[L][XMC];
  
  if (ret_tr != NULL) {
    P7OptimalAccuracyTrace(L, M, posterior, mx, &tr);
    *ret_tr = tr;
  }
  
  return Score2Prob(sc,1);	/* the log of the expected accuracy. */
}


/* Function: P7OptimalAccuracyTrace()
 * 
 * Purpose:  Traceback of an optimal accuracy matrix: i.e. retrieval 
 *           of optimum alignment.
 *           
 * Args:     L         - length of sequence
 *           M         - length of HMM
 *           posterior - the posterior matrix
 *           mx        - the matrix to trace back in, (L+1) x M
 *           ret_tr    - RETURN: traceback.
 *           
 * Return:   (void)
 *           ret_tr is allocated here. Free using P7FreeTrace().
 */
void
P7OptimalAccuracyTrace(int L,
		       int M,
		       struct dpmatrix_s *posterior,
		       struct dpmatrix_s *mx,
		       struct p7trace_s **ret_tr)
{
  struct p7trace_s *tr;
  int curralloc;		/* current allocated length of trace */
  int tpos;			/* position in trace */
  int i;			/* position in seq (1..L) */
  int k;			/* position in model (1..M) */
  int **xmx, **mmx, **imx, **dmx;
  int sc;			/* temp var for pre-emission score */

  /* Overallocate for the trace.
   * S-N-B- ... - E-C-T  : 6 states + L is minimum trace;
   * add L more as buffer.             
   */
  curralloc = L * 2 + 6; 
  P7AllocTrace(curralloc, &tr);

  xmx = mx->xmx;
  mmx = mx->mmx;
  imx = mx->imx;
  dmx = mx->dmx;

  /* Initialization of trace
   * We do it back to front; ReverseTrace() is called later.
   */
  tr->statetype[0] = STT;
  tr->nodeidx[0]   = 0;
  tr->pos[0]       = 0;
  tr->statetype[1] = STC;
  tr->nodeidx[1]   = 0;
  tr->pos[1]       = 0;
  tpos = 2;
  i    = L;			/* current i (seq pos) we're trying to assign */

  /* Traceback
   */
  while (tr->statetype[tpos-1] != STS) {
    switch (tr->statetype[tpos-1]) {
    case STM:			/* M connects from i-1,k-1, or B */
      sc = mmx[i+1][k+1];
      if (sc == ILogsum(mmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0)
	{
	  tr->statetype[tpos] = STM;
	  tr->nodeidx[tpos]   = k--;
	  tr->pos[tpos]       = i--;
	}
      else if (sc == ILogsum(imx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 0)
	{
	  tr->statetype[tpos] = STI;
	  tr->nodeidx[tpos]   = k;
	  tr->pos[tpos]       = i--;
	}
      else if (sc == ILogsum(dmx[i][k], posterior->mmx[i+1][k+1]) && i > 0 && k > 1)
	{
	  tr->statetype[tpos] = STD;
	  tr->nodeidx[tpos]   = k--;
	  tr->pos[tpos]       = 0;
	}
      else if (sc == ILogsum(xmx[i][XMB], posterior->mmx[i+1][k+1]))
	{
	  tr->statetype[tpos] = STB;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;
	}
      else Die("traceback failed");
      break;

    case STD:			/* D connects from M,D */
      if (dmx[i][k+1] == mmx[i][k] && i > 0 && k > 0)
	{
	  tr->statetype[tpos] = STM;
	  tr->nodeidx[tpos]   = k--;
	  tr->pos[tpos]       = i--;
	}
      else if (dmx[i][k+1] == dmx[i][k] && k > 1)
	{
	  tr->statetype[tpos] = STD;
	  tr->nodeidx[tpos]   = k--;
	  tr->pos[tpos]       = 0;
	}
      else Die("traceback failed");
      break;

    case STI:			/* I connects from M,I */
      sc = imx[i+1][k];
      if (sc == ILogsum(mmx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0)
	{
	  tr->statetype[tpos] = STM;
	  tr->nodeidx[tpos]   = k--;
	  tr->pos[tpos]       = i--;
	}
      else if (sc == ILogsum(imx[i][k], posterior->imx[i+1][k]) && i > 0 && k > 0)
	{
	  tr->statetype[tpos] = STI;
	  tr->nodeidx[tpos]   = k;
	  tr->pos[tpos]       = i--;
	}
      else Die("traceback failed");
      break;

    case STN:			/* N connects from S, N */
      if (i == 0 && xmx[i][XMN] == -INFTY)
	{
	  tr->statetype[tpos] = STS;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;
	}
      else if (i > 0 && xmx[i+1][XMN] == ILogsum(xmx[i][XMN], posterior->xmx[i+1][XMN]) && i > 0)
	{
	  tr->statetype[tpos] = STN;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;    /* note convention adherence:  */
	  tr->pos[tpos-1]     = i--;  /* first N doesn't emit        */
	}
      else Die("traceback failed");
      break;

    case STB:			/* B connects from N, J */
      if (xmx[i][XMB] == xmx[i][XMN])
	{
	  tr->statetype[tpos] = STN;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;
	}
      else if (xmx[i][XMB] == xmx[i][XMJ])
	{
	  tr->statetype[tpos] = STJ;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;
	}
      else Die("traceback failed");
      break;

    case STE:			/* E connects from any M state. k set here */
      for (k = M; k >= 1; k--)
	if (xmx[i][XME] == mmx[i][k] && i > 0)
	  {
	    tr->statetype[tpos] = STM;
	    tr->nodeidx[tpos]   = k--;
	    tr->pos[tpos]       = i--;
	    break;
	  }
      if (k <= 0) Die("traceback failed");
      break;

    case STC:			/* C comes from C, E */
      if (xmx[i][XMC] == ILogsum(xmx[i-1][XMC], posterior->xmx[i][XMC]) && i > 0)
	{
	  tr->statetype[tpos] = STC;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;    /* note convention adherence: */
	  tr->pos[tpos-1]     = i--;  /* first C doesn't emit       */
	}
      else if (xmx[i][XMC] == xmx[i][XME])
	{
	  tr->statetype[tpos] = STE;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0; /* E is a nonemitter */
	}
      else Die("Traceback failed.");
      break;

    case STJ:			/* J connects from E, J */
      if (xmx[i][XMJ] == ILogsum(xmx[i-1][XMJ], posterior->xmx[i][XMJ]) && i > 0)
	{
	  tr->statetype[tpos] = STJ;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0;    /* note convention adherence: */
	  tr->pos[tpos-1]     = i--;  /* first J doesn't emit       */
	}
      else if (xmx[i][XMJ] == xmx[i][XME])
	{
	  tr->statetype[tpos] = STE;
	  tr->nodeidx[tpos]   = 0;
	  tr->pos[tpos]       = 0; /* E is a nonemitter */
	}
      else Die("Traceback failed.");
      break;

    default:
      Die("traceback failed");

    } /* end switch over statetype[tpos-1] */
    
    tpos++;
    if (tpos == curralloc) 
      {				/* grow trace if necessary  */
	curralloc += L;
	P7ReallocTrace(tr, curralloc);
      }

  } /* end traceback, at S state; tpos == tlen now */
  tr->tlen = tpos;
  P7ReverseTrace(tr);
  *ret_tr = tr;

}


/* Function: PostalCode()
 * Date:     SRE, Sun Nov  7 15:31:35 1999 [Cold Spring Harbor]
 *
 * Purpose:  Given a traceback and one of Ian's posterior
 *           probability matrices, calculate a string that
 *           represents the confidence values on each 
 *           residue in the sequence.
 *           
 *           The code string is 0..L-1  (L = len of target seq),
 *           so it's in the coordinate system of the sequence string;
 *           off by one from dsq; and convertible to the coordinate
 *           system of aseq using MakeAlignedString().
 *           
 *           Values are 0-9,*  
 *           for example, 9 means with >=90% posterior probabiility,
 *           residue i is aligned to the state k that it
 *           is assigned to in the given trace.
 *
 * Args:     L  - length of seq
 *           mx - posterior prob matrix: see P7EmitterPosterior()
 *           tr - a traceback to get a Postal code string for.   
 *
 * Returns:  char * array of codes, 0..L-1
 *           Caller is responsible for free'ing it.
 */
static char
score2postcode(int sc)
{
  char i;
  i = (char) (Score2Prob(sc, 1.) * 10.);
  return ((i > 9) ? '*' : '0'+i);
}
char *
PostalCode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr)
{
  int tpos;
  int i;
  int k;
  char *postcode;

  postcode = MallocOrDie((L+1) * sizeof(char)); 
  for (tpos = 0; tpos < tr->tlen; tpos++)
    {
      i = tr->pos[tpos];
      k = tr->nodeidx[tpos];
      if (i == 0) continue;

      switch (tr->statetype[tpos]) {
      case STM: postcode[i-1] = score2postcode(mx->mmx[i][k]);   break;
      case STI: postcode[i-1] = score2postcode(mx->imx[i][k]);   break;
      case STN:	postcode[i-1] = score2postcode(mx->xmx[i][XMN]); break;
      case STC: postcode[i-1] = score2postcode(mx->xmx[i][XMC]); break;
      case STJ:	postcode[i-1] = score2postcode(mx->xmx[i][XMJ]); break;
      }
    }
  postcode[L] = '\0';

  return postcode;
}