/* Written by David Wilson */
/******************************************************************************
  Operations for evaluating probabilities pertaining to a protein sequence.
  The sequence is specified as a character string char *seq and a length.
******************************************************************************/
#include "scconst.h"
#include <stdio.h>
#include "sc2seq.h"
#include "hmm.h"

#define MALLOC(NUMBER,TYPE) ((TYPE*)Malloc((NUMBER)*sizeof(TYPE)))
#define CALLOC(NUMBER,TYPE) ((TYPE*)Calloc((NUMBER),sizeof(TYPE)))
#define REALLOC(PTR,NUMBER,TYPE) ((TYPE*)Realloc((PTR),(NUMBER)*sizeof(TYPE)))
#define FREE(PTR) free(PTR)


/*********The following are defined in sc2seq_interface.c **************/

extern double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM], 
         many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM];


extern double gprobs[NUM_RES_TYPE], gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM];

extern double (*pprobs)[POSNUM];
extern double (*pprobp)[NUM_RES_TYPE][POSNUM][POSNUM];


/************************************************************************/


void *Malloc(), *Calloc(), *Realloc(), Free();
/******************************************************************************
  Given a sequence of normalized probabilities, alpha (beta) gives the
  normalized probability that one sees the prefix upto (suffix from) the
  Tth state, given this state is q.  ;; Works even if alpha (beta) == nprobs.
  The alpha and beta arrays are indexed as the nprobs array is.
******************************************************************************/
Alphas (double *alpha, double *nprob, int length, HMM *hmm)
{
  int i,t,n;
  double *alphat, *nprobt;
  n = hmm->nstates;
  bzero(alpha,length*n*sizeof(double));
  for (i=n-1; i>=0; --i)
    alpha[i] = hmm->initprob[i] * nprob[i];
  for (t=1; t<length; ++t) {
    alphat = &(alpha[n]);
    nprobt = &(nprob[n]);
    for (i=hmm->ntrans-1; i>=0; --i)
      alphat[hmm->transt[i]] += alpha[hmm->transf[i]] * hmm->transprob[i];
    for (i=n-1; i>=0; --i)
      alphat[i] *= nprobt[i];
    alpha = alphat;
    nprob = nprobt;
  }
}
Betas  (double  *beta, double *nprob, int length, HMM *hmm)
{
  int i,t,n;
  double *betat, *nprobt;
  n = hmm->nstates;
  bzero(beta,length*n*sizeof(double));
  betat  = &(beta [n*(length-1)]);
  nprobt = &(nprob[n*(length-1)]);
  for (i=n-1; i>=0; --i)
    betat[i] = hmm->finprob[i] * nprobt[i];
  for (t=length-2; t>=0; --t) {
    beta  = &(betat [-n]);
    nprob = &(nprobt[-n]);
    for (i=hmm->ntrans-1; i>=0; --i)
      beta[hmm->transf[i]] += betat[hmm->transt[i]] * hmm->transprob[i];
    for (i=n-1; i>=0; --i)
      beta[i] *= nprob[i];
    betat  = beta;
    nprobt = nprob;
  }
}


/******************************************************************************
  Given a sequence, for each residue for each state output the probability
  that this residue was in that state.
******************************************************************************/
/* Contains a static variable pointing to a dynamically allocated array of
   doubles, and a static variable giving the length of the previous array size
   (initially 0).  Memory is needed for 4*nstates*length.  This value is
   rounded up to the nearest power of 2 when determining how much memory to
   allocate.
   */
double *Memory (amount)
{
  static double *memory=(void*)0;
  static int memsize=0;
  if (!memsize) {
    memsize = amount;
    memory = MALLOC(memsize,double);
  }
  if (memsize<amount) {
    for (memsize=1; memsize<amount; memsize *= 2);
    memory = REALLOC(memory,memsize,double);
  }
  return memory;
}
#define AlphaBeta(i) (nprob[i] ? alpha[i]*beta[i]/nprob[i] : 0)


/* Score sequence using the hidden Markov model */
/** Note that prob use is determined by extern setting of pprobs, pprobp */
void HMMScore (char *seq, int length, HMM *hmm,
		double scores[MAXSEQLEN][POSNUM+1], int table,
	       int offset_to_use)
{
  int n, i,t,j;
  double *memory, *alpha, *beta, *nprob, *gamma, sum;


  n = hmm->nstates;
  memory = Memory(4*n*length);
  gamma = &(memory[0]);
  alpha = &(memory[1*n*length]);
  beta  = &(memory[2*n*length]);
  nprob = &(memory[3*n*length]);
  hmm->outprobs(nprob,seq,length);
  Alphas(alpha,nprob,length,hmm);
  Betas(beta,nprob,length,hmm);
  for (t=0; t<length; ++t) {
    sum = 0;
    for (i=n-1; i>=0; --i)
      sum += gamma[i] = AlphaBeta(i);
    for (i=n-1; i>=0; --i)
      gamma[i] /= sum;
    gamma = &(gamma[n]);
    alpha = &(alpha[n]);
    beta  = &(beta[n]);
    nprob = &(nprob[n]);
  }

  for (i=0; i<length; i++) {
    scores[i][POSNUM] =0;
    for (j=0; j<POSNUM; j++) {
      scores[i][j] = memory[8*i +j];
      if (scores[i][j] > scores[i][POSNUM]) 
	scores[i][POSNUM] = scores[i][j];
    }
    if ((offset_to_use<=6) && (offset_to_use >=0) )  
      scores[i][POSNUM] = scores[i][(i+offset_to_use)%POSNUM];
  }
}

CountTransitions (char *seq, int length, HMM *hmm, double *tbl, double *start, double *end, double  *steady)
{
  int n, m, i,t;
  double *memory, *alpha, *beta, *at, *bt1, *nprob, sum=0, *contrib, econtrib;
  n = hmm->nstates;
  m = hmm->ntrans;
  memory = Memory(3*n*length+n);
  alpha =  &(memory[0*n*length]);
  beta  =  &(memory[1*n*length]);
  nprob =  &(memory[2*n*length]);
  contrib= &(memory[3*n*length]);
  hmm->outprobs(nprob,seq,length);
  Alphas(alpha,nprob,length,hmm);
  Betas(beta,nprob,length,hmm);
  for (sum=0,i=0; i<n; ++i)
    sum += beta[i] * hmm->initprob[i];
  for (i=0; i<m; ++i) {
    econtrib = 0;
    at = alpha; bt1 = &(beta[n]);
    for (t=0; t+1<length; ++t) {
      econtrib += at[hmm->transf[i]] * hmm->transprob[i] * bt1[hmm->transt[i]];
      at  = &(at[n]);
      bt1 = &(bt1[n]);
    }
    tbl[i] += econtrib / sum;
  }
  bzero(contrib,n*sizeof(double));
  for (i=n-1; i>=0; --i)
    start[i] += AlphaBeta(i)/sum;
  for (t=0; t<length; ++t) {
    for (i=n-1; i>=0; --i)
      contrib[i] += AlphaBeta(i);
    alpha = &(alpha[n]);
    beta  = &(beta[n]);
    nprob = &(nprob[n]);
  }
  for (i=n-1; i>=0; --i)
    steady[i] += contrib[i]/sum;
  alpha = &(alpha[-n]);
  beta  = &(beta[-n]);
  nprob = &(nprob[-n]);
  for (i=n-1; i>=0; --i)
    end[i] += AlphaBeta(i)/sum;
}


#include "scconst.h"
#include <stdio.h>
#include "scio.h"
#include <math.h>


context (double *nprobs, char *seq, int length)
{
  double lnprob;
  int i, j, dist;
  int x, y, xoffs, yoffs;

  bzero(nprobs,length*8*sizeof(double));

  /* Pairwise interactions */
  for (xoffs=0; xoffs<8; ++xoffs)
    for (dist=1; dist<=4; dist+=3) {
      yoffs = (xoffs<7) ? (xoffs+dist)%7 : xoffs;
      for (x = 0, y = x+dist; y < length; x++, y++) {
	lnprob = (xoffs<7) ? (pprobp[seq[x]][seq[y]][xoffs][yoffs] -
			      pprobs[seq[x]][xoffs] -
			      pprobs[seq[y]][yoffs])
	  : (gprobp[seq[x]][seq[y]][dist-1] - gprobs[seq[x]] - gprobs[seq[y]]);
	nprobs[8*x+xoffs] += lnprob;
	nprobs[8*y+yoffs] += lnprob;
      }
    }

  /* Singles component */
  for (xoffs=0; xoffs<8; ++xoffs)
    for (x = 0; x < length; x++) {
      nprobs[8*x+xoffs] /= 4;
      nprobs[8*x+xoffs] += (xoffs<7) ? pprobs[seq[x]][xoffs] : gprobs[seq[x]];
    }
  /* Now normalize */
  for (xoffs=0; xoffs<8; ++xoffs)
    for (x = 0; x < length; x++)
      nprobs[8*x+xoffs] -= nprobs[8*x+7];
  /* Make position 'f' like genbank, and don't use 'b' and 'c' as much */
/*  for (x = 0; x < length; x++) {
    nprobs[8*x+5] /= 2;
    nprobs[8*x+1] /= 2;
    nprobs[8*x+2] /= 2;
  }*/
  /* Convert from log */
  for (xoffs=0; xoffs<8; ++xoffs)
    for (x = 0; x < length; x++)
      nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]);
/*
      if ((nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]))>10)
	printf("Check out position %d, %c%c%c\n",x,
	       numaa(seq[x-1]), numaa(seq[x]), numaa(seq[x+1]));
*/
}


/* The purpose of the next function to allow the new scoring method to be
   easily interfaced with the old code so that histograms and the like may
   be made.  The long term future of this function is not a concern.
   */

/* Create the CC HMM given global variable prob. functions. */
HMM *CCHMM (FILE *fgin, FILE *fpin)
{
  int i,j;
  double *initprob, *finprob, *transprob;
  int *transf, *transt;
  HMM *hmm;
  double FractionInCCs = 0.02;
  double AverageCCLength = 40.0;
  double CC, Gen, GenGen, GenCC, CCGen, CCCC, CCNewPhase;
  double CCShift[7];

  CC = FractionInCCs;
  GenCC = FractionInCCs/AverageCCLength;
  CCNewPhase = GenCC;
  CCGen = 1/AverageCCLength - (POSNUM-1)*CCNewPhase;
  CCCC = 1-1/AverageCCLength;
  for (i=0; i<7; ++i)
    CCShift[i] = CCNewPhase;
  CCShift[1] = CCCC;

  CC = GenCC;
  
  Gen = 1-CC;
  GenGen = 1-GenCC;
  CC /= 7;
  GenCC /= 7;

  initprob = MALLOC(8,double);
  finprob = MALLOC(8,double);
  transprob = MALLOC(64,double);
  transf = MALLOC(64,int);
  transt = MALLOC(64,int);
/*  tables = MALLOC(1,ProbTbls); */
  for (i=0; i<7; ++i)
    initprob[i] = CC;
  initprob[7] = Gen;
  for (i=0; i<7; ++i)
    finprob[i] = CCGen;
  finprob[7] = GenGen;
  for (i=0; i<8; ++i)
    for (j=0; j<8; ++j) {
      int e = i*8+j;
      transf[e] = i;
      transt[e] = j;
      transprob[e] = CCShift[(j+7-i)%7];
      if (i==7)
	transprob[e] = GenCC;
      if (j==7)
	transprob[e] = CCGen;
      if (i==7 && j==7)
	transprob[e] = GenGen;
    }

  hmm = MALLOC(1,HMM);
  hmm->nstates = 8;
  hmm->initprob = initprob;
  hmm->finprob = finprob;
  hmm->transf = transf;
  hmm->transt = transt;
  hmm->ntrans = 64;
  hmm->transprob = transprob;
  hmm->outprobs = context;
  return hmm;
}
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

void *Calloc (n,elsize)
     int n,elsize;
{
  void *ptr;
  ptr = (void*)calloc(n,elsize);
  if (ptr) return ptr;
  fprintf(stderr,"Memory shortage!!\n");
  exit(-1);
}

void *Realloc (ptr,n)
     void *ptr;
     int n;
{
  ptr = (void*)realloc(ptr,n);
  if (ptr) return ptr;
  fprintf(stderr,"Memory shortage!!\n");
  exit(-1);
}