--- /dev/null
+/* Written by David Wilson */
+/******************************************************************************
+ Operations for evaluating probabilities pertaining to a protein sequence.
+ The sequence is specified as a character string char *seq and a length.
+******************************************************************************/
+#include "scconst.h"
+#include <stdio.h>
+#include "sc2seq.h"
+#include "hmm.h"
+
+#define MALLOC(NUMBER,TYPE) ((TYPE*)Malloc((NUMBER)*sizeof(TYPE)))
+#define CALLOC(NUMBER,TYPE) ((TYPE*)Calloc((NUMBER),sizeof(TYPE)))
+#define REALLOC(PTR,NUMBER,TYPE) ((TYPE*)Realloc((PTR),(NUMBER)*sizeof(TYPE)))
+#define FREE(PTR) free(PTR)
+
+
+/*********The following are defined in sc2seq_interface.c **************/
+
+extern double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM],
+ many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM];
+
+
+extern double gprobs[NUM_RES_TYPE], gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM];
+
+extern double (*pprobs)[POSNUM];
+extern double (*pprobp)[NUM_RES_TYPE][POSNUM][POSNUM];
+
+
+
+/************************************************************************/
+
+
+
+void *Malloc(), *Calloc(), *Realloc(), Free();
+/******************************************************************************
+ Given a sequence of normalized probabilities, alpha (beta) gives the
+ normalized probability that one sees the prefix upto (suffix from) the
+ Tth state, given this state is q. ;; Works even if alpha (beta) == nprobs.
+ The alpha and beta arrays are indexed as the nprobs array is.
+******************************************************************************/
+Alphas (double *alpha, double *nprob, int length, HMM *hmm)
+{
+ int i,t,n;
+ double *alphat, *nprobt;
+ n = hmm->nstates;
+ bzero(alpha,length*n*sizeof(double));
+ for (i=n-1; i>=0; --i)
+ alpha[i] = hmm->initprob[i] * nprob[i];
+ for (t=1; t<length; ++t) {
+ alphat = &(alpha[n]);
+ nprobt = &(nprob[n]);
+ for (i=hmm->ntrans-1; i>=0; --i)
+ alphat[hmm->transt[i]] += alpha[hmm->transf[i]] * hmm->transprob[i];
+ for (i=n-1; i>=0; --i)
+ alphat[i] *= nprobt[i];
+ alpha = alphat;
+ nprob = nprobt;
+ }
+}
+Betas (double *beta, double *nprob, int length, HMM *hmm)
+{
+ int i,t,n;
+ double *betat, *nprobt;
+ n = hmm->nstates;
+ bzero(beta,length*n*sizeof(double));
+ betat = &(beta [n*(length-1)]);
+ nprobt = &(nprob[n*(length-1)]);
+ for (i=n-1; i>=0; --i)
+ betat[i] = hmm->finprob[i] * nprobt[i];
+ for (t=length-2; t>=0; --t) {
+ beta = &(betat [-n]);
+ nprob = &(nprobt[-n]);
+ for (i=hmm->ntrans-1; i>=0; --i)
+ beta[hmm->transf[i]] += betat[hmm->transt[i]] * hmm->transprob[i];
+ for (i=n-1; i>=0; --i)
+ beta[i] *= nprob[i];
+ betat = beta;
+ nprobt = nprob;
+ }
+}
+
+
+/******************************************************************************
+ Given a sequence, for each residue for each state output the probability
+ that this residue was in that state.
+******************************************************************************/
+/* Contains a static variable pointing to a dynamically allocated array of
+ doubles, and a static variable giving the length of the previous array size
+ (initially 0). Memory is needed for 4*nstates*length. This value is
+ rounded up to the nearest power of 2 when determining how much memory to
+ allocate.
+ */
+double *Memory (amount)
+{
+ static double *memory=(void*)0;
+ static int memsize=0;
+ if (!memsize) {
+ memsize = amount;
+ memory = MALLOC(memsize,double);
+ }
+ if (memsize<amount) {
+ for (memsize=1; memsize<amount; memsize *= 2);
+ memory = REALLOC(memory,memsize,double);
+ }
+ return memory;
+}
+#define AlphaBeta(i) (nprob[i] ? alpha[i]*beta[i]/nprob[i] : 0)
+
+
+
+/* Score sequence using the hidden Markov model */
+/** Note that prob use is determined by extern setting of pprobs, pprobp */
+void HMMScore (char *seq, int length, HMM *hmm,
+ double scores[MAXSEQLEN][POSNUM+1], int table,
+ int offset_to_use)
+{
+ int n, i,t,j;
+ double *memory, *alpha, *beta, *nprob, *gamma, sum;
+
+
+ n = hmm->nstates;
+ memory = Memory(4*n*length);
+ gamma = &(memory[0]);
+ alpha = &(memory[1*n*length]);
+ beta = &(memory[2*n*length]);
+ nprob = &(memory[3*n*length]);
+ hmm->outprobs(nprob,seq,length);
+ Alphas(alpha,nprob,length,hmm);
+ Betas(beta,nprob,length,hmm);
+ for (t=0; t<length; ++t) {
+ sum = 0;
+ for (i=n-1; i>=0; --i)
+ sum += gamma[i] = AlphaBeta(i);
+ for (i=n-1; i>=0; --i)
+ gamma[i] /= sum;
+ gamma = &(gamma[n]);
+ alpha = &(alpha[n]);
+ beta = &(beta[n]);
+ nprob = &(nprob[n]);
+ }
+
+ for (i=0; i<length; i++) {
+ scores[i][POSNUM] =0;
+ for (j=0; j<POSNUM; j++) {
+ scores[i][j] = memory[8*i +j];
+ if (scores[i][j] > scores[i][POSNUM])
+ scores[i][POSNUM] = scores[i][j];
+ }
+ if ((offset_to_use<=6) && (offset_to_use >=0) )
+ scores[i][POSNUM] = scores[i][(i+offset_to_use)%POSNUM];
+ }
+}
+
+CountTransitions (char *seq, int length, HMM *hmm, double *tbl, double *start, double *end, double *steady)
+{
+ int n, m, i,t;
+ double *memory, *alpha, *beta, *at, *bt1, *nprob, sum=0, *contrib, econtrib;
+ n = hmm->nstates;
+ m = hmm->ntrans;
+ memory = Memory(3*n*length+n);
+ alpha = &(memory[0*n*length]);
+ beta = &(memory[1*n*length]);
+ nprob = &(memory[2*n*length]);
+ contrib= &(memory[3*n*length]);
+ hmm->outprobs(nprob,seq,length);
+ Alphas(alpha,nprob,length,hmm);
+ Betas(beta,nprob,length,hmm);
+ for (sum=0,i=0; i<n; ++i)
+ sum += beta[i] * hmm->initprob[i];
+ for (i=0; i<m; ++i) {
+ econtrib = 0;
+ at = alpha; bt1 = &(beta[n]);
+ for (t=0; t+1<length; ++t) {
+ econtrib += at[hmm->transf[i]] * hmm->transprob[i] * bt1[hmm->transt[i]];
+ at = &(at[n]);
+ bt1 = &(bt1[n]);
+ }
+ tbl[i] += econtrib / sum;
+ }
+ bzero(contrib,n*sizeof(double));
+ for (i=n-1; i>=0; --i)
+ start[i] += AlphaBeta(i)/sum;
+ for (t=0; t<length; ++t) {
+ for (i=n-1; i>=0; --i)
+ contrib[i] += AlphaBeta(i);
+ alpha = &(alpha[n]);
+ beta = &(beta[n]);
+ nprob = &(nprob[n]);
+ }
+ for (i=n-1; i>=0; --i)
+ steady[i] += contrib[i]/sum;
+ alpha = &(alpha[-n]);
+ beta = &(beta[-n]);
+ nprob = &(nprob[-n]);
+ for (i=n-1; i>=0; --i)
+ end[i] += AlphaBeta(i)/sum;
+}
+
+
+
+#include "scconst.h"
+#include <stdio.h>
+#include "scio.h"
+#include <math.h>
+
+
+context (double *nprobs, char *seq, int length)
+{
+ double lnprob;
+ int i, j, dist;
+ int x, y, xoffs, yoffs;
+
+ bzero(nprobs,length*8*sizeof(double));
+
+ /* Pairwise interactions */
+ for (xoffs=0; xoffs<8; ++xoffs)
+ for (dist=1; dist<=4; dist+=3) {
+ yoffs = (xoffs<7) ? (xoffs+dist)%7 : xoffs;
+ for (x = 0, y = x+dist; y < length; x++, y++) {
+ lnprob = (xoffs<7) ? (pprobp[seq[x]][seq[y]][xoffs][yoffs] -
+ pprobs[seq[x]][xoffs] -
+ pprobs[seq[y]][yoffs])
+ : (gprobp[seq[x]][seq[y]][dist-1] - gprobs[seq[x]] - gprobs[seq[y]]);
+ nprobs[8*x+xoffs] += lnprob;
+ nprobs[8*y+yoffs] += lnprob;
+ }
+ }
+
+ /* Singles component */
+ for (xoffs=0; xoffs<8; ++xoffs)
+ for (x = 0; x < length; x++) {
+ nprobs[8*x+xoffs] /= 4;
+ nprobs[8*x+xoffs] += (xoffs<7) ? pprobs[seq[x]][xoffs] : gprobs[seq[x]];
+ }
+ /* Now normalize */
+ for (xoffs=0; xoffs<8; ++xoffs)
+ for (x = 0; x < length; x++)
+ nprobs[8*x+xoffs] -= nprobs[8*x+7];
+ /* Make position 'f' like genbank, and don't use 'b' and 'c' as much */
+/* for (x = 0; x < length; x++) {
+ nprobs[8*x+5] /= 2;
+ nprobs[8*x+1] /= 2;
+ nprobs[8*x+2] /= 2;
+ }*/
+ /* Convert from log */
+ for (xoffs=0; xoffs<8; ++xoffs)
+ for (x = 0; x < length; x++)
+ nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]);
+/*
+ if ((nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]))>10)
+ printf("Check out position %d, %c%c%c\n",x,
+ numaa(seq[x-1]), numaa(seq[x]), numaa(seq[x+1]));
+*/
+}
+
+
+/* The purpose of the next function to allow the new scoring method to be
+ easily interfaced with the old code so that histograms and the like may
+ be made. The long term future of this function is not a concern.
+ */
+
+/* Create the CC HMM given global variable prob. functions. */
+HMM *CCHMM (FILE *fgin, FILE *fpin)
+{
+ int i,j;
+ double *initprob, *finprob, *transprob;
+ int *transf, *transt;
+ HMM *hmm;
+ double FractionInCCs = 0.02;
+ double AverageCCLength = 40.0;
+ double CC, Gen, GenGen, GenCC, CCGen, CCCC, CCNewPhase;
+ double CCShift[7];
+
+ CC = FractionInCCs;
+ GenCC = FractionInCCs/AverageCCLength;
+ CCNewPhase = GenCC;
+ CCGen = 1/AverageCCLength - (POSNUM-1)*CCNewPhase;
+ CCCC = 1-1/AverageCCLength;
+ for (i=0; i<7; ++i)
+ CCShift[i] = CCNewPhase;
+ CCShift[1] = CCCC;
+
+ CC = GenCC;
+
+ Gen = 1-CC;
+ GenGen = 1-GenCC;
+ CC /= 7;
+ GenCC /= 7;
+
+ initprob = MALLOC(8,double);
+ finprob = MALLOC(8,double);
+ transprob = MALLOC(64,double);
+ transf = MALLOC(64,int);
+ transt = MALLOC(64,int);
+/* tables = MALLOC(1,ProbTbls); */
+ for (i=0; i<7; ++i)
+ initprob[i] = CC;
+ initprob[7] = Gen;
+ for (i=0; i<7; ++i)
+ finprob[i] = CCGen;
+ finprob[7] = GenGen;
+ for (i=0; i<8; ++i)
+ for (j=0; j<8; ++j) {
+ int e = i*8+j;
+ transf[e] = i;
+ transt[e] = j;
+ transprob[e] = CCShift[(j+7-i)%7];
+ if (i==7)
+ transprob[e] = GenCC;
+ if (j==7)
+ transprob[e] = CCGen;
+ if (i==7 && j==7)
+ transprob[e] = GenGen;
+ }
+
+ hmm = MALLOC(1,HMM);
+ hmm->nstates = 8;
+ hmm->initprob = initprob;
+ hmm->finprob = finprob;
+ hmm->transf = transf;
+ hmm->transt = transt;
+ hmm->ntrans = 64;
+ hmm->transprob = transprob;
+ hmm->outprobs = context;
+ return hmm;
+}
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+
+void *Calloc (n,elsize)
+ int n,elsize;
+{
+ void *ptr;
+ ptr = (void*)calloc(n,elsize);
+ if (ptr) return ptr;
+ fprintf(stderr,"Memory shortage!!\n");
+ exit(-1);
+}
+
+void *Realloc (ptr,n)
+ void *ptr;
+ int n;
+{
+ ptr = (void*)realloc(ptr,n);
+ if (ptr) return ptr;
+ fprintf(stderr,"Memory shortage!!\n");
+ exit(-1);
+}
+
+
+