1 /* Written by David Wilson */
2 /******************************************************************************
3 Operations for evaluating probabilities pertaining to a protein sequence.
4 The sequence is specified as a character string char *seq and a length.
5 ******************************************************************************/
11 #define MALLOC(NUMBER,TYPE) ((TYPE*)Malloc((NUMBER)*sizeof(TYPE)))
12 #define CALLOC(NUMBER,TYPE) ((TYPE*)Calloc((NUMBER),sizeof(TYPE)))
13 #define REALLOC(PTR,NUMBER,TYPE) ((TYPE*)Realloc((PTR),(NUMBER)*sizeof(TYPE)))
14 #define FREE(PTR) free(PTR)
17 /*********The following are defined in sc2seq_interface.c **************/
19 extern double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM],
20 many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM];
23 extern double gprobs[NUM_RES_TYPE], gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM];
25 extern double (*pprobs)[POSNUM];
26 extern double (*pprobp)[NUM_RES_TYPE][POSNUM][POSNUM];
30 /************************************************************************/
34 void *Malloc(), *Calloc(), *Realloc(), Free();
35 /******************************************************************************
36 Given a sequence of normalized probabilities, alpha (beta) gives the
37 normalized probability that one sees the prefix upto (suffix from) the
38 Tth state, given this state is q. ;; Works even if alpha (beta) == nprobs.
39 The alpha and beta arrays are indexed as the nprobs array is.
40 ******************************************************************************/
41 Alphas (double *alpha, double *nprob, int length, HMM *hmm)
44 double *alphat, *nprobt;
46 bzero(alpha,length*n*sizeof(double));
47 for (i=n-1; i>=0; --i)
48 alpha[i] = hmm->initprob[i] * nprob[i];
49 for (t=1; t<length; ++t) {
52 for (i=hmm->ntrans-1; i>=0; --i)
53 alphat[hmm->transt[i]] += alpha[hmm->transf[i]] * hmm->transprob[i];
54 for (i=n-1; i>=0; --i)
55 alphat[i] *= nprobt[i];
60 Betas (double *beta, double *nprob, int length, HMM *hmm)
63 double *betat, *nprobt;
65 bzero(beta,length*n*sizeof(double));
66 betat = &(beta [n*(length-1)]);
67 nprobt = &(nprob[n*(length-1)]);
68 for (i=n-1; i>=0; --i)
69 betat[i] = hmm->finprob[i] * nprobt[i];
70 for (t=length-2; t>=0; --t) {
72 nprob = &(nprobt[-n]);
73 for (i=hmm->ntrans-1; i>=0; --i)
74 beta[hmm->transf[i]] += betat[hmm->transt[i]] * hmm->transprob[i];
75 for (i=n-1; i>=0; --i)
83 /******************************************************************************
84 Given a sequence, for each residue for each state output the probability
85 that this residue was in that state.
86 ******************************************************************************/
87 /* Contains a static variable pointing to a dynamically allocated array of
88 doubles, and a static variable giving the length of the previous array size
89 (initially 0). Memory is needed for 4*nstates*length. This value is
90 rounded up to the nearest power of 2 when determining how much memory to
93 double *Memory (amount)
95 static double *memory=(void*)0;
99 memory = MALLOC(memsize,double);
101 if (memsize<amount) {
102 for (memsize=1; memsize<amount; memsize *= 2);
103 memory = REALLOC(memory,memsize,double);
107 #define AlphaBeta(i) (nprob[i] ? alpha[i]*beta[i]/nprob[i] : 0)
111 /* Score sequence using the hidden Markov model */
112 /** Note that prob use is determined by extern setting of pprobs, pprobp */
113 void HMMScore (char *seq, int length, HMM *hmm,
114 double scores[MAXSEQLEN][POSNUM+1], int table,
118 double *memory, *alpha, *beta, *nprob, *gamma, sum;
122 memory = Memory(4*n*length);
123 gamma = &(memory[0]);
124 alpha = &(memory[1*n*length]);
125 beta = &(memory[2*n*length]);
126 nprob = &(memory[3*n*length]);
127 hmm->outprobs(nprob,seq,length);
128 Alphas(alpha,nprob,length,hmm);
129 Betas(beta,nprob,length,hmm);
130 for (t=0; t<length; ++t) {
132 for (i=n-1; i>=0; --i)
133 sum += gamma[i] = AlphaBeta(i);
134 for (i=n-1; i>=0; --i)
142 for (i=0; i<length; i++) {
143 scores[i][POSNUM] =0;
144 for (j=0; j<POSNUM; j++) {
145 scores[i][j] = memory[8*i +j];
146 if (scores[i][j] > scores[i][POSNUM])
147 scores[i][POSNUM] = scores[i][j];
149 if ((offset_to_use<=6) && (offset_to_use >=0) )
150 scores[i][POSNUM] = scores[i][(i+offset_to_use)%POSNUM];
154 CountTransitions (char *seq, int length, HMM *hmm, double *tbl, double *start, double *end, double *steady)
157 double *memory, *alpha, *beta, *at, *bt1, *nprob, sum=0, *contrib, econtrib;
160 memory = Memory(3*n*length+n);
161 alpha = &(memory[0*n*length]);
162 beta = &(memory[1*n*length]);
163 nprob = &(memory[2*n*length]);
164 contrib= &(memory[3*n*length]);
165 hmm->outprobs(nprob,seq,length);
166 Alphas(alpha,nprob,length,hmm);
167 Betas(beta,nprob,length,hmm);
168 for (sum=0,i=0; i<n; ++i)
169 sum += beta[i] * hmm->initprob[i];
170 for (i=0; i<m; ++i) {
172 at = alpha; bt1 = &(beta[n]);
173 for (t=0; t+1<length; ++t) {
174 econtrib += at[hmm->transf[i]] * hmm->transprob[i] * bt1[hmm->transt[i]];
178 tbl[i] += econtrib / sum;
180 bzero(contrib,n*sizeof(double));
181 for (i=n-1; i>=0; --i)
182 start[i] += AlphaBeta(i)/sum;
183 for (t=0; t<length; ++t) {
184 for (i=n-1; i>=0; --i)
185 contrib[i] += AlphaBeta(i);
190 for (i=n-1; i>=0; --i)
191 steady[i] += contrib[i]/sum;
192 alpha = &(alpha[-n]);
194 nprob = &(nprob[-n]);
195 for (i=n-1; i>=0; --i)
196 end[i] += AlphaBeta(i)/sum;
207 context (double *nprobs, char *seq, int length)
211 int x, y, xoffs, yoffs;
213 bzero(nprobs,length*8*sizeof(double));
215 /* Pairwise interactions */
216 for (xoffs=0; xoffs<8; ++xoffs)
217 for (dist=1; dist<=4; dist+=3) {
218 yoffs = (xoffs<7) ? (xoffs+dist)%7 : xoffs;
219 for (x = 0, y = x+dist; y < length; x++, y++) {
220 lnprob = (xoffs<7) ? (pprobp[seq[x]][seq[y]][xoffs][yoffs] -
221 pprobs[seq[x]][xoffs] -
222 pprobs[seq[y]][yoffs])
223 : (gprobp[seq[x]][seq[y]][dist-1] - gprobs[seq[x]] - gprobs[seq[y]]);
224 nprobs[8*x+xoffs] += lnprob;
225 nprobs[8*y+yoffs] += lnprob;
229 /* Singles component */
230 for (xoffs=0; xoffs<8; ++xoffs)
231 for (x = 0; x < length; x++) {
232 nprobs[8*x+xoffs] /= 4;
233 nprobs[8*x+xoffs] += (xoffs<7) ? pprobs[seq[x]][xoffs] : gprobs[seq[x]];
236 for (xoffs=0; xoffs<8; ++xoffs)
237 for (x = 0; x < length; x++)
238 nprobs[8*x+xoffs] -= nprobs[8*x+7];
239 /* Make position 'f' like genbank, and don't use 'b' and 'c' as much */
240 /* for (x = 0; x < length; x++) {
245 /* Convert from log */
246 for (xoffs=0; xoffs<8; ++xoffs)
247 for (x = 0; x < length; x++)
248 nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]);
250 if ((nprobs[8*x+xoffs] = exp(nprobs[8*x+xoffs]))>10)
251 printf("Check out position %d, %c%c%c\n",x,
252 numaa(seq[x-1]), numaa(seq[x]), numaa(seq[x+1]));
257 /* The purpose of the next function to allow the new scoring method to be
258 easily interfaced with the old code so that histograms and the like may
259 be made. The long term future of this function is not a concern.
262 /* Create the CC HMM given global variable prob. functions. */
263 HMM *CCHMM (FILE *fgin, FILE *fpin)
266 double *initprob, *finprob, *transprob;
267 int *transf, *transt;
269 double FractionInCCs = 0.02;
270 double AverageCCLength = 40.0;
271 double CC, Gen, GenGen, GenCC, CCGen, CCCC, CCNewPhase;
275 GenCC = FractionInCCs/AverageCCLength;
277 CCGen = 1/AverageCCLength - (POSNUM-1)*CCNewPhase;
278 CCCC = 1-1/AverageCCLength;
280 CCShift[i] = CCNewPhase;
290 initprob = MALLOC(8,double);
291 finprob = MALLOC(8,double);
292 transprob = MALLOC(64,double);
293 transf = MALLOC(64,int);
294 transt = MALLOC(64,int);
295 /* tables = MALLOC(1,ProbTbls); */
303 for (j=0; j<8; ++j) {
307 transprob[e] = CCShift[(j+7-i)%7];
309 transprob[e] = GenCC;
311 transprob[e] = CCGen;
313 transprob[e] = GenGen;
318 hmm->initprob = initprob;
319 hmm->finprob = finprob;
320 hmm->transf = transf;
321 hmm->transt = transt;
323 hmm->transprob = transprob;
324 hmm->outprobs = context;
327 /*---------------------------------------------------------------------------*/
328 /*---------------------------------------------------------------------------*/
330 void *Calloc (n,elsize)
334 ptr = (void*)calloc(n,elsize);
336 fprintf(stderr,"Memory shortage!!\n");
340 void *Realloc (ptr,n)
344 ptr = (void*)realloc(ptr,n);
346 fprintf(stderr,"Memory shortage!!\n");