1 /* Last changed Time-stamp: <2009-02-24 14:37:05 ivo> */
3 partiton function and base pair probabilities
4 for RNA secvondary structures
5 of a set of aligned sequences
20 #include <float.h> /* #defines FLT_MIN */
24 #include "energy_par.h"
25 #include "fold_vars.h"
30 #include "loop_energies.h"
31 #include "part_func.h"
39 static char rcsid[] = "$Id: alipfold.c,v 1.17 2009/02/24 14:21:33 ivo Exp $";
41 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
42 #define NEW_NINIO 1 /* new asymetry penalty */
43 #define ISOLATED 256.0
45 #define MINPSCORE -2 * UNIT
49 #################################
50 # PUBLIC GLOBAL VARIABLES #
51 #################################
55 #################################
56 # PRIVATE GLOBAL VARIABLES #
57 #################################
59 PRIVATE FLT_OR_DBL *expMLbase=NULL;
60 PRIVATE FLT_OR_DBL *q=NULL, *qb=NULL, *qm=NULL, *qm1=NULL, *qqm=NULL, *qqm1=NULL, *qq=NULL, *qq1=NULL;
61 PRIVATE FLT_OR_DBL *prml=NULL, *prm_l=NULL, *prm_l1=NULL, *q1k=NULL, *qln=NULL;
62 PRIVATE FLT_OR_DBL *probs=NULL;
63 PRIVATE FLT_OR_DBL *scale=NULL;
64 PRIVATE short *pscore=NULL; /* precomputed array of covariance bonus/malus */
65 /* some additional things for circfold */
66 PRIVATE int circular=0;
67 PRIVATE FLT_OR_DBL qo, qho, qio, qmo, *qm2=NULL;
68 PRIVATE int *jindx=NULL;
69 PRIVATE int *my_iindx=NULL;
70 PRIVATE int do_bppm = 1; /* do backtracking per default */
71 PRIVATE short **S=NULL;
72 PRIVATE short **S5=NULL; /*S5[s][i] holds next base 5' of i in sequence s*/
73 PRIVATE short **S3=NULL; /*S3[s][i] holds next base 3' of i in sequence s*/
74 PRIVATE char **Ss=NULL;
75 PRIVATE unsigned short **a2s=NULL;
76 PRIVATE int N_seq = 0;
77 PRIVATE pf_paramT *pf_params = NULL;
78 PRIVATE char *pstruc=NULL;
79 PRIVATE double alpha = 1.0;
80 PRIVATE int struct_constrained = 0;
84 /* NOTE: all variables are assumed to be uninitialized if they are declared as threadprivate
86 #pragma omp threadprivate(expMLbase, q, qb, qm, qm1, qqm, qqm1, qq, qq1,\
87 probs, prml, prm_l, prm_l1, q1k, qln,\
88 scale, pscore, circular,\
89 qo, qho, qio, qmo, qm2, jindx, my_iindx,\
90 S, S5, S3, Ss, a2s, N_seq, pf_params, pstruc, alpha, struct_constrained)
95 #################################
96 # PRIVATE FUNCTION DECLARATIONS #
97 #################################
100 PRIVATE void init_alipf_fold(int length, int n_seq, pf_paramT *parameters);
101 PRIVATE void scale_pf_params(unsigned int length, int n_seq, pf_paramT *parameters);
102 PRIVATE void get_arrays(unsigned int length);
103 PRIVATE void make_pscores(const short *const *S, const char **AS, int n_seq, const char *structure);
104 PRIVATE pair_info *make_pairinfo(const short *const* S, const char **AS, int n_seq);
105 PRIVATE void alipf_circ(const char **sequences, char *structure);
106 PRIVATE void alipf_linear(const char **sequences, char *structure);
107 PRIVATE void alipf_create_bppm(const char **sequences, char *structure, struct plist **pl);
108 PRIVATE void backtrack(int i, int j, int n_seq, double *prob);
109 PRIVATE void backtrack_qm1(int i,int j, int n_seq, double *prob);
114 #################################
115 # BEGIN OF FUNCTION DEFINITIONS #
116 #################################
119 PRIVATE void init_alipf_fold(int length, int n_seq, pf_paramT *parameters){
120 if (length<1) nrerror("init_alipf_fold: length must be greater 0");
123 /* Explicitly turn off dynamic threads */
128 nonstandard_arithmetic();
135 free_alipf_arrays(); /* free previous allocation */
136 get_arrays((unsigned) length);
137 scale_pf_params((unsigned) length, n_seq, parameters);
141 *** Allocate memory for all matrices and other stuff
143 PRIVATE void get_arrays(unsigned int length){
146 if((length+1) >= (unsigned int)sqrt((double)INT_MAX))
147 nrerror("get_arrays@alipfold.c: sequence length exceeds addressable range");
149 size = sizeof(FLT_OR_DBL) * ((length+1)*(length+2)/2);
151 q = (FLT_OR_DBL *) space(size);
152 qb = (FLT_OR_DBL *) space(size);
153 qm = (FLT_OR_DBL *) space(size);
154 qm1 = (FLT_OR_DBL *) space(size);
155 qm2 = (circular) ? (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2)) : NULL;
157 pscore = (short *) space(sizeof(short)*((length+1)*(length+2)/2));
158 q1k = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
159 qln = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
160 qq = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
161 qq1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
162 qqm = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
163 qqm1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
164 prm_l = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
165 prm_l1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
166 prml = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
167 expMLbase = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
168 scale = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
170 my_iindx = get_iindx(length);
171 iindx = get_iindx(length); /* for backward compatibility and Perl wrapper */
172 jindx = get_indx(length);
175 /*----------------------------------------------------------------------*/
178 PUBLIC void free_alipf_arrays(void){
184 if(pscore) free(pscore);
191 if(prm_l) free(prm_l);
192 if(prm_l1) free(prm_l1);
194 if(expMLbase) free(expMLbase);
195 if(scale) free(scale);
196 if(my_iindx) free(my_iindx);
197 if(iindx) free(iindx); /* for backward compatibility and Perl wrapper */
198 if(jindx) free(jindx);
201 free_sequence_arrays(N_seq, &S, &S5, &S3, &a2s, &Ss);
206 q = probs = qb = qm = qm1 = qm2 = qq = qq1 = qqm = qqm1 = q1k = qln = prml = prm_l = prm_l1 = expMLbase = scale = NULL;
207 my_iindx = jindx = iindx = NULL;
211 standard_arithmetic();
219 /*-----------------------------------------------------------------*/
220 PUBLIC float alipf_fold(const char **sequences, char *structure, plist **pl){
221 return alipf_fold_par(sequences, structure, pl, NULL, do_backtrack, fold_constrained, 0);
224 PUBLIC float alipf_circ_fold(const char **sequences, char *structure, plist **pl){
225 return alipf_fold_par(sequences, structure, pl, NULL, do_backtrack, fold_constrained, 1);
228 PUBLIC float alipf_fold_par(const char **sequences,
231 pf_paramT *parameters,
240 circular = is_circular;
241 do_bppm = calculate_bppm;
242 struct_constrained = is_constrained;
245 oldAliEn = 1; /* may be removed if circular alipf fold works with gapfree stuff */
247 n = (int) strlen(sequences[0]);
248 for (s=0; sequences[s]!=NULL; s++);
251 init_alipf_fold(n, n_seq, parameters);
253 alloc_sequence_arrays(sequences, &S, &S5, &S3, &a2s, &Ss, circular);
254 make_pscores((const short *const*)S, sequences, n_seq, structure);
256 alipf_linear(sequences, structure);
258 /* calculate post processing step for circular */
261 alipf_circ(sequences, structure);
263 if (backtrack_type=='C') Q = qb[my_iindx[1]-n];
264 else if (backtrack_type=='M') Q = qm[my_iindx[1]-n];
265 else Q = (circular) ? qo : q[my_iindx[1]-n];
267 /* ensemble free energy in Kcal/mol */
268 if (Q<=FLT_MIN) fprintf(stderr, "pf_scale too large\n");
269 free_energy = (-log(Q)-n*log(pf_params->pf_scale))*pf_params->kT/(1000.0 * n_seq);
270 /* in case we abort because of floating point errors */
271 if (n>1600) fprintf(stderr, "free energy = %8.2f\n", free_energy);
273 /* backtracking to construct binding probabilities of pairs*/
275 alipf_create_bppm(sequences, structure, pl);
277 * Backward compatibility:
278 * This block may be removed if deprecated functions
279 * relying on the global variable "pr" vanish from within the package!
287 PUBLIC FLT_OR_DBL *alipf_export_bppm(void){
293 PRIVATE void alipf_linear(const char **sequences, char *structure)
295 int s, n, n_seq, i,j,k,l, ij, u, u1, d, ii, *type, type_2, tt;
296 FLT_OR_DBL temp, Qmax=0;
297 FLT_OR_DBL qbt1, *tmp;
300 FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
302 for(s=0; sequences[s]!=NULL; s++);
305 n = (int) strlen(sequences[0]);
306 kTn = pf_params->kT/10.; /* kT in cal/mol */
307 type = (int *)space(sizeof(int) * n_seq);
309 /* array initialization ; qb,qm,q
310 qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */
312 for (d=0; d<=TURN; d++)
313 for (i=1; i<=n-d; i++) {
316 q[ij]=1.0*scale[d+1];
321 qq[i]=qq1[i]=qqm[i]=qqm1[i]=0;
323 for (j=TURN+2;j<=n; j++) {
324 for (i=j-TURN-1; i>=1; i--) {
326 /* construction of partition function for segment i,j */
327 /* calculate pf given that i and j pair: qb(i,j) */
330 for (s=0; s<n_seq; s++) {
331 type[s] = pair[S[s][i]][S[s][j]];
332 if (type[s]==0) type[s]=7;
335 if (psc>=cv_fact*MINPSCORE) { /* otherwise ignore this pair */
337 /* hairpin contribution */
338 for (qbt1=1,s=0; s<n_seq; s++) {
339 u = a2s[s][j-1]-a2s[s][i];
340 if (a2s[s][i]<1) continue;
343 strncpy(loopseq, Ss[s]+a2s[s][i]-1, 10);
345 qbt1 *= exp_E_Hairpin(u, type[s], S3[s][i], S5[s][j], loopseq, pf_params);
348 qbt1 *= scale[j-i+1];
350 /* interior loops with interior pair k,l */
351 for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++){
353 for (l=MAX2(k+TURN+1,j-1-MAXLOOP+k-i-1); l<=j-1; l++){
355 if (qb[my_iindx[k]-l]==0) {qloop=0; continue;}
356 for (s=0; s<n_seq; s++) {
357 u1 = a2s[s][k-1]-a2s[s][i]/*??*/;
358 type_2 = pair[S[s][l]][S[s][k]]; if (type_2 == 0) type_2 = 7;
359 qloop *= exp_E_IntLoop( u1, a2s[s][j-1]-a2s[s][l],
360 type[s], type_2, S3[s][i],
361 S5[s][j], S5[s][k], S3[s][l],
365 qbt1 += qb[my_iindx[k]-l] * qloop * scale[k-i+j-l];
369 /* multi-loop loop contribution */
370 ii = my_iindx[i+1]; /* ii-k=[i+1,k-1] */
372 for (k=i+2; k<=j-1; k++) temp += qm[ii-(k-1)]*qqm1[k];
373 for (s=0; s<n_seq; s++) {
375 temp *= exp_E_MLstem(tt, S5[s][j], S3[s][i], pf_params)* expMLclosing;
380 qb[ij] *= exp(psc/kTn);
381 } /* end if (type!=0) */
383 /* construction of qqm matrix containing final stem
384 contributions to multiple loop partition function
386 qqm[i] = qqm1[i]*expMLbase[1]; /* expMLbase[1]^n_seq */
387 for (qbt1=1, s=0; s<n_seq; s++) {
388 qbt1 *= exp_E_MLstem(type[s], (i>1) || circular ? S5[s][i] : -1, (j<n) || circular ? S3[s][j] : -1, pf_params);
390 qqm[i] += qb[ij]*qbt1;
391 qm1[jindx[j]+i] = qqm[i]; /* for circ folding and stochBT */
393 /* construction of qm matrix containing multiple loop
394 partition function contributions from segment i,j */
396 ii = my_iindx[i]; /* ii-k=[i,k-1] */
397 for (k=i+1; k<=j; k++)
398 temp += (qm[ii-(k-1)]+expMLbase[k-i])*qqm[k];
399 qm[ij] = (temp + qqm[i]);
401 /* auxiliary matrix qq for cubic order q calculation below */
404 for (s=0; s<n_seq; s++) {
405 qbt1 *= exp_E_ExtLoop(type[s], (i>1) || circular ? S5[s][i] : -1, (j<n) || circular ? S3[s][j] : -1, pf_params);
407 qq[i] = qq1[i]*scale[1] + qbt1;
409 /* construction of partition function for segment i,j */
410 temp = 1.0*scale[1+j-i] + qq[i];
411 for (k=i; k<=j-1; k++) temp += q[ii-k]*qq[k+1];
417 if (Qmax>FLT_MAX/10.)
418 fprintf(stderr, "%d %d %g\n", i,j,temp);
421 PRIVATE char msg[128];
422 sprintf(msg, "overflow in pf_fold while calculating q[%d,%d]\n"
423 "use larger pf_scale", i,j);
428 tmp = qq1; qq1 =qq; qq =tmp;
429 tmp = qqm1; qqm1=qqm; qqm=tmp;
435 PRIVATE void alipf_create_bppm(const char **sequences, char *structure, plist **pl)
438 int n, n_seq, i,j,k,l, ij, kl, ii, ll, tt, *type, ov=0;
439 FLT_OR_DBL temp, Qmax=0, prm_MLb;
440 FLT_OR_DBL prmt,prmt1;
441 FLT_OR_DBL qbt1, *tmp, tmp2, tmp3;
442 FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
445 n = (int) strlen(sequences[0]);
446 for (s=0; sequences[s]!=NULL; s++);
448 type = (int *)space(sizeof(int) * n_seq);
450 kTn = pf_params->kT/10.; /* kT in cal/mol */
453 prm_l[i]=prm_l1[i]=prml[i]=0;
455 /* backtracking to construct binding probabilities of pairs*/
458 for (k=1; k<=n; k++) {
459 q1k[k] = q[my_iindx[1] - k];
460 qln[k] = q[my_iindx[k] -n];
465 probs = q; /* recycling */
467 /* 1. exterior pair i,j and initialization of pr array */
469 for (i=1; i<=n; i++) {
470 for (j=i; j<=MIN2(i+TURN,n); j++) probs[my_iindx[i]-j] = 0;
471 for (j=i+TURN+1; j<=n; j++) {
474 probs[ij] = exp(pscore[ij]/kTn)/qo;
477 for (s=0; s<n_seq; s++) {
478 type[s] = pair[S[s][j]][S[s][i]];
479 if (type[s]==0) type[s]=7;
483 /* 1.1. Exterior Hairpin Contribution */
484 int u = i + n - j -1;
485 for (qbt1=1.,s=0; s<n_seq; s++) {
490 strcpy(loopseq , sequences[s]+j-1);
491 strncat(loopseq, sequences[s], i);
493 qbt1 *= exp_E_Hairpin(u, type[s], S[s][j+1], S[s][(i>1) ? i-1 : n], loopseq, pf_params);
495 tmp2 = qbt1 * scale[u];
497 /* 1.2. Exterior Interior Loop Contribution */
498 /* recycling of k and l... */
499 /* 1.2.1. first we calc exterior loop energy with constraint, that i,j */
500 /* delimtis the "left" part of the interior loop */
501 /* (j,i) is "outer pair" */
502 for(k=1; k < i-TURN-1; k++){
503 /* so first, lets calc the length of loop between j and k */
506 if(ln1>MAXLOOP) break;
507 lstart = ln1+i-1-MAXLOOP;
508 if(lstart<k+TURN+1) lstart = k + TURN + 1;
509 for(l=lstart; l < i; l++){
510 int ln2,ln2a,ln1a, type_2;
512 if(ln1+ln2>MAXLOOP) continue;
515 if (qb[my_iindx[k]-l]==0.){ qloop=0.; continue;}
517 for (s=0; s<n_seq; s++){
520 ln1a= a2s[s][n]-a2s[s][j];
522 type_2 = pair[S[s][l]][S[s][k]];
523 if (type_2 == 0) type_2 = 7;
524 qloop *= exp_E_IntLoop(ln1a, ln2a, type[s], type_2,
527 S[s][(k>1) ? k-1 : n],
528 S[s][l+1], pf_params);
530 tmp2 += qb[my_iindx[k] - l] * qloop * scale[ln1+ln2];
534 /* 1.2.2. second we calc exterior loop energy with constraint, that i,j */
535 /* delimtis the "right" part of the interior loop */
536 /* (l,k) is "outer pair" */
537 for(k=j+1; k < n-TURN; k++){
538 /* so first, lets calc the length of loop between l and i */
541 if((ln1 + i - 1)>MAXLOOP) break;
542 lstart = ln1+i-1+n-MAXLOOP;
543 if(lstart<k+TURN+1) lstart = k + TURN + 1;
544 for(l=lstart; l <= n; l++){
547 if(ln1+ln2>MAXLOOP) continue;
549 if (qb[my_iindx[k]-l]==0.){ qloop=0.; continue;}
551 for (s=0; s<n_seq; s++){
552 ln1 = a2s[s][k] - a2s[s][j+1];
553 ln2 = a2s[s][i-1] + a2s[s][n] - a2s[s][l];
554 type_2 = pair[S[s][l]][S[s][k]];
555 if (type_2 == 0) type_2 = 7;
556 qloop *= exp_E_IntLoop(ln2, ln1, type_2, type[s],
560 S3[s][j], pf_params);
562 tmp2 += qb[my_iindx[k] - l] * qloop * scale[(k-j-1)+(i-1+n-l)];
566 /* 1.3 Exterior multiloop decomposition */
567 /* 1.3.1 Middle part */
568 if((i>TURN+2) && (j<n-TURN-1)){
570 for (tmp3=1, s=0; s<n_seq; s++){
571 tmp3 *= exp_E_MLstem(rtype[type[s]], S5[s][i], S3[s][j], pf_params);
573 tmp2 += qm[my_iindx[1]-i+1] * qm[my_iindx[j+1]-n] * tmp3 * pow(expMLclosing,n_seq);
575 /* 1.3.2 Left part */
576 for(k=TURN+2; k < i-TURN-2; k++){
578 for (tmp3=1, s=0; s<n_seq; s++){
579 tmp3 *= exp_E_MLstem(rtype[type[s]], S5[s][i], S3[s][j], pf_params);
581 tmp2 += qm[my_iindx[1]-k] * qm1[jindx[i-1]+k+1] * tmp3 * expMLbase[n-j] * pow(expMLclosing,n_seq);
583 /* 1.3.3 Right part */
584 for(k=j+TURN+2; k < n-TURN-1;k++){
586 for (tmp3=1, s=0; s<n_seq; s++){
587 tmp3 *= exp_E_MLstem(rtype[type[s]], S5[s][i], S3[s][j], pf_params);
589 tmp2 += qm[my_iindx[j+1]-k] * qm1[jindx[n]+k+1] * tmp3 * expMLbase[i-1] * pow(expMLclosing,n_seq);
596 } /* end if(circular) */
598 for (i=1; i<=n; i++) {
599 for (j=i; j<=MIN2(i+TURN,n); j++) probs[my_iindx[i]-j] = 0;
600 for (j=i+TURN+1; j<=n; j++) {
603 probs[ij] = q1k[i-1]*qln[j+1]/q1k[n] * exp(pscore[ij]/kTn);
604 for (s=0; s<n_seq; s++) {
606 typ = pair[S[s][i]][S[s][j]]; if (typ==0) typ=7;
607 probs[ij] *= exp_E_ExtLoop(typ, (i>1) ? S5[s][i] : -1, (j<n) ? S3[s][j] : -1, pf_params);
613 } /* end if(!circular) */
614 for (l=n; l>TURN+1; l--) {
616 /* 2. bonding k,l as substem of 2:loop enclosed by i,j */
617 for (k=1; k<l-TURN; k++) {
620 if (qb[kl]==0) continue;
621 for (s=0; s<n_seq; s++) {
622 type[s] = pair[S[s][l]][S[s][k]];
623 if (type[s]==0) type[s]=7;
626 for (i=MAX2(1,k-MAXLOOP-1); i<=k-1; i++)
627 for (j=l+1; j<=MIN2(l+ MAXLOOP -k+i+2,n); j++) {
628 ij = my_iindx[i] - j;
629 if ((probs[ij]>0.)) {
631 for (s=0; s<n_seq; s++) {
633 typ = pair[S[s][i]][S[s][j]]; if (typ==0) typ=7;
634 qloop *= exp_E_IntLoop(a2s[s][k-1]-a2s[s][i], a2s[s][j-1]-a2s[s][l], typ, type[s], S3[s][i], S5[s][j], S5[s][k], S3[s][l], pf_params);
636 pp += probs[ij]*qloop*scale[k-i + j-l];
639 probs[kl] += pp * exp(pscore[kl]/kTn);
641 /* 3. bonding k,l as substem of multi-loop enclosed by i,j */
643 if (l<n) for (k=2; k<l-TURN; k++) {
647 ii = my_iindx[i]; /* ii-j=[i,j] */
648 ll = my_iindx[l+1]; /* ll-j=[l+1,j-1] */
649 prmt1 = probs[ii-(l+1)];
650 for (s=0; s<n_seq; s++) {
651 tt = pair[S[s][l+1]][S[s][i]]; if (tt==0) tt=7;
652 prmt1 *= exp_E_MLstem(tt, S5[s][l+1], S3[s][i], pf_params) * expMLclosing;
655 for (j=l+2; j<=n; j++) {
657 if (probs[ii-j]==0) continue;
658 for (s=0; s<n_seq; s++) {
659 tt=pair[S[s][j]][S[s][i]]; if (tt==0) tt=7;
660 pp *= exp_E_MLstem(tt, S5[s][j], S3[s][i], pf_params)*expMLclosing;
662 prmt += probs[ii-j]*pp*qm[ll-(j-1)];
667 prm_l[i] = prm_l1[i]*expMLbase[1]+prmt1; /* expMLbase[1]^n_seq */
669 prm_MLb = prm_MLb*expMLbase[1] + prml[i];
670 /* same as: prm_MLb = 0;
671 for (i=1; i<=k-1; i++) prm_MLb += prml[i]*expMLbase[k-i-1]; */
673 prml[i] = prml[ i] + prm_l[i];
675 if (qb[kl] == 0.) continue;
679 for (i=1;i<=k-2; i++)
680 temp += prml[i]*qm[my_iindx[i+1] - (k-1)];
682 for (s=0; s<n_seq; s++) {
683 tt=pair[S[s][k]][S[s][l]]; if (tt==0) tt=7;
684 temp *= exp_E_MLstem(tt, S5[s][k], S3[s][l], pf_params);
686 probs[kl] += temp * scale[2] * exp(pscore[kl]/kTn);
690 if (probs[kl]>Qmax) {
692 if (Qmax>FLT_MAX/10.)
693 fprintf(stderr, "%d %d %g %g\n", i,j,probs[kl],qb[kl]);
695 if (probs[kl]>FLT_MAX) {
700 } /* end for (k=2..) */
701 tmp = prm_l1; prm_l1=prm_l; prm_l=tmp;
703 } /* end for (l=..) */
706 for (j=i+TURN+1; j<=n; j++) {
708 probs[ij] *= qb[ij] *exp(-pscore[ij]/kTn);
711 /* did we get an adress where to save a pair-list? */
713 assign_plist_from_pr(pl, probs, n, /*cut_off:*/ 1e-6);
716 bppm_to_structure(structure, probs, n);
718 if (ov>0) fprintf(stderr, "%d overflows occurred while backtracking;\n"
719 "you might try a smaller pf_scale than %g\n",
720 ov, pf_params->pf_scale);
725 PRIVATE void scale_pf_params(unsigned int length, int n_seq, pf_paramT *parameters){
727 double kT, scaling_factor;
729 if(pf_params) free(pf_params);
732 pf_params = get_boltzmann_factor_copy(parameters);
735 set_model_details(&md);
736 pf_params = get_boltzmann_factors_ali(n_seq, temperature, alpha, md, pf_scale);
739 scaling_factor = pf_params->pf_scale;
740 kT = pf_params->kT / n_seq;
742 /* scaling factors (to avoid overflows) */
743 if (scaling_factor == -1) { /* mean energy for random sequences: 184.3*length cal */
744 scaling_factor = exp(-(-185+(pf_params->temperature-37.)*7.27)/kT);
745 if (scaling_factor<1) scaling_factor=1;
746 pf_params->pf_scale = scaling_factor;
749 scale[1] = 1./scaling_factor;
752 expMLbase[1] = pf_params->expMLbase/scaling_factor;
753 for (i=2; i<=length; i++) {
754 scale[i] = scale[i/2]*scale[i-(i/2)];
755 expMLbase[i] = pow(pf_params->expMLbase, (double)i) * scale[i];
760 /*---------------------------------------------------------------------------*/
761 PRIVATE int compare_pair_info(const void *pi1, const void *pi2) {
764 p1 = (pair_info *)pi1; p2 = (pair_info *)pi2;
765 for (nc1=nc2=0, i=1; i<=6; i++) {
766 if (p1->bp[i]>0) nc1++;
767 if (p2->bp[i]>0) nc2++;
769 /* sort mostly by probability, add
770 epsilon * comp_mutations/(non-compatible+1) to break ties */
771 return (p1->p + 0.01*nc1/(p1->bp[0]+1.)) <
772 (p2->p + 0.01*nc2/(p2->bp[0]+1.)) ? 1 : -1;
775 pair_info *make_pairinfo(const short *const* S, const char **AS, int n_seq) {
776 int i,j,n, num_p=0, max_p = 64;
780 max_p = 64; pi = space(max_p*sizeof(pair_info));
781 duck = (double *) space((n+1)*sizeof(double));
783 for (j=i+TURN+1; j<=n; j++)
784 if ((p=probs[my_iindx[i]-j])>0) {
785 duck[i] -= p * log(p);
786 duck[j] -= p * log(p);
790 for (j=i+TURN+1; j<=n; j++) {
791 if ((p=probs[my_iindx[i]-j])>=PMIN) {
796 pi[num_p].ent = duck[i]+duck[j]-p*log(p);
797 for (type=0; type<8; type++) pi[num_p].bp[type]=0;
798 for (s=0; s<n_seq; s++) {
799 if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap */
801 if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7;
802 else type = pair[S[s][i]][S[s][j]];
804 pi[num_p].bp[type]++;
809 pi = xrealloc(pi, max_p * sizeof(pair_info));
814 pi = xrealloc(pi, (num_p+1)*sizeof(pair_info));
816 qsort(pi, num_p, sizeof(pair_info), compare_pair_info );
820 /*---------------------------------------------------------------------------*/
821 PRIVATE void make_pscores(const short *const* S, const char **AS,
822 int n_seq, const char *structure) {
823 /* calculate co-variance bonus for each pair depending on */
824 /* compensatory/consistent mutations and incompatible seqs */
825 /* should be 0 for conserved pairs, >0 for good pairs */
826 #define NONE -10000 /* score for forbidden pairs */
827 int n,i,j,k,l,s,score;
829 int olddm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */
830 {0,0,2,2,1,2,2} /* CG */,
831 {0,2,0,1,2,2,2} /* GC */,
832 {0,2,1,0,2,1,2} /* GU */,
833 {0,1,2,2,0,2,1} /* UG */,
834 {0,2,2,1,2,0,2} /* AU */,
835 {0,2,2,2,1,2,0} /* UA */};
838 int noLP = pf_params->model_details.noLP;
840 n=S[0][0]; /* length of seqs */
842 if (RibosumFile !=NULL) dm=readribosum(RibosumFile);
843 else dm=get_ribosum(AS,n_seq,n);
845 else { /*use usual matrix*/
846 dm=(float **)space(7*sizeof(float*));
848 dm[i]=(float *)space(7*sizeof(float));
850 dm[i][j] = olddm[i][j];
854 n=S[0][0]; /* length of seqs */
855 for (i=1; i<n; i++) {
856 for (j=i+1; (j<i+TURN+1) && (j<=n); j++)
857 pscore[my_iindx[i]-j] = NONE;
858 for (j=i+TURN+1; j<=n; j++) {
859 int pfreq[8]={0,0,0,0,0,0,0,0};
860 for (s=0; s<n_seq; s++) {
862 if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap */
864 if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7;
865 else type = pair[S[s][i]][S[s][j]];
869 if (pfreq[0]*2+pfreq[7]>n_seq) { pscore[my_iindx[i]-j] = NONE; continue;}
870 for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */
872 /* scores for replacements between pairtypes */
873 /* consistent or compensatory mutations score 1 or 2 */
874 score += pfreq[k]*pfreq[l]*dm[k][l];
875 /* counter examples score -1, gap-gap scores -0.25 */
876 pscore[my_iindx[i]-j] = cv_fact *
877 ((UNIT*score)/n_seq - nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25));
881 if (noLP) /* remove unwanted pairs */
882 for (k=1; k<=n-TURN-1; k++)
883 for (l=1; l<=2; l++) {
884 int type,ntype=0,otype=0;
886 type = pscore[my_iindx[i]-j];
887 while ((i>=1)&&(j<=n)) {
888 if ((i>1)&&(j<n)) ntype = pscore[my_iindx[i-1]-j-1];
889 if ((otype<cv_fact*MINPSCORE)&&(ntype<cv_fact*MINPSCORE))
890 /* too many counterexamples */
891 pscore[my_iindx[i]-j] = NONE; /* i.j can only form isolated pairs */
899 if (struct_constrained&&(structure!=NULL)) {
900 int psij, hx, *stack;
901 stack = (int *) space(sizeof(int)*(n+1));
903 for(hx=0, j=1; j<=n; j++) {
904 switch (structure[j-1]) {
905 case 'x': /* j can't pair */
906 for (l=1; l<j-TURN; l++) pscore[my_iindx[l]-j] = NONE;
907 for (l=j+TURN+1; l<=n; l++) pscore[my_iindx[j]-l] = NONE;
912 case '<': /* j pairs upstream */
913 for (l=1; l<j-TURN; l++) pscore[my_iindx[l]-j] = NONE;
915 case ')': /* j pairs with i */
917 fprintf(stderr, "%s\n", structure);
918 nrerror("unbalanced brackets in constraints");
921 psij = pscore[my_iindx[i]-j]; /* store for later */
923 for (k=j; k<=n; k++) pscore[my_iindx[l]-k] = NONE;
925 for (l=i; l<=j; l++) pscore[my_iindx[k]-l] = NONE;
926 for (k=i+1; k<j; k++)
927 pscore[my_iindx[k]-j] = pscore[my_iindx[i]-k] = NONE;
928 pscore[my_iindx[i]-j] = (psij>0) ? psij : 0;
930 case '>': /* j pairs downstream */
931 for (l=j+TURN+1; l<=n; l++) pscore[my_iindx[j]-l] = NONE;
936 fprintf(stderr, "%s\n", structure);
937 nrerror("unbalanced brackets in constraint string");
947 /* calculate partition function for circular case */
948 /* NOTE: this is the postprocessing step ONLY */
949 /* You have to call alipf_linear first to calculate */
950 /* circular case!!! */
952 PUBLIC void alipf_circ(const char **sequences, char *structure){
954 int u, p, q, k, l, n_seq, s, *type;
955 FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
957 int n = (int) strlen(sequences[0]);
958 for (s=0; sequences[s]!=NULL; s++);
962 FLT_OR_DBL qbt1, qot;
963 kTn = pf_params->kT/10.; /* kT in cal/mol */
964 type = (int *)space(sizeof(int) * n_seq);
966 qo = qho = qio = qmo = 0.;
967 /* calculate the qm2 matrix */
968 for(k=1; k<n-TURN; k++){
970 for (u=k+TURN+1; u<n-TURN-1; u++)
971 qot += qm1[jindx[u]+k]*qm1[jindx[n]+(u+1)];
976 for(q=p+TURN+1;q<=n;q++){
979 if (u<TURN) continue;
981 psc = pscore[my_iindx[p]-q];
983 if(psc<cv_fact*MINPSCORE) continue;
985 /* 1. exterior hairpin contribution */
986 /* Note, that we do not scale Hairpin Energy by u+2 but by u cause the scale */
987 /* for the closing pair was already done in the forward recursion */
988 for (qbt1=1,s=0; s<n_seq; s++) {
990 type[s] = pair[S[s][q]][S[s][p]];
991 if (type[s]==0) type[s]=7;
994 strcpy(loopseq , sequences[s]+q-1);
995 strncat(loopseq, sequences[s], p);
997 qbt1 *= exp_E_Hairpin(u, type[s], S[s][q+1], S[s][(p>1) ? p-1 : n], loopseq, pf_params);
999 qho += qb[my_iindx[p]-q] * qbt1 * scale[u];
1001 /* 2. exterior interior loop contribution*/
1003 for(k=q+1; k < n; k++){
1006 if(ln1+p-1>MAXLOOP) break;
1007 lstart = ln1+p-1+n-MAXLOOP;
1008 if(lstart<k+TURN+1) lstart = k + TURN + 1;
1009 for(l=lstart;l <= n; l++){
1012 ln2 = (p - 1) + (n - l);
1013 if((ln1+ln2) > MAXLOOP) continue;
1015 if (qb[my_iindx[k]-l]==0.){ qloop=0.; continue;}
1017 for (s=0; s<n_seq; s++){
1018 int ln1a=a2s[s][k-1]-a2s[s][q];
1019 int ln2a=a2s[s][n]-a2s[s][l]+a2s[s][p-1];
1020 type_2 = pair[S[s][l]][S[s][k]];
1021 if (type_2 == 0) type_2 = 7;
1022 qloop *= exp_E_IntLoop(ln2a, ln1a, type_2, type[s], S3[s][l], S5[s][k], S5[s][p], S3[s][q], pf_params);
1024 qio += qb[my_iindx[p]-q] * qb[my_iindx[k]-l] * qloop * scale[ln1+ln2];
1026 } /* end of kl double loop */
1028 } /* end of pq double loop */
1030 /* 3. exterior multiloop contribution */
1031 for(k=TURN+2; k<n-2*TURN-3; k++)
1032 qmo += qm[my_iindx[1]-k] * qm2[k+1] * pow(expMLclosing,n_seq);
1034 /* add additional pf of 1.0 to take open chain into account */
1035 qo = qho + qio + qmo + 1.0*scale[n];
1041 /*brauch ma nurnoch pscores!*/
1042 PUBLIC char *alipbacktrack(double *prob) {
1043 double r, gr, qt, kTn;
1044 int k,i,j, start,s,n, n_seq;
1048 nrerror("can't backtrack without pf arrays.\n"
1049 "Call pf_fold() before pbacktrack()");
1053 kTn = pf_params->kT/10.;
1056 for (k=1; k<=n; k++) {
1057 qln[k] = q[my_iindx[k] -n];
1062 pstruc = space((n+1)*sizeof(char));
1064 for (i=0; i<n; i++) pstruc[i] = '.';
1068 /* find i position of first pair */
1070 for (i=start; i<n; i++) {
1071 gr = urn() * qln[i];
1072 if (gr > qln[i+1]*scale[1]) {
1073 *prob=*prob*probs*(1-qln[i+1]*scale[1]/qln[i]);
1074 break; /* i is paired */
1076 probs*=qln[i+1]*scale[1]/qln[i];
1080 break; /* no more pairs */
1082 /* now find the pairing partner j */
1083 r = urn() * (qln[i] - qln[i+1]*scale[1]);
1084 for (qt=0, j=i+1; j<=n; j++) {
1086 /* type = ptype[my_iindx[i]-j];
1089 if (qb[my_iindx[i]-j]>0) {
1090 qkl = qb[my_iindx[i]-j]*qln[j+1]; /*if psc too small qb=0!*/
1091 for (s=0; s< n_seq; s++) {
1092 xtype=pair[S[s][i]][S[s][j]];
1093 if (xtype==0) xtype=7;
1094 qkl *= exp_E_ExtLoop(xtype, (i>1) ? S5[s][i] : -1, (j<n) ? S3[s][j] : -1, pf_params);
1096 qt += qkl; /*?*exp(pscore[my_iindx[i]-j]/kTn)*/
1098 *prob=*prob*(qkl/(qln[i] - qln[i+1]*scale[1]));/*probs*=qkl;*/
1099 break; /* j is paired */
1103 if (j==n+1) nrerror("backtracking failed in ext loop");
1105 backtrack(i,j, n_seq, prob); /*?*/
1112 PRIVATE void backtrack(int i, int j, int n_seq, double *prob) {
1113 /*backtrack given i,j basepair!*/
1114 double kTn = pf_params->kT/10.;
1116 int *type = (int *)space(sizeof(int) * n_seq);
1121 pstruc[i-1] = '('; pstruc[j-1] = ')';
1122 for (s=0; s<n_seq; s++) {
1123 type[s] = pair[S[s][i]][S[s][j]];
1124 if (type[s]==0) type[s]=7;
1126 r = urn() * (qb[my_iindx[i]-j]/exp(pscore[my_iindx[i]-j]/kTn)); /*?*exp(pscore[my_iindx[i]-j]/kTn)*/
1129 for (s=0; s<n_seq; s++){
1130 u = a2s[s][j-1]-a2s[s][i];
1131 if (a2s[s][i]<1) continue;
1134 strncpy(loopseq, Ss[s]+a2s[s][i]-1, 10);
1136 qbt1 *= exp_E_Hairpin(u, type[s], S3[s][i], S5[s][j], loopseq, pf_params);
1138 qbt1 *= scale[j-i+1];
1141 *prob=*prob*qbt1/(qb[my_iindx[i]-j]/exp(pscore[my_iindx[i]-j]/kTn));/*probs*=qbt1;*/
1143 return; /* found the hairpin we're done */
1146 for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++){
1148 for (l=MAX2(k+TURN+1,j-1-MAXLOOP+k-i-1); l<j; l++){
1151 if (qb[my_iindx[k]-l]==0) {qloop=0; continue;}
1152 for (s=0; s<n_seq; s++) {
1153 u1 = a2s[s][k-1]-a2s[s][i]/*??*/;
1154 type_2 = pair[S[s][l]][S[s][k]]; if (type_2 == 0) type_2 = 7;
1155 qloop *= exp_E_IntLoop(u1, a2s[s][j-1]-a2s[s][l], type[s], type_2,
1156 S3[s][i], S5[s][j],S5[s][k], S3[s][l], pf_params);
1158 qbt1 += qb[my_iindx[k]-l] * qloop * scale[k-i+j-l];
1161 *prob=*prob*qb[my_iindx[k]-l] * qloop * scale[k-i+j-l]/(qb[my_iindx[i]-j]/exp(pscore[my_iindx[i]-j]/kTn));
1163 prob*=qb[my_iindx[k]-l] * qloop * scale[k-i+j-l];
1168 if (qbt1 > r) break;
1174 *prob=*prob*(1-qbt1/(qb[my_iindx[i]-j]/exp(pscore[my_iindx[i]-j]/kTn)));
1179 /* backtrack in multi-loop */
1180 tempwert=(qb[my_iindx[i]-j]/exp(pscore[my_iindx[i]-j]/kTn));
1186 /* find the first split index */
1187 ii = my_iindx[i]; /* ii-j=[i,j] */
1188 jj = jindx[j]; /* jj+i=[j,i] */
1189 for (qt=0., k=i+1; k<j; k++) qttemp += qm[ii-(k-1)]*qm1[jj+k];
1191 for (qt=0., k=i+1; k<j; k++) {
1192 qt += qm[ii-(k-1)]*qm1[jj+k];
1194 *prob=*prob*qm[ii-(k-1)]*qm1[jj+k]/qttemp;/*qttemp;tempwert*/
1195 /* prob*=qm[ii-(k-1)]*qm1[jj+k];*/
1199 if (k>=j) nrerror("backtrack failed, can't find split index ");
1201 backtrack_qm1(k, j, n_seq, prob);
1205 /* now backtrack [i ... j] in qm[] */
1206 jj = jindx[j];/*habides??*/
1208 r = urn() * qm[ii - j];
1209 qt = qm1[jj+i]; k=i;
1211 for (k=i+1; k<=j; k++) {
1212 qt += (qm[ii-(k-1)]+expMLbase[k-i]/*n_seq??*/)*qm1[jj+k];
1214 *prob=*prob*(qm[ii-(k-1)]+expMLbase[k-i])*qm1[jj+k]/qm[ii - j];/*???*/
1220 *prob=*prob*qt/qm[ii - j];/*??*/
1222 if (k>j) nrerror("backtrack failed in qm");
1224 backtrack_qm1(k,j, n_seq, prob);
1226 if (k<i+TURN) break; /* no more pairs */
1227 r = urn() * (qm[ii-(k-1)] + expMLbase[k-i]);
1228 if (expMLbase[k-i] >= r) {
1229 break; /* no more pairs */
1230 *prob=*prob*expMLbase[k-i]/(qm[ii-(k-1)] + expMLbase[k-i]);
1239 PRIVATE void backtrack_qm1(int i,int j, int n_seq, double *prob) {
1240 /* i is paired to l, i<l<j; backtrack in qm1 to find l */
1242 double qt, r, tempz;
1243 r = urn() * qm1[jindx[j]+i];
1245 for (qt=0., l=i+TURN+1; l<=j; l++) {
1246 if (qb[ii-l]==0) continue;
1248 for (s=0; s<n_seq; s++) {
1249 xtype = pair[S[s][i]][S[s][l]];
1250 if (xtype==0) xtype=7;
1251 tempz*=exp_E_MLstem(xtype, S5[s][i], S3[s][l], pf_params);
1253 qt += qb[ii-l]*tempz*expMLbase[j-l];
1255 *prob=*prob*qb[ii-l]*tempz*expMLbase[j-l]/qm1[jindx[j]+i];
1256 /* probs*=qb[ii-l]*tempz*expMLbase[j-l];*/
1260 if (l>j) nrerror("backtrack failed in qm1");
1262 backtrack(i,l, n_seq, prob);
1265 PUBLIC FLT_OR_DBL *export_ali_bppm(void){
1269 /*-------------------------------------------------------------------------*/
1270 /* make arrays used for alipf_fold available to other routines */
1271 PUBLIC int get_alipf_arrays( short ***S_p,
1274 unsigned short ***a2s_p,
1282 if(qb == NULL) return(0); /* check if alipf_fold() has been called */
1283 *S_p = S; *S5_p = S5; *S3_p = S3;
1286 *qb_p = qb; *qm_p = qm;
1287 *q1k_p = q1k; *qln_p = qln;
1289 return(1); /* success */