1 /* Last changed Time-stamp: <2007-05-09 16:11:21 ivo> */
3 partiton function for RNA secondary structures
10 $Log: part_func_co.c,v $
11 Revision 1.10 2007/05/10 17:27:01 ivo
12 make sure the relative error eps is positive in newton iteration
14 Revision 1.9 2006/05/10 15:12:11 ivo
15 some compiler choked on double semicolon after declaration
17 Revision 1.8 2006/04/05 12:52:31 ivo
18 Fix performance bug (O(n^4) loop)
20 Revision 1.7 2006/01/19 11:30:04 ivo
21 compute_probabilities should only look at one dimer at a time
23 Revision 1.6 2006/01/18 12:55:40 ivo
24 major cleanup of berni code
25 fix bugs related to confusing which free energy is returned by co_pf_fold()
27 Revision 1.5 2006/01/16 11:32:25 ivo
28 small bug in multiloop pair probs
30 Revision 1.4 2006/01/05 18:13:40 ivo
33 Revision 1.3 2006/01/04 15:14:29 ivo
34 fix bug in concentration calculations
36 Revision 1.2 2004/12/23 12:14:41 berni
37 *** empty log message ***
39 Revision 1.1 2004/12/22 10:46:17 berni
41 Partition function Cofolding 0.9, Computation of concentrations.
43 Revision 1.16 2003/08/04 09:14:09 ivo
44 finish up stochastic backtracking
46 Revision 1.15 2002/03/19 16:51:12 ivo
47 more on stochastic backtracking (still incomplete)
49 Revision 1.13 2001/11/16 17:30:04 ivo
50 add stochastic backtracking (still incomplete)
58 #include <float.h> /* #defines FLT_MAX ... */
62 #include "energy_par.h"
63 #include "fold_vars.h"
67 #include "loop_energies.h"
68 #include "part_func.h"
69 #include "part_func_co.h"
77 PRIVATE char rcsid[] UNUSED = "$Id: part_func_co.c,v 1.10 2007/05/10 17:27:01 ivo Exp $";
79 #define ISOLATED 256.0
82 #define SAME_STRAND(I,J) (((I)>=cut_point)||((J)<cut_point))
84 /* #define SAME_STRAND(I,J) (((J)<cut_point)||((I)>=cut_point2)||(((I)>=cut_point)&&((J)<cut_point2)))
88 #################################
90 #################################
93 double F_monomer[2] = {0,0}; /* free energies of the two monomers */
96 #################################
98 #################################
100 PRIVATE FLT_OR_DBL *expMLbase=NULL;
101 PRIVATE FLT_OR_DBL *q=NULL, *qb=NULL, *qm=NULL, *qm1=NULL, *qqm=NULL, *qqm1=NULL, *qq=NULL, *qq1=NULL;
102 PRIVATE FLT_OR_DBL *prml=NULL, *prm_l=NULL, *prm_l1=NULL, *q1k=NULL, *qln=NULL, *probs=NULL;
103 PRIVATE FLT_OR_DBL *scale=NULL;
104 PRIVATE pf_paramT *pf_params = NULL;
105 PRIVATE char *ptype=NULL; /* precomputed array of pair types */
106 PRIVATE int *jindx=NULL;
107 PRIVATE int *my_iindx=NULL;
108 PRIVATE int init_length; /* length in last call to init_pf_fold() */
109 PRIVATE int do_bppm = 1; /* do backtracking per default */
110 PRIVATE short *S=NULL, *S1=NULL;
111 PRIVATE char *pstruc=NULL;
112 PRIVATE char *sequence=NULL;
113 PRIVATE double alpha = 1.0;
114 PRIVATE int struct_constrained = 0;
118 /* NOTE: all variables are assumed to be uninitialized if they are declared as threadprivate
120 #pragma omp threadprivate(expMLbase, q, qb, qm, qm1, qqm, qqm1, qq, qq1, prml, prm_l, prm_l1, q1k, qln,\
121 scale, pf_params, ptype, jindx, my_iindx, init_length, S, S1, pstruc, sequence, probs, do_bppm, alpha, struct_constrained)
127 #################################
128 # PRIVATE FUNCTION DECLARATIONS #
129 #################################
131 PRIVATE void init_partfunc_co(int length, pf_paramT *parameters);
132 PRIVATE void pf_co(const char *sequence);
133 PRIVATE void pf_co_bppm(const char *sequence, char *structure);
134 PRIVATE double *Newton_Conc(double ZAB, double ZAA, double ZBB, double concA, double concB,double* ConcVec);
135 PRIVATE void scale_pf_params(unsigned int length, pf_paramT *parameters);
136 PRIVATE void get_arrays(unsigned int length);
137 PRIVATE void make_ptypes(const short *S, const char *structure);
138 PRIVATE void backtrack(int i, int j);
142 #################################
143 # BEGIN OF FUNCTION DEFINITIONS #
144 #################################
147 PRIVATE void init_partfunc_co(int length, pf_paramT *parameters){
148 if (length<1) nrerror("init_pf_fold: length must be greater 0");
151 /* Explicitly turn off dynamic threads */
153 free_co_pf_arrays(); /* free previous allocation */
155 if (init_length>0) free_co_pf_arrays(); /* free previous allocation */
159 nonstandard_arithmetic();
166 get_arrays((unsigned) length);
167 scale_pf_params((unsigned) length, parameters);
168 init_length = length;
171 PRIVATE void get_arrays(unsigned int length){
174 if((length +1) >= (unsigned int)sqrt((double)INT_MAX))
175 nrerror("get_arrays@part_func_co.c: sequence length exceeds addressable range");
177 size = sizeof(FLT_OR_DBL) * ((length+1)*(length+2)/2);
178 q = (FLT_OR_DBL *) space(size);
179 qb = (FLT_OR_DBL *) space(size);
180 qm = (FLT_OR_DBL *) space(size);
181 probs = (FLT_OR_DBL *) space(size);
182 qm1 = (FLT_OR_DBL *) space(size);
183 q1k = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
184 qln = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
185 qq = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
186 qq1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
187 qqm = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
188 qqm1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
189 prm_l = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
190 prm_l1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
191 prml = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
192 expMLbase = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
193 scale = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
194 ptype = (char *) space(sizeof(char)*((length+1)*(length+2)/2));
195 my_iindx = get_iindx(length);
196 iindx = get_iindx(length); /* for backward compatibility and Perl wrapper */
197 jindx = get_indx(length);
200 PUBLIC void free_co_pf_arrays(void){
205 if(ptype) free(ptype);
212 if(prm_l) free(prm_l);
213 if(prm_l1) free(prm_l1);
215 if(probs) free(probs);
216 if(expMLbase) free(expMLbase);
217 if(scale) free(scale);
218 if(my_iindx) free(my_iindx);
219 if(iindx) free(iindx); /* for backward compatibility and Perl wrapper */
220 if(jindx) free(jindx);
225 q = qb = qm = qm1 = qq = qq1 = qqm = qqm1 = q1k = qln = prm_l = prm_l1 = prml = expMLbase = scale = probs = NULL;
228 my_iindx = jindx = iindx = NULL;
231 standard_arithmetic();
239 /*-----------------------------------------------------------------*/
240 PUBLIC cofoldF co_pf_fold(char *sequence, char *structure){
241 return co_pf_fold_par(sequence, structure, NULL, do_backtrack, fold_constrained);
244 PUBLIC cofoldF co_pf_fold_par(char *sequence,
246 pf_paramT *parameters,
255 n = (int) strlen(sequence);
256 do_bppm = calculate_bppm;
257 struct_constrained = is_constrained;
260 /* always init everything since all global static variables are uninitialized when entering a thread */
261 init_partfunc_co(n, parameters);
263 if(parameters) init_partfunc_co(n, parameters);
264 else if (n > init_length) init_partfunc_co(n, parameters);
265 else if (fabs(pf_params->temperature - temperature)>1e-6) update_co_pf_params_par(n, parameters);
268 /* printf("mirnatog=%d\n",mirnatog); */
271 S = encode_sequence(sequence, 0);
273 S1 = encode_sequence(sequence, 1);
275 make_ptypes(S, structure);
279 if (backtrack_type=='C') Q = qb[my_iindx[1]-n];
280 else if (backtrack_type=='M') Q = qm[my_iindx[1]-n];
281 else Q = q[my_iindx[1]-n];
282 /* ensemble free energy in Kcal/mol */
283 if (Q<=FLT_MIN) fprintf(stderr, "pf_scale too large\n");
284 free_energy = (-log(Q)-n*log(pf_params->pf_scale))*pf_params->kT/1000.0;
285 /* in case we abort because of floating point errors */
286 if (n>1600) fprintf(stderr, "free energy = %8.2f\n", free_energy);
287 /*probability of molecules being bound together*/
290 /*Computation of "real" Partition function*/
291 /*Need that for concentrations*/
293 double kT, pbound, QAB, QToT, Qzero;
295 kT = pf_params->kT/1000.0;
296 Qzero=q[my_iindx[1]-n];
297 QAB=(q[my_iindx[1]-n]-q[my_iindx[1]-(cut_point-1)]*q[my_iindx[cut_point]-n])*pf_params->expDuplexInit;
298 /*correction for symmetry*/
299 if((n-(cut_point-1)*2)==0) {
300 if ((strncmp(sequence, sequence+cut_point-1, cut_point-1))==0) {
304 QToT=q[my_iindx[1]-(cut_point-1)]*q[my_iindx[cut_point]-n]+QAB;
305 pbound=1-(q[my_iindx[1]-(cut_point-1)]*q[my_iindx[cut_point]-n]/QToT);
306 X.FAB = -kT*(log(QToT)+n*log(pf_params->pf_scale));
307 X.F0AB = -kT*(log(Qzero)+n*log(pf_params->pf_scale));
308 X.FcAB = (QAB>1e-17) ? -kT*(log(QAB)+n*log(pf_params->pf_scale)) : 999;
309 X.FA = -kT*(log(q[my_iindx[1]-(cut_point-1)]) + (cut_point-1)*log(pf_params->pf_scale));
310 X.FB = -kT*(log(q[my_iindx[cut_point]-n]) + (n-cut_point+1)*log(pf_params->pf_scale));
312 /* printf("QAB=%.9f\tQtot=%.9f\n",QAB/scale[n],QToT/scale[n]);*/
315 X.FA = X.FB = X.FAB = X.F0AB = free_energy;
319 /* backtracking to construct binding probabilities of pairs*/
321 pf_co_bppm(sequence, structure);
323 * Backward compatibility:
324 * This block may be removed if deprecated functions
325 * relying on the global variable "pr" vanish from within the package!
331 pr = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL) * ((n+1)*(n+2)/2));
332 memcpy(pr, probs, sizeof(FLT_OR_DBL) * ((n+1)*(n+2)/2));
339 /* forward recursion of pf cofolding */
340 PRIVATE void pf_co(const char *sequence){
341 int n, i,j,k,l, ij, u,u1,ii, type, type_2, tt;
342 FLT_OR_DBL temp, Qmax=0;
343 FLT_OR_DBL qbt1, *tmp;
344 FLT_OR_DBL expMLclosing;
346 int noGUclosure = pf_params->model_details.noGUclosure;
348 max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;
349 n = (int) strlen(sequence);
351 expMLclosing = pf_params->expMLclosing;
354 /*array initialization ; qb,qm,q
355 qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */
357 /* for (d=0; d<=TURN; d++) */
358 for (i=1; i<=n/*-d*/; i++) {
365 qq[i]=qq1[i]=qqm[i]=qqm1[i]=prm_l[i]=prm_l1[i]=prml[i]=0;
367 for (j=TURN+2;j<=n; j++) {
368 for (i=j-TURN-1; i>=1; i--) {
369 /* construction of partition function of segment i,j*/
370 /*firstly that given i bound to j : qb(i,j) */
371 u = j-i-1; ij = my_iindx[i]-j;
375 /*hairpin contribution*/
377 if (((type==3)||(type==4))&&noGUclosure) qbt1 = 0;
379 qbt1 = exp_E_Hairpin(u, type, S1[i+1], S1[j-1], sequence+i-1, pf_params)*scale[u+2];
383 /* interior loops with interior pair k,l */
384 for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++) {
386 for (l=MAX2(k+TURN+1,j-1-MAXLOOP+u1); l<j; l++) {
387 if ((SAME_STRAND(i,k))&&(SAME_STRAND(l,j))){
388 type_2 = ptype[my_iindx[k]-l];
390 type_2 = rtype[type_2];
391 qbt1 += qb[my_iindx[k]-l] *
392 exp_E_IntLoop(u1, j-l-1, type, type_2,
393 S1[i+1], S1[j-1], S1[k-1], S1[l+1], pf_params)*scale[u1+j-l+1];
398 /*multiple stem loop contribution*/
399 ii = my_iindx[i+1]; /* ii-k=[i+1,k-1] */
401 if (SAME_STRAND(i,i+1) && SAME_STRAND(j-1,j)) {
402 for (k=i+2; k<=j-1; k++) {
403 if (SAME_STRAND(k-1,k))
404 temp += qm[ii-(k-1)]*qqm1[k];
407 temp*=exp_E_MLstem(tt, S1[j-1], S1[i+1], pf_params)*scale[2];
413 if (!SAME_STRAND(i,j)){
415 temp=q[my_iindx[i+1]-(cut_point-1)]*q[my_iindx[cut_point]-(j-1)];
416 if ((j==cut_point)&&(i==cut_point-1)) temp=scale[2];
417 else if (i==cut_point-1) temp=q[my_iindx[cut_point]-(j-1)]*scale[1];
418 else if (j==cut_point) temp=q[my_iindx[i+1]-(cut_point-1)]*scale[1];
419 if (j>cut_point) temp*=scale[1];
420 if (i<cut_point-1) temp*=scale[1];
421 temp *= exp_E_ExtLoop(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, pf_params);
425 } /* end if (type!=0) */
427 /* construction of qqm matrix containing final stem
428 contributions to multiple loop partition function
430 if (SAME_STRAND(j-1,j)) {
431 qqm[i] = qqm1[i]*expMLbase[1];
434 if (type&&SAME_STRAND(i-1,i)&&SAME_STRAND(j,j+1)) {
436 qbt1 *= exp_E_MLstem(type, (i>1) ? S1[i-1] : -1, (j<n) ? S1[j+1] : -1, pf_params);
440 if (qm1) qm1[jindx[j]+i] = qqm[i]; /* for stochastic backtracking */
443 /*construction of qm matrix containing multiple loop
444 partition function contributions from segment i,j */
446 ii = my_iindx[i]; /* ii-k=[i,k] */
448 for (k=i+1; k<=j; k++) {
449 if (SAME_STRAND(k-1,k)) temp += (qm[ii-(k-1)])*qqm[k];
450 if (SAME_STRAND(i,k)) temp += expMLbase[k-i]*qqm[k];
454 qm[ij] = (temp + qqm[i]);
456 /*auxiliary matrix qq for cubic order q calculation below */
459 qbt1 *= exp_E_ExtLoop(type, ((i>1)&&(SAME_STRAND(i-1,i))) ? S1[i-1] : -1, ((j<n)&&(SAME_STRAND(j,j+1))) ? S1[j+1] : -1, pf_params);
461 qq[i] = qq1[i]*scale[1] + qbt1;
462 /*construction of partition function for segment i,j */
463 temp = 1.0*scale[1+j-i] + qq[i];
464 for (k=i; k<=j-1; k++) temp += q[ii-k]*qq[k+1];
469 if (Qmax>max_real/10.)
470 fprintf(stderr, "Q close to overflow: %d %d %g\n", i,j,temp);
472 if (temp>=max_real) {
473 PRIVATE char msg[128];
474 snprintf(msg, 127, "overflow in co_pf_fold while calculating q[%d,%d]\n"
475 "use larger pf_scale", i,j);
479 tmp = qq1; qq1 =qq; qq =tmp;
480 tmp = qqm1; qqm1=qqm; qqm=tmp;
484 /* backward recursion of pf cofolding */
485 PRIVATE void pf_co_bppm(const char *sequence, char *structure){
486 int n, i,j,k,l, ij, kl, ii, ll, type, type_2, tt, ov=0;
487 FLT_OR_DBL temp, Qmax=0, prm_MLb;
488 FLT_OR_DBL prmt,prmt1;
490 FLT_OR_DBL expMLclosing;
493 max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;
494 n = (int) strlen(sequence);
496 expMLclosing = pf_params->expMLclosing;
498 /* backtracking to construct binding probabilities of pairs*/
499 if ((S != NULL) && (S1 != NULL)) {
500 FLT_OR_DBL *Qlout, *Qrout;
502 Qrout=(FLT_OR_DBL *)space(sizeof(FLT_OR_DBL) * (n+2));
503 Qlout=(FLT_OR_DBL *)space(sizeof(FLT_OR_DBL) * (cut_point+2));
505 for (k=1; k<=n; k++) {
506 q1k[k] = q[my_iindx[1] - k];
507 qln[k] = q[my_iindx[k] -n];
512 /* pr = q; / * recycling */
514 /* 1. exterior pair i,j and initialization of pr array */
515 for (i=1; i<=n; i++) {
516 for (j=i; j<=MIN2(i+TURN,n); j++) probs[my_iindx[i]-j] = 0;
517 for (j=i+TURN+1; j<=n; j++) {
520 if (type&&(qb[ij]>0.)) {
521 probs[ij] = q1k[i-1]*qln[j+1]/q1k[n];
522 probs[ij] *= exp_E_ExtLoop(type, ((i>1)&&(SAME_STRAND(i-1,i))) ? S1[i-1] : -1, ((j<n)&&(SAME_STRAND(j,j+1))) ? S1[j+1] : -1, pf_params);
528 for (l=n; l>TURN+1; l--) {
530 /* 2. bonding k,l as substem of 2:loop enclosed by i,j */
531 for (k=1; k<l-TURN; k++) {
533 type_2 = ptype[kl]; type_2 = rtype[type_2];
534 if (qb[kl]==0) continue;
536 for (i=MAX2(1,k-MAXLOOP-1); i<=k-1; i++)
537 for (j=l+1; j<=MIN2(l+ MAXLOOP -k+i+2,n); j++) {
538 if ((SAME_STRAND(i,k))&&(SAME_STRAND(l,j))){
539 ij = my_iindx[i] - j;
542 probs[kl] += probs[ij]*exp_E_IntLoop(k-i-1, j-l-1, type, type_2,
543 S1[i+1], S1[j-1], S1[k-1], S1[l+1], pf_params)*scale[k-i+j-l];
548 /* 3. bonding k,l as substem of multi-loop enclosed by i,j */
550 if ((l<n)&&(SAME_STRAND(l,l+1)))
551 for (k=2; k<l-TURN; k++) {
555 ii = my_iindx[i]; /* ii-j=[i,j] */
556 ll = my_iindx[l+1]; /* ll-j=[l+1,j] */
557 tt = ptype[ii-(l+1)]; tt=rtype[tt];
558 if (SAME_STRAND(i,k)){
559 prmt1 = probs[ii-(l+1)]*expMLclosing;
560 prmt1 *= exp_E_MLstem(tt, S1[l], S1[i+1], pf_params);
561 for (j=l+2; j<=n; j++) {
562 if (SAME_STRAND(j-1,j)){ /*??*/
563 tt = ptype[ii-j]; tt = rtype[tt];
564 prmt += probs[ii-j]*exp_E_MLstem(tt, S1[j-1], S1[i+1], pf_params)*qm[ll-(j-1)];
570 prmt *= expMLclosing;
572 prm_l[i] = prm_l1[i]*expMLbase[1]+prmt1;
574 prm_MLb = prm_MLb*expMLbase[1] + prml[i];
575 /* same as: prm_MLb = 0;
576 for (i=1; i<=k-1; i++) prm_MLb += prml[i]*expMLbase[k-i-1]; */
578 prml[i] = prml[ i] + prm_l[i];
580 if (qb[kl] == 0.) continue;
584 for (i=1;i<=k-2; i++) {
585 if ((SAME_STRAND(i,i+1))&&(SAME_STRAND(k-1,k))){
586 temp += prml[i]*qm[my_iindx[i+1] - (k-1)];
589 temp *= exp_E_MLstem( tt,
590 ((k>1)&&SAME_STRAND(k-1,k)) ? S1[k-1] : -1,
591 ((l<n)&&SAME_STRAND(l,l+1)) ? S1[l+1] : -1,
592 pf_params) * scale[2];
595 if (probs[kl]>Qmax) {
597 if (Qmax>max_real/10.)
598 fprintf(stderr, "P close to overflow: %d %d %g %g\n",
599 i, j, probs[kl], qb[kl]);
601 if (probs[kl]>=max_real) {
606 } /* end for (k=..) multloop*/
607 else /* set prm_l to 0 to get prm_l1 to be 0 */
608 for (i=0; i<=n; i++) prm_l[i]=0;
610 tmp = prm_l1; prm_l1=prm_l; prm_l=tmp;
611 /*computation of .(..(...)..&..). type features?*/
612 if (cut_point<=0) continue; /* no .(..(...)..&..). type features*/
613 if ((l==n)||(l<=2)) continue; /* no .(..(...)..&..). type features*/
614 /*new version with O(n^3)??*/
618 for (t=n; t>l; t--) {
619 for (k=1; k<cut_point; k++) {
621 type=rtype[ptype[kt]];
622 temp = probs[kt] * exp_E_ExtLoop(type, S1[t-1], (SAME_STRAND(k,k+1)) ? S1[k+1] : -1, pf_params) * scale[2];
623 if (l+1<t) temp*=q[my_iindx[l+1]-(t-1)];
624 if (SAME_STRAND(k,k+1)) temp*=q[my_iindx[k+1]-(cut_point-1)];
629 for (k=l-1; k>=cut_point; k--) {
630 if (qb[my_iindx[k]-l]) {
634 temp *= exp_E_ExtLoop(type, (k>cut_point) ? S1[k-1] : -1, (l < n) ? S1[l+1] : -1, pf_params);
635 if (k>cut_point) temp*=q[my_iindx[cut_point]-(k-1)];
640 else if (l==cut_point ) {
642 for (t=2; t<cut_point;t++) {
643 for (s=1; s<t; s++) {
644 for (k=cut_point; k<=n; k++) {
647 type=rtype[ptype[sk]];
648 temp=probs[sk]*exp_E_ExtLoop(type, (SAME_STRAND(k-1,k)) ? S1[k-1] : -1, S1[s+1], pf_params)*scale[2];
649 if (s+1<t) temp*=q[my_iindx[s+1]-(t-1)];
650 if (SAME_STRAND(k-1,k)) temp*=q[my_iindx[cut_point]-(k-1)];
657 else if (l<cut_point) {
658 for (k=1; k<l; k++) {
659 if (qb[my_iindx[k]-l]) {
660 type=ptype[my_iindx[k]-l];
662 temp *= exp_E_ExtLoop(type, (k>1) ? S1[k-1] : -1, (l<(cut_point-1)) ? S1[l+1] : -1, pf_params);
663 if (l+1<cut_point) temp*=q[my_iindx[l+1]-(cut_point-1)];
664 probs[my_iindx[k]-l]+=temp;
668 } /* end for (l=..) */
672 for (j=i+TURN+1; j<=n; j++) {
678 bppm_to_structure(structure, probs, n);
679 } /* end if (do_backtrack)*/
681 if (ov>0) fprintf(stderr, "%d overflows occurred while backtracking;\n"
682 "you might try a smaller pf_scale than %g\n",
683 ov, pf_params->pf_scale);
687 PRIVATE void scale_pf_params(unsigned int length, pf_paramT *parameters){
689 double kT, scaling_factor;
691 if(pf_params) free(pf_params);
694 pf_params = get_boltzmann_factor_copy(parameters);
697 set_model_details(&md);
698 pf_params = get_boltzmann_factors(temperature, alpha, md, pf_scale);
701 scaling_factor = pf_params->pf_scale;
702 kT = pf_params->kT; /* kT in cal/mol */
704 /* scaling factors (to avoid overflows) */
705 if (scaling_factor == -1) { /* mean energy for random sequences: 184.3*length cal */
706 scaling_factor = exp(-(-185+(pf_params->temperature-37.)*7.27)/kT);
707 if (scaling_factor<1) scaling_factor=1;
708 pf_params->pf_scale = scaling_factor;
711 scale[1] = 1./scaling_factor;
713 expMLbase[1] = pf_params->expMLbase/scaling_factor;
714 for (i=2; i<=length; i++) {
715 scale[i] = scale[i/2]*scale[i-(i/2)];
716 expMLbase[i] = pow(pf_params->expMLbase, (double)i) * scale[i];
720 /*----------------------------------------------------------------------*/
722 /*----------------------------------------------------------------------*/
724 /*---------------------------------------------------------------------------*/
726 PUBLIC void update_co_pf_params(int length){
727 update_co_pf_params_par(length, NULL);
730 PUBLIC void update_co_pf_params_par(int length, pf_paramT *parameters){
732 scale_pf_params((unsigned) length, parameters);
735 /*---------------------------------------------------------------------------*/
736 PRIVATE void make_ptypes(const short *S, const char *structure) {
738 int noLP = pf_params->model_details.noLP;
741 for (k=1; k<=n-TURN-1; k++)
742 for (l=1; l<=2; l++) {
743 int type,ntype=0,otype=0;
746 type = pair[S[i]][S[j]];
747 while ((i>=1)&&(j<=n)) {
748 if ((i>1)&&(j<n)) ntype = pair[S[i-1]][S[j+1]];
749 if (noLP && (!otype) && (!ntype))
750 type = 0; /* i.j can only form isolated pairs */
751 qb[my_iindx[i]-j] = 0.;
752 ptype[my_iindx[i]-j] = (char) type;
760 if (struct_constrained&&(structure!=NULL)) {
761 constrain_ptypes(structure, (unsigned int)n, ptype, NULL, TURN, 1);
762 for(j=1; j<=n; j++) {
763 switch (structure[j-1]) {
764 case 'l': /*only intramolecular basepairing*/
765 if (j<cut_point) for (l=cut_point; l<=n; l++) ptype[my_iindx[j]-l] = 0;
766 else for (l=1; l<cut_point; l++) ptype[my_iindx[l]-j] =0;
768 case 'e': /*only intermolecular bp*/
770 for (l=1; l<j; l++) ptype[my_iindx[l]-j] =0;
771 for (l=j+1; l<cut_point; l++) ptype[my_iindx[j]-l] = 0;
774 for (l=cut_point; l<j; l++) ptype[my_iindx[l]-j] =0;
775 for (l=j+1; l<=n; l++) ptype[my_iindx[j]-l] = 0;
781 if (mirnatog==1) { /*microRNA toggle: no intramolec. bp in 2. molec*/
782 for (j=cut_point; j<n; j++) {
783 for (l=j+1; l<=n; l++) {
784 ptype[my_iindx[j]-l] = 0;
791 stochastic backtracking in pf_fold arrays
792 returns random structure S with Boltzman probabilty
793 p(S) = exp(-E(S)/kT)/Z
795 PRIVATE void backtrack_qm1(int i,int j) {
796 /* i is paired to l, i<l<j; backtrack in qm1 to find l */
799 r = urn() * qm1[jindx[j]+i];
801 for (qt=0., l=i+TURN+1; l<=j; l++) {
804 qt += qb[ii-l]*exp_E_MLstem(type, S1[i-1], S1[l+1], pf_params) * expMLbase[j-l];
807 if (l>j) nrerror("backtrack failed in qm1");
811 PRIVATE void backtrack(int i, int j) {
812 int noGUclosure = pf_params->model_details.noGUclosure;
816 int k, l, type, u, u1;
818 pstruc[i-1] = '('; pstruc[j-1] = ')';
820 r = urn() * qb[my_iindx[i]-j];
821 type = ptype[my_iindx[i]-j];
823 /*hairpin contribution*/
824 if (((type==3)||(type==4))&&noGUclosure) qbt1 = 0;
826 qbt1 = exp_E_Hairpin(u, type, S1[i+1], S1[j-1], sequence+i-1, pf_params)*scale[u+2];
828 if (qbt1>r) return; /* found the hairpin we're done */
830 for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++) {
832 for (l=MAX2(k+TURN+1,j-1-MAXLOOP+u1); l<j; l++) {
834 type_2 = ptype[my_iindx[k]-l];
836 type_2 = rtype[type_2];
837 qbt1 += qb[my_iindx[k]-l] *
838 exp_E_IntLoop(u1, j-l-1, type, type_2,
839 S1[i+1], S1[j-1], S1[k-1], S1[l+1], pf_params)*scale[u1+j-l+1];
851 /* backtrack in multi-loop */
857 /* find the first split index */
858 ii = my_iindx[i]; /* ii-j=[i,j] */
859 jj = jindx[j]; /* jj+i=[j,i] */
860 for (qt=0., k=i+1; k<j; k++) qt += qm[ii-(k-1)]*qm1[jj+k];
862 for (qt=0., k=i+1; k<j; k++) {
863 qt += qm[ii-(k-1)]*qm1[jj+k];
866 if (k>=j) nrerror("backtrack failed, can't find split index ");
872 /* now backtrack [i ... j] in qm[] */
875 r = urn() * qm[ii - j];
878 for (k=i+1; k<=j; k++) {
879 qt += (qm[ii-(k-1)]+expMLbase[k-i])*qm1[jj+k];
882 if (k>j) nrerror("backtrack failed in qm");
886 if (k<i+TURN) break; /* no more pairs */
887 r = urn() * (qm[ii-(k-1)] + expMLbase[k-i]);
888 if (expMLbase[k-i] >= r) break; /* no more pairs */
894 PUBLIC void compute_probabilities(double FAB, double FA,double FB,
896 struct plist *prA, struct plist *prB,
898 /*computes binding probabilities and dimer free energies*/
902 struct plist *lp1, *lp2;
905 mykT=pf_params->kT/1000.;
907 /* pair probabilities in pr are relative to the null model (without DuplexInit) */
909 /*Compute probabilities pAB, pAA, pBB*/
911 pAB=1.-exp((1/mykT)*(FAB-FA-FB));
913 /* compute pair probabilities given that it is a dimer */
918 for (lp1=prAB; lp1->j>0; lp1++) {
921 while (offset+lp2->i < i && lp2->i>0) lp2++;
922 if (offset+lp2->i == i)
923 while ((offset+lp2->j) < j && (lp2->j>0)) lp2++;
924 if (lp2->j == 0) {lp2=prB; offset=Alength;}/* jump to next list */
925 if ((offset+lp2->i==i) && (offset+lp2->j ==j)) {
929 lp1->p=(lp1->p-(1-pAB)*pp)/pAB;
931 warn_user("part_func_co: numeric instability detected, probability below zero!");
938 PRIVATE double *Newton_Conc(double KAB, double KAA, double KBB, double concA, double concB,double* ConcVec) {
939 double TOL, EPS, xn, yn, det, cA, cB;
941 /*Newton iteration for computing concentrations*/
944 TOL=1e-6; /*Tolerance for convergence*/
945 ConcVec=(double*)space(5*sizeof(double)); /* holds concentrations */
947 /* det = (4.0 * KAA * cA + KAB *cB + 1.0) * (4.0 * KBB * cB + KAB *cA + 1.0) - (KAB *cB) * (KAB *cA); */
948 det = 1 + 16. *KAA*KBB*cA*cB + KAB*(cA+cB) + 4.*KAA*cA + 4.*KBB*cB + 4.*KAB*(KBB*cB*cB + KAA*cA*cA);
949 /* xn = ( (2.0 * KBB * cB*cB + KAB *cA *cB + cB - concB) * (KAB *cA) -
950 (2.0 * KAA * cA*cA + KAB *cA *cB + cA - concA) * (4.0 * KBB * cB + KAB *cA + 1.0) ) /det; */
951 xn = ( (2.0 * KBB * cB*cB + cB - concB) * (KAB *cA) - KAB*cA*cB*(4. * KBB*cB + 1.) -
952 (2.0 * KAA * cA*cA + cA - concA) * (4.0 * KBB * cB + KAB *cA + 1.0) ) /det;
953 /* yn = ( (2.0 * KAA * cA*cA + KAB *cA *cB + cA - concA) * (KAB *cB) -
954 (2.0 * KBB * cB*cB + KAB *cA *cB + cB - concB) * (4.0 * KAA * cA + KAB *cB + 1.0) ) /det; */
955 yn = ( (2.0 * KAA * cA*cA + cA - concA) * (KAB *cB) - KAB*cA*cB*(4. * KAA*cA + 1.) -
956 (2.0 * KBB * cB*cB + cB - concB) * (4.0 * KAA * cA + KAB *cB + 1.0) ) /det;
957 EPS = fabs(xn/cA) + fabs(yn/cB);
962 fprintf(stderr, "Newton did not converge after %d steps!!\n",i);
967 ConcVec[0]= cA*cB*KAB ;/*AB concentration*/
968 ConcVec[1]= cA*cA*KAA ;/*AA concentration*/
969 ConcVec[2]= cB*cB*KBB ;/*BB concentration*/
970 ConcVec[3]= cA; /* A concentration*/
971 ConcVec[4]= cB; /* B concentration*/
976 PUBLIC struct ConcEnt *get_concentrations(double FcAB, double FcAA, double FcBB, double FEA, double FEB, double *startconc)
978 /*takes an array of start concentrations, computes equilibrium concentrations of dimers, monomers, returns array of concentrations in strucutre ConcEnt*/
981 struct ConcEnt *Concentration;
982 double KAA, KAB, KBB, kT;
984 kT=pf_params->kT/1000.;
985 Concentration=(struct ConcEnt *)space(20*sizeof(struct ConcEnt));
986 /* Compute equilibrium constants */
987 /* again note the input free energies are not from the null model (without DuplexInit) */
989 KAA = exp(( 2.0 * FEA - FcAA)/kT);
990 KBB = exp(( 2.0 * FEB - FcBB)/kT);
991 KAB = exp(( FEA + FEB - FcAB)/kT);
992 /* printf("Kaa..%g %g %g\n", KAA, KBB, KAB); */
993 for (i=0; ((startconc[i]!=0)||(startconc[i+1]!=0));i+=2) {
994 ConcVec=Newton_Conc(KAB, KAA, KBB, startconc[i], startconc[i+1], ConcVec);
995 Concentration[i/2].A0=startconc[i];
996 Concentration[i/2].B0=startconc[i+1];
997 Concentration[i/2].ABc=ConcVec[0];
998 Concentration[i/2].AAc=ConcVec[1];
999 Concentration[i/2].BBc=ConcVec[2];
1000 Concentration[i/2].Ac=ConcVec[3];
1001 Concentration[i/2].Bc=ConcVec[4];
1003 if (!(((i+2)/2)%20)) {
1004 Concentration=(struct ConcEnt *)xrealloc(Concentration,((i+2)/2+20)*sizeof(struct ConcEnt));
1009 return Concentration;
1012 PUBLIC FLT_OR_DBL *export_co_bppm(void){
1016 /*###########################################*/
1017 /*# deprecated functions below #*/
1018 /*###########################################*/
1021 PUBLIC struct plist *get_plist(struct plist *pl, int length, double cut_off) {
1023 /*get pair probibilities out of pr array*/
1026 for (i=1; i<length; i++) {
1027 for (j=i+1; j<=length; j++) {
1028 if (pr[my_iindx[i]-j]<cut_off) continue;
1029 if (count==n*length-1) {
1031 pl=(struct plist *)xrealloc(pl,n*length*sizeof(struct plist));
1035 pl[count++].p=pr[my_iindx[i]-j];
1036 /* printf("gpl: %2d %2d %.9f\n",i,j,pr[my_iindx[i]-j]);*/
1040 pl[count].j=0; /*->??*/
1042 pl=(struct plist *)xrealloc(pl,(count)*sizeof(struct plist));
1046 PUBLIC void init_co_pf_fold(int length){ /* DO NOTHING */ }