--- /dev/null
+/*
+ partiton function for RNA secondary structures
+
+ Ivo L Hofacker
+ Vienna RNA package
+*/
+/*
+ $Log: part_func.c,v $
+ Revision 1.29 2008/02/23 10:10:49 ivo
+ list returned from StackProb was sometimes not terminated correctly
+
+ Revision 1.28 2008/01/08 15:08:10 ivo
+ circular fold would fail for open chain
+
+ Revision 1.27 2007/12/05 13:04:04 ivo
+ add various circfold variants from Ronny
+
+ Revision 1.26 2007/09/19 12:41:56 ivo
+ add computation of centroid() structure for RNAfold -p
+
+ Revision 1.25 2007/04/30 15:12:00 ivo
+ merge RNAup into package
+
+ Revision 1.24 2007/03/03 17:57:44 ivo
+ make sure entries in scale[] decrease to 0
+
+ Revision 1.23 2006/12/01 12:40:23 ivo
+ undo Ulli's accidental commit
+
+ Revision 1.21 2006/08/04 15:39:06 ivo
+ new function stackProb returns probability for stacks
+ p[(i,j)(i+1,j-1)]
+
+ Revision 1.20 2004/08/12 12:14:46 ivo
+ update
+
+ Revision 1.19 2004/05/14 16:28:05 ivo
+ fix the bug in make_ptype here too (fixed in 1.27 of fold.c)
+
+ Revision 1.18 2004/02/17 10:46:52 ivo
+ make sure init_pf_fold is called before scale_parameters
+
+ Revision 1.17 2004/02/09 18:37:59 ivo
+ new mean_bp_dist() function to compute ensemble diversity
+
+ Revision 1.16 2003/08/04 09:14:09 ivo
+ finish up stochastic backtracking
+
+ Revision 1.15 2002/03/19 16:51:12 ivo
+ more on stochastic backtracking (still incomplete)
+
+ Revision 1.14 2002/02/08 17:37:23 ivo
+ set freed S,S1 pointers to NULL
+
+ Revision 1.13 2001/11/16 17:30:04 ivo
+ add stochastic backtracking (still incomplete)
+*/
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <float.h> /* #defines FLT_MAX ... */
+#include <limits.h>
+
+#include "utils.h"
+#include "energy_par.h"
+#include "fold_vars.h"
+#include "pair_mat.h"
+#include "params.h"
+#include "loop_energies.h"
+#include "gquad.h"
+#include "part_func.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#define ISOLATED 256.0
+
+/*
+#################################
+# GLOBAL VARIABLES #
+#################################
+*/
+PUBLIC int st_back = 0;
+
+/*
+#################################
+# PRIVATE VARIABLES #
+#################################
+*/
+PRIVATE FLT_OR_DBL *q = NULL, *qb=NULL, *qm = NULL, *qm1 = NULL, *qqm = NULL, *qqm1 = NULL, *qq = NULL, *qq1 = NULL;
+PRIVATE FLT_OR_DBL *probs=NULL, *prml=NULL, *prm_l=NULL, *prm_l1=NULL, *q1k=NULL, *qln=NULL;
+PRIVATE FLT_OR_DBL *scale=NULL;
+PRIVATE FLT_OR_DBL *expMLbase=NULL;
+PRIVATE FLT_OR_DBL qo=0., qho=0., qio=0., qmo=0., *qm2=NULL;
+PRIVATE int *jindx=NULL;
+PRIVATE int *my_iindx=NULL;
+PRIVATE int init_length = -1; /* length in last call to init_pf_fold() */
+PRIVATE int circular=0;
+PRIVATE int do_bppm = 1; /* do backtracking per default */
+PRIVATE int struct_constrained = 0;
+PRIVATE char *pstruc=NULL;
+PRIVATE char *sequence=NULL;
+PRIVATE char *ptype=NULL; /* precomputed array of pair types */
+PRIVATE pf_paramT *pf_params=NULL; /* the precomputed Boltzmann weights */
+PRIVATE short *S=NULL, *S1=NULL;
+PRIVATE int with_gquad = 0;
+
+PRIVATE FLT_OR_DBL *G = NULL, *Gj = NULL, *Gj1 = NULL;
+
+#ifdef _OPENMP
+
+#pragma omp threadprivate(q, qb, qm, qm1, qqm, qqm1, qq, qq1, prml, prm_l, prm_l1, q1k, qln,\
+ probs, scale, expMLbase, qo, qho, qio, qmo, qm2, jindx, my_iindx, init_length,\
+ circular, pstruc, sequence, ptype, pf_params, S, S1, do_bppm, struct_constrained,\
+ G, Gj, Gj1, with_gquad)
+
+#endif
+
+/*
+#################################
+# PRIVATE FUNCTION DECLARATIONS #
+#################################
+*/
+PRIVATE void init_partfunc(int length, pf_paramT *parameters);
+PRIVATE void scale_pf_params(unsigned int length, pf_paramT *parameters);
+PRIVATE void get_arrays(unsigned int length);
+PRIVATE void make_ptypes(const short *S, const char *structure);
+PRIVATE void pf_circ(const char *sequence, char *structure);
+PRIVATE void pf_linear(const char *sequence, char *structure);
+PRIVATE void pf_create_bppm(const char *sequence, char *structure);
+PRIVATE void backtrack(int i, int j);
+PRIVATE void backtrack_qm(int i, int j);
+PRIVATE void backtrack_qm1(int i,int j);
+PRIVATE void backtrack_qm2(int u, int n);
+
+/*
+#################################
+# BEGIN OF FUNCTION DEFINITIONS #
+#################################
+*/
+
+PRIVATE void init_partfunc(int length, pf_paramT *parameters){
+ if (length<1) nrerror("init_pf_fold: length must be greater 0");
+
+#ifdef _OPENMP
+/* Explicitly turn off dynamic threads */
+ omp_set_dynamic(0);
+ free_pf_arrays(); /* free previous allocation */
+#else
+ if (init_length>0) free_pf_arrays(); /* free previous allocation */
+#endif
+
+#ifdef SUN4
+ nonstandard_arithmetic();
+#else
+#ifdef HP9
+ fpsetfastmode(1);
+#endif
+#endif
+ make_pair_matrix();
+ get_arrays((unsigned) length);
+ scale_pf_params((unsigned) length, parameters);
+
+ init_length = length;
+}
+
+PRIVATE void get_arrays(unsigned int length){
+ unsigned int size;
+
+ if((length +1) >= (unsigned int)sqrt((double)INT_MAX))
+ nrerror("get_arrays@part_func.c: sequence length exceeds addressable range");
+
+ size = sizeof(FLT_OR_DBL) * ((length+1)*(length+2)/2);
+
+ q = (FLT_OR_DBL *) space(size);
+ qb = (FLT_OR_DBL *) space(size);
+ qm = (FLT_OR_DBL *) space(size);
+ qm1 = (st_back || circular) ? (FLT_OR_DBL *) space(size) : NULL;
+ qm2 = (circular) ? (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2)) : NULL;
+ probs = (do_bppm) ? (FLT_OR_DBL *) space(size) : NULL;
+
+ ptype = (char *) space(sizeof(char)*((length+1)*(length+2)/2));
+ q1k = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
+ qln = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ qq = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ qq1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ qqm = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ qqm1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ prm_l = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ prm_l1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ prml = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ expMLbase = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
+ scale = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+1));
+
+ Gj = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+ Gj1 = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL)*(length+2));
+
+ my_iindx = get_iindx(length);
+ iindx = get_iindx(length); /* for backward compatibility and Perl wrapper */
+ jindx = get_indx(length);
+}
+
+/**
+*** Allocate memory for all matrices and other stuff
+**/
+PUBLIC void free_pf_arrays(void){
+ if(q) free(q);
+ if(qb) free(qb);
+ if(qm) free(qm);
+ if(qm1) free(qm1);
+ if(qm2) free(qm2);
+ if(ptype) free(ptype);
+ if(qq) free(qq);
+ if(qq1) free(qq1);
+ if(qqm) free(qqm);
+ if(qqm1) free(qqm1);
+ if(q1k) free(q1k);
+ if(qln) free(qln);
+ if(probs) free(probs);
+ if(prm_l) free(prm_l);
+ if(prm_l1) free(prm_l1);
+ if(prml) free(prml);
+ if(expMLbase) free(expMLbase);
+ if(scale) free(scale);
+ if(my_iindx) free(my_iindx);
+ if(iindx) free(iindx); /* for backward compatibility and Perl wrapper */
+ if(jindx) free(jindx);
+ if(S) free(S);
+ if(S1) free(S1);
+ if(G) free(G);
+ if(Gj) free(Gj);
+ if(Gj1) free(Gj1);
+
+ S = S1 = NULL;
+ q = pr = probs = qb = qm = qm1 = qm2 = qq = qq1 = qqm = qqm1 = q1k = qln = prm_l = prm_l1 = prml = expMLbase = scale = G = Gj = Gj1 = NULL;
+ my_iindx = jindx = iindx = NULL;
+
+ ptype = NULL;
+
+#ifdef SUN4
+ standard_arithmetic();
+#else
+#ifdef HP9
+ fpsetfastmode(0);
+#endif
+#endif
+
+ init_length = 0;
+}
+
+/*-----------------------------------------------------------------*/
+PUBLIC float pf_fold(const char *sequence, char *structure){
+ return pf_fold_par(sequence, structure, NULL, do_backtrack, fold_constrained, 0);
+}
+
+PUBLIC float pf_circ_fold(const char *sequence, char *structure){
+ return pf_fold_par(sequence, structure, NULL, do_backtrack, fold_constrained, 1);
+}
+
+PUBLIC float pf_fold_par( const char *sequence,
+ char *structure,
+ pf_paramT *parameters,
+ int calculate_bppm,
+ int is_constrained,
+ int is_circular){
+
+ FLT_OR_DBL Q;
+ double free_energy;
+ int n = (int) strlen(sequence);
+
+ circular = is_circular;
+ do_bppm = calculate_bppm;
+ struct_constrained = is_constrained;
+
+#ifdef _OPENMP
+ init_partfunc(n, parameters);
+#else
+ if(parameters) init_partfunc(n, parameters);
+ else if (n > init_length) init_partfunc(n, parameters);
+ else if (fabs(pf_params->temperature - temperature)>1e-6) update_pf_params_par(n, parameters);
+#endif
+
+ with_gquad = pf_params->model_details.gquad;
+ S = encode_sequence(sequence, 0);
+ S1 = encode_sequence(sequence, 1);
+
+ make_ptypes(S, structure);
+
+ /* do the linear pf fold and fill all matrices */
+ pf_linear(sequence, structure);
+
+ if(circular)
+ pf_circ(sequence, structure); /* do post processing step for circular RNAs */
+
+ if (backtrack_type=='C') Q = qb[my_iindx[1]-n];
+ else if (backtrack_type=='M') Q = qm[my_iindx[1]-n];
+ else Q = (circular) ? qo : q[my_iindx[1]-n];
+
+ /* ensemble free energy in Kcal/mol */
+ if (Q<=FLT_MIN) fprintf(stderr, "pf_scale too large\n");
+ free_energy = (-log(Q)-n*log(pf_params->pf_scale))*pf_params->kT/1000.0;
+ /* in case we abort because of floating point errors */
+ if (n>1600) fprintf(stderr, "free energy = %8.2f\n", free_energy);
+
+ /* calculate base pairing probability matrix (bppm) */
+ if(do_bppm){
+ pf_create_bppm(sequence, structure);
+ /*
+ * Backward compatibility:
+ * This block may be removed if deprecated functions
+ * relying on the global variable "pr" vanish from within the package!
+ */
+ pr = probs;
+ /*
+ {
+ if(pr) free(pr);
+ pr = (FLT_OR_DBL *) space(sizeof(FLT_OR_DBL) * ((n+1)*(n+2)/2));
+ memcpy(pr, probs, sizeof(FLT_OR_DBL) * ((n+1)*(n+2)/2));
+ }
+ */
+ }
+ return free_energy;
+}
+
+PRIVATE void pf_linear(const char *sequence, char *structure){
+
+ int n, i,j,k,l, ij, u,u1,d,ii, type, type_2, tt, minl, maxl;
+
+ int noGUclosure;
+ FLT_OR_DBL expMLstem = 0.;
+
+ FLT_OR_DBL temp, Qmax=0;
+ FLT_OR_DBL qbt1, *tmp;
+
+ FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
+ double max_real;
+
+ max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;
+
+ n = (int) strlen(sequence);
+
+
+ noGUclosure = pf_params->model_details.noGUclosure;
+
+ /*array initialization ; qb,qm,q
+ qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */
+
+ if(with_gquad){
+ expMLstem = exp_E_MLstem(0, -1, -1, pf_params);
+ G = get_gquad_pf_matrix(S, scale, pf_params);
+ }
+
+ for (d=0; d<=TURN; d++)
+ for (i=1; i<=n-d; i++) {
+ j=i+d;
+ ij = my_iindx[i]-j;
+ q[ij]=1.0*scale[d+1];
+ qb[ij]=qm[ij]=0.0;
+ }
+
+ for (i=1; i<=n; i++)
+ qq[i]=qq1[i]=qqm[i]=qqm1[i]=0;
+
+ for (j=TURN+2;j<=n; j++) {
+ for (i=j-TURN-1; i>=1; i--) {
+ /* construction of partition function of segment i,j*/
+ /*firstly that given i binds j : qb(i,j) */
+ u = j-i-1; ij = my_iindx[i]-j;
+ type = ptype[ij];
+ if (type!=0) {
+ /*hairpin contribution*/
+ if (((type==3)||(type==4))&&noGUclosure) qbt1 = 0;
+ else
+ qbt1 = exp_E_Hairpin(u, type, S1[i+1], S1[j-1], sequence+i-1, pf_params) * scale[u+2];
+ /* interior loops with interior pair k,l */
+ for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++) {
+ u1 = k-i-1;
+ for (l=MAX2(k+TURN+1,j-1-MAXLOOP+u1); l<j; l++) {
+ type_2 = ptype[my_iindx[k]-l];
+ if (type_2) {
+ type_2 = rtype[type_2];
+ qbt1 += qb[my_iindx[k]-l] * (scale[u1+j-l+1] *
+ exp_E_IntLoop(u1, j-l-1, type, type_2,
+ S1[i+1], S1[j-1], S1[k-1], S1[l+1], pf_params));
+ }
+ }
+ }
+ /*multiple stem loop contribution*/
+ ii = my_iindx[i+1]; /* ii-k=[i+1,k-1] */
+ temp = 0.0;
+ for (k=i+2; k<=j-1; k++) temp += qm[ii-(k-1)]*qqm1[k];
+ tt = rtype[type];
+ qbt1 += temp * expMLclosing * exp_E_MLstem(tt, S1[j-1], S1[i+1], pf_params) * scale[2];
+
+ if(with_gquad){
+ qbt1 += exp_E_GQuad_IntLoop(i, j, type, S1, G, my_iindx, pf_params) * scale[2];
+ }
+
+ qb[ij] = qbt1;
+ }
+ /* end if (type!=0) */
+ else
+ qb[ij] = 0.0;
+
+ /* construction of qqm matrix containing final stem
+ contributions to multiple loop partition function
+ from segment i,j */
+ qqm[i] = qqm1[i]*expMLbase[1];
+ if (type) {
+ qbt1 = qb[ij] * exp_E_MLstem(type, ((i>1) || circular) ? S1[i-1] : -1, ((j<n) || circular) ? S1[j+1] : -1, pf_params);
+ qqm[i] += qbt1;
+ }
+
+ if(with_gquad){
+ /*include gquads into qqm*/
+ qqm[i] += G[my_iindx[i]-j] * expMLstem;
+ }
+
+ if (qm1) qm1[jindx[j]+i] = qqm[i]; /* for stochastic backtracking and circfold */
+
+ /*construction of qm matrix containing multiple loop
+ partition function contributions from segment i,j */
+ temp = 0.0;
+ ii = my_iindx[i]; /* ii-k=[i,k-1] */
+ for (k=j; k>i; k--) temp += (qm[ii-(k-1)] + expMLbase[k-i])*qqm[k];
+ qm[ij] = (temp + qqm[i]);
+
+ /*auxiliary matrix qq for cubic order q calculation below */
+ qbt1=0.0;
+ if (type){
+ qbt1 += qb[ij];
+ qbt1 *= exp_E_ExtLoop(type, ((i>1) || circular) ? S1[i-1] : -1, ((j<n) || circular) ? S1[j+1] : -1, pf_params);
+ }
+
+ if(with_gquad){
+ qbt1 += G[ij];
+ }
+
+ qq[i] = qq1[i]*scale[1] + qbt1;
+
+ /*construction of partition function for segment i,j */
+ temp = 1.0*scale[1+j-i] + qq[i];
+ for (k=i; k<=j-1; k++) temp += q[ii-k]*qq[k+1];
+ q[ij] = temp;
+ if (temp>Qmax) {
+ Qmax = temp;
+ if (Qmax>max_real/10.)
+ fprintf(stderr, "Q close to overflow: %d %d %g\n", i,j,temp);
+ }
+ if (temp>=max_real) {
+ PRIVATE char msg[128];
+ sprintf(msg, "overflow in pf_fold while calculating q[%d,%d]\n"
+ "use larger pf_scale", i,j);
+ nrerror(msg);
+ }
+ }
+ tmp = qq1; qq1 =qq; qq =tmp;
+ tmp = qqm1; qqm1=qqm; qqm=tmp;
+
+ if(with_gquad){ /* rotate the auxilary g-quadruplex matrices */
+ tmp = Gj1; Gj1=Gj; Gj=tmp;
+ }
+ }
+}
+
+/* calculate partition function for circular case */
+/* NOTE: this is the postprocessing step ONLY */
+/* You have to call pf_linear first to calculate */
+/* complete circular case!!! */
+PRIVATE void pf_circ(const char *sequence, char *structure){
+
+ int u, p, q, k, l;
+ int noGUclosure;
+ int n = (int) strlen(sequence);
+
+ FLT_OR_DBL qot;
+ FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
+
+ noGUclosure = pf_params->model_details.noGUclosure;
+ qo = qho = qio = qmo = 0.;
+
+ /* construct qm2 matrix from qm1 entries */
+ for(k=1; k<n-TURN-1; k++){
+ qot = 0.;
+ for (u=k+TURN+1; u<n-TURN-1; u++)
+ qot += qm1[jindx[u]+k]*qm1[jindx[n]+(u+1)];
+ qm2[k] = qot;
+ }
+
+ for(p = 1; p < n; p++){
+ for(q = p + TURN + 1; q <= n; q++){
+ int type;
+ /* 1. get exterior hairpin contribution */
+ u = n-q + p-1;
+ if (u<TURN) continue;
+ type = ptype[my_iindx[p]-q];
+ if (!type) continue;
+ /* cause we want to calc the exterior loops, we need the reversed pair type from now on */
+ type=rtype[type];
+
+ char loopseq[10];
+ if (u<7){
+ strcpy(loopseq , sequence+q-1);
+ strncat(loopseq, sequence, p);
+ }
+ qho += (((type==3)||(type==4))&&noGUclosure) ? 0. : qb[my_iindx[p]-q] * exp_E_Hairpin(u, type, S1[q+1], S1[p-1], loopseq, pf_params) * scale[u];
+
+ /* 2. exterior interior loops, i "define" the (k,l) pair as "outer pair" */
+ /* so "outer type" is rtype[type[k,l]] and inner type is type[p,q] */
+ qot = 0.;
+ for(k=q+1; k < n; k++){
+ int ln1, lstart;
+ ln1 = k - q - 1;
+ if(ln1+p-1>MAXLOOP) break;
+ lstart = ln1+p-1+n-MAXLOOP;
+ if(lstart<k+TURN+1) lstart = k + TURN + 1;
+ for(l=lstart;l <= n; l++){
+ int ln2, type2;
+ ln2 = (p - 1) + (n - l);
+
+ if((ln1+ln2) > MAXLOOP) continue;
+
+ type2 = ptype[my_iindx[k]-l];
+ if(!type2) continue;
+ qio += qb[my_iindx[p]-q] * qb[my_iindx[k]-l] * exp_E_IntLoop(ln2, ln1, rtype[type2], type, S1[l+1], S1[k-1], S1[p-1], S1[q+1], pf_params) * scale[ln1+ln2];
+ }
+ } /* end of kl double loop */
+ }
+ } /* end of pq double loop */
+
+ /* 3. Multiloops */
+ for(k=TURN+2; k<n-2*TURN-3; k++)
+ qmo += qm[my_iindx[1]-k] * qm2[k+1] * expMLclosing;
+
+ /* add an additional pf of 1.0 to take the open chain into account too */
+ qo = qho + qio + qmo + 1.0*scale[n];
+}
+
+/* calculate base pairing probs */
+PUBLIC void pf_create_bppm(const char *sequence, char *structure){
+ int n, i,j,k,l, ij, kl, ii, i1, ll, type, type_2, tt, u1, ov=0;
+ FLT_OR_DBL temp, Qmax=0, prm_MLb;
+ FLT_OR_DBL prmt,prmt1;
+ FLT_OR_DBL *tmp;
+ FLT_OR_DBL tmp2;
+ FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
+ double max_real;
+
+ FLT_OR_DBL expMLstem = (with_gquad) ? exp_E_MLstem(0, -1, -1, pf_params) : 0;
+
+ max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;
+
+ if((S != NULL) && (S1 != NULL)){
+ n = S[0];
+ Qmax=0;
+
+ for (k=1; k<=n; k++) {
+ q1k[k] = q[my_iindx[1] - k];
+ qln[k] = q[my_iindx[k] -n];
+ }
+ q1k[0] = 1.0;
+ qln[n+1] = 1.0;
+
+/* pr = q; */ /* recycling */
+
+
+ /* 1. exterior pair i,j and initialization of pr array */
+ if(circular){
+ for (i=1; i<=n; i++) {
+ for (j=i; j<=MIN2(i+TURN,n); j++)
+ probs[my_iindx[i]-j] = 0;
+ for (j=i+TURN+1; j<=n; j++) {
+ ij = my_iindx[i]-j;
+ type = ptype[ij];
+ if (type&&(qb[ij]>0.)) {
+ probs[ij] = 1./qo;
+ int rt = rtype[type];
+
+ /* 1.1. Exterior Hairpin Contribution */
+ int u = i + n - j -1;
+ /* get the loop sequence */
+ char loopseq[10];
+ if (u<7){
+ strcpy(loopseq , sequence+j-1);
+ strncat(loopseq, sequence, i);
+ }
+ tmp2 = exp_E_Hairpin(u, rt, S1[j+1], S1[i-1], loopseq, pf_params) * scale[u];
+
+ /* 1.2. Exterior Interior Loop Contribution */
+ /* 1.2.1. i,j delimtis the "left" part of the interior loop */
+ /* (j,i) is "outer pair" */
+ for(k=1; k < i-TURN-1; k++){
+ int ln1, lstart;
+ ln1 = k + n - j - 1;
+ if(ln1>MAXLOOP) break;
+ lstart = ln1+i-1-MAXLOOP;
+ if(lstart<k+TURN+1) lstart = k + TURN + 1;
+ for(l=lstart; l < i; l++){
+ int ln2, type_2;
+ type_2 = ptype[my_iindx[k]-l];
+ if (type_2==0) continue;
+ ln2 = i - l - 1;
+ if(ln1+ln2>MAXLOOP) continue;
+ tmp2 += qb[my_iindx[k] - l]
+ * exp_E_IntLoop(ln1,
+ ln2,
+ rt,
+ rtype[type_2],
+ S1[j+1],
+ S1[i-1],
+ S1[k-1],
+ S1[l+1],
+ pf_params)
+ * scale[ln1 + ln2];
+ }
+ }
+ /* 1.2.2. i,j delimtis the "right" part of the interior loop */
+ for(k=j+1; k < n-TURN; k++){
+ int ln1, lstart;
+ ln1 = k - j - 1;
+ if((ln1 + i - 1)>MAXLOOP) break;
+ lstart = ln1+i-1+n-MAXLOOP;
+ if(lstart<k+TURN+1) lstart = k + TURN + 1;
+ for(l=lstart; l <= n; l++){
+ int ln2, type_2;
+ type_2 = ptype[my_iindx[k]-l];
+ if (type_2==0) continue;
+ ln2 = i - 1 + n - l;
+ if(ln1+ln2>MAXLOOP) continue;
+ tmp2 += qb[my_iindx[k] - l]
+ * exp_E_IntLoop(ln2,
+ ln1,
+ rtype[type_2],
+ rt,
+ S1[l+1],
+ S1[k-1],
+ S1[i-1],
+ S1[j+1],
+ pf_params)
+ * scale[ln1 + ln2];
+ }
+ }
+ /* 1.3 Exterior multiloop decomposition */
+ /* 1.3.1 Middle part */
+ if((i>TURN+2) && (j<n-TURN-1))
+ tmp2 += qm[my_iindx[1]-i+1]
+ * qm[my_iindx[j+1]-n]
+ * expMLclosing
+ * exp_E_MLstem(type, S1[i-1], S1[j+1], pf_params);
+
+ /* 1.3.2 Left part */
+ for(k=TURN+2; k < i-TURN-2; k++)
+ tmp2 += qm[my_iindx[1]-k]
+ * qm1[jindx[i-1]+k+1]
+ * expMLbase[n-j]
+ * expMLclosing
+ * exp_E_MLstem(type, S1[i-1], S1[j+1], pf_params);
+
+ /* 1.3.3 Right part */
+ for(k=j+TURN+2; k < n-TURN-1;k++)
+ tmp2 += qm[my_iindx[j+1]-k]
+ * qm1[jindx[n]+k+1]
+ * expMLbase[i-1]
+ * expMLclosing
+ * exp_E_MLstem(type, S1[i-1], S1[j+1], pf_params);
+
+ /* all exterior loop decompositions for pair i,j done */
+ probs[ij] *= tmp2;
+
+ }
+ else probs[ij] = 0;
+ }
+ }
+ } /* end if(circular) */
+ else {
+ for (i=1; i<=n; i++) {
+ for (j=i; j<=MIN2(i+TURN,n); j++) probs[my_iindx[i]-j] = 0;
+ for (j=i+TURN+1; j<=n; j++) {
+ ij = my_iindx[i]-j;
+ type = ptype[ij];
+ if (type&&(qb[ij]>0.)) {
+ probs[ij] = q1k[i-1]*qln[j+1]/q1k[n];
+ probs[ij] *= exp_E_ExtLoop(type, (i>1) ? S1[i-1] : -1, (j<n) ? S1[j+1] : -1, pf_params);
+ }
+ else
+ probs[ij] = 0.;
+ }
+ }
+ } /* end if(!circular) */
+
+ for (l=n; l>TURN+1; l--) {
+
+ /* 2. bonding k,l as substem of 2:loop enclosed by i,j */
+ for (k=1; k<l-TURN; k++) {
+ kl = my_iindx[k]-l;
+ type_2 = ptype[kl];
+ if (type_2==0) continue;
+ type_2 = rtype[type_2];
+ if (qb[kl]==0.) continue;
+
+ tmp2 = 0.;
+ for (i=MAX2(1,k-MAXLOOP-1); i<=k-1; i++)
+ for (j=l+1; j<=MIN2(l+ MAXLOOP -k+i+2,n); j++) {
+ ij = my_iindx[i] - j;
+ type = ptype[ij];
+ if (type && (probs[ij]>0.)) {
+ /* add *scale[u1+u2+2] */
+ tmp2 += probs[ij]
+ * (scale[k-i+j-l]
+ * exp_E_IntLoop(k - i - 1,
+ j - l - 1,
+ type,
+ type_2,
+ S1[i + 1],
+ S1[j - 1],
+ S1[k - 1],
+ S1[l + 1],
+ pf_params));
+ }
+ }
+ probs[kl] += tmp2;
+ }
+
+ if(with_gquad){
+ /* 2.5. bonding k,l as gquad enclosed by i,j */
+ FLT_OR_DBL *expintern = &(pf_params->expinternal[0]);
+
+ if(l < n - 3){
+ for(k = 2; k <= l - VRNA_GQUAD_MIN_BOX_SIZE; k++){
+ kl = my_iindx[k]-l;
+ if (G[kl]==0.) continue;
+ tmp2 = 0.;
+ i = k - 1;
+ for(j = MIN2(l + MAXLOOP + 1, n); j > l + 3; j--){
+ ij = my_iindx[i] - j;
+ type = ptype[ij];
+ if(!type) continue;
+ tmp2 += probs[ij] * expintern[j-l-1] * pf_params->expmismatchI[type][S1[i+1]][S1[j-1]] * scale[2];
+ }
+ probs[kl] += tmp2 * G[kl];
+ }
+ }
+
+ if(l < n){
+ for(k = 4; k <= l - VRNA_GQUAD_MIN_BOX_SIZE; k++){
+ kl = my_iindx[k]-l;
+ if (G[kl]==0.) continue;
+ tmp2 = 0.;
+ j = l + 1;
+ for (i=MAX2(1,k-MAXLOOP-1); i < k - 3; i++){
+ ij = my_iindx[i] - j;
+ type = ptype[ij];
+ if(!type) continue;
+ tmp2 += probs[ij] * expintern[k - i - 1] * pf_params->expmismatchI[type][S1[i+1]][S1[j-1]] * scale[2];
+ }
+ probs[kl] += tmp2 * G[kl];
+ }
+ }
+
+ if (l < n - 1){
+ for (k=3; k<=l-VRNA_GQUAD_MIN_BOX_SIZE; k++) {
+ kl = my_iindx[k]-l;
+ if (G[kl]==0.) continue;
+ tmp2 = 0.;
+ for (i=MAX2(1,k-MAXLOOP-1); i<=k-2; i++){
+ u1 = k - i - 1;
+ for (j=l+2; j<=MIN2(l + MAXLOOP - u1 + 1,n); j++) {
+ ij = my_iindx[i] - j;
+ type = ptype[ij];
+ if(!type) continue;
+ tmp2 += probs[ij] * expintern[u1+j-l-1] * pf_params->expmismatchI[type][S1[i+1]][S1[j-1]] * scale[2];
+ }
+ }
+ probs[kl] += tmp2 * G[kl];
+ }
+ }
+ }
+
+ /* 3. bonding k,l as substem of multi-loop enclosed by i,j */
+ prm_MLb = 0.;
+ if (l<n) for (k=2; k<l-TURN; k++) {
+ i = k-1;
+ prmt = prmt1 = 0.0;
+
+ ii = my_iindx[i]; /* ii-j=[i,j] */
+ ll = my_iindx[l+1]; /* ll-j=[l+1,j-1] */
+ tt = ptype[ii-(l+1)]; tt=rtype[tt];
+ /* (i, l+1) closes the ML with substem (k,l) */
+ if(tt)
+ prmt1 = probs[ii-(l+1)] * expMLclosing * exp_E_MLstem(tt, S1[l], S1[i+1], pf_params);
+
+ /* (i,j) with j>l+1 closes the ML with substem (k,l) */
+ for (j=l+2; j<=n; j++) {
+ tt = ptype[ii-j]; tt = rtype[tt];
+ if(tt)
+ prmt += probs[ii-j] * exp_E_MLstem(tt, S1[j-1], S1[i+1], pf_params) * qm[ll-(j-1)];
+ }
+ kl = my_iindx[k]-l;
+ tt = ptype[kl];
+ prmt *= expMLclosing;
+ prml[ i] = prmt;
+ prm_l[i] = prm_l1[i]*expMLbase[1]+prmt1;
+
+ prm_MLb = prm_MLb*expMLbase[1] + prml[i];
+ /* same as: prm_MLb = 0;
+ for (i=1; i<=k-1; i++) prm_MLb += prml[i]*expMLbase[k-i-1]; */
+
+ prml[i] = prml[ i] + prm_l[i];
+
+ if(with_gquad){
+ if ((!tt) && (G[kl] == 0.)) continue;
+ } else {
+ if (qb[kl] == 0.) continue;
+ }
+
+ temp = prm_MLb;
+
+ for (i=1;i<=k-2; i++)
+ temp += prml[i]*qm[my_iindx[i+1] - (k-1)];
+
+ if(with_gquad){
+ if(tt)
+ temp *= exp_E_MLstem(tt, (k>1) ? S1[k-1] : -1, (l<n) ? S1[l+1] : -1, pf_params) * scale[2];
+ else
+ temp *= G[kl] * expMLstem * scale[2];
+ } else {
+ temp *= exp_E_MLstem(tt, (k>1) ? S1[k-1] : -1, (l<n) ? S1[l+1] : -1, pf_params) * scale[2];
+ }
+
+ probs[kl] += temp;
+
+ if (probs[kl]>Qmax) {
+ Qmax = probs[kl];
+ if (Qmax>max_real/10.)
+ fprintf(stderr, "P close to overflow: %d %d %g %g\n",
+ i, j, probs[kl], qb[kl]);
+ }
+ if (probs[kl]>=max_real) {
+ ov++;
+ probs[kl]=FLT_MAX;
+ }
+
+ } /* end for (k=..) */
+ tmp = prm_l1; prm_l1=prm_l; prm_l=tmp;
+
+ } /* end for (l=..) */
+
+ for (i=1; i<=n; i++)
+ for (j=i+TURN+1; j<=n; j++) {
+ ij = my_iindx[i]-j;
+
+ if(with_gquad){
+ if (qb[ij] > 0.)
+ probs[ij] *= qb[ij];
+ if (G[ij] > 0.){
+ probs[ij] += q1k[i-1] * G[ij] * qln[j+1]/q1k[n];
+ }
+ } else {
+ if (qb[ij] > 0.)
+ probs[ij] *= qb[ij];
+ }
+ }
+
+ if (structure!=NULL)
+ bppm_to_structure(structure, probs, n);
+ if (ov>0) fprintf(stderr, "%d overflows occurred while backtracking;\n"
+ "you might try a smaller pf_scale than %g\n",
+ ov, pf_params->pf_scale);
+ } /* end if((S != NULL) && (S1 != NULL)) */
+ else
+ nrerror("bppm calculations have to be done after calling forward recursion\n");
+ return;
+}
+
+PRIVATE void scale_pf_params(unsigned int length, pf_paramT *parameters){
+ unsigned int i;
+ double scaling_factor;
+
+ if(pf_params) free(pf_params);
+
+ if(parameters){
+ pf_params = get_boltzmann_factor_copy(parameters);
+ } else {
+ model_detailsT md;
+ set_model_details(&md);
+ pf_params = get_boltzmann_factors(temperature, 1.0, md, pf_scale);
+ }
+
+ scaling_factor = pf_params->pf_scale;
+
+ /* scaling factors (to avoid overflows) */
+ if (scaling_factor == -1) { /* mean energy for random sequences: 184.3*length cal */
+ scaling_factor = exp(-(-185+(pf_params->temperature-37.)*7.27)/pf_params->kT);
+ if (scaling_factor<1) scaling_factor=1;
+ pf_params->pf_scale = scaling_factor;
+ pf_scale = pf_params->pf_scale; /* compatibility with RNAup, may be removed sometime */
+ }
+ scale[0] = 1.;
+ scale[1] = 1./scaling_factor;
+ expMLbase[0] = 1;
+ expMLbase[1] = pf_params->expMLbase/scaling_factor;
+ for (i=2; i<=length; i++) {
+ scale[i] = scale[i/2]*scale[i-(i/2)];
+ expMLbase[i] = pow(pf_params->expMLbase, (double)i) * scale[i];
+ }
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC void update_pf_params(int length){
+ update_pf_params_par(length, NULL);
+}
+
+PUBLIC void update_pf_params_par(int length, pf_paramT *parameters){
+#ifdef _OPENMP
+ make_pair_matrix(); /* is this really necessary? */
+ scale_pf_params((unsigned) length, parameters);
+#else
+ if(parameters) init_partfunc(length, parameters);
+ else if (length>init_length) init_partfunc(length, parameters); /* init not update */
+ else {
+ make_pair_matrix();
+ scale_pf_params((unsigned) length, parameters);
+ }
+#endif
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char bppm_symbol(const float *x){
+/* if( ((x[1]-x[2])*(x[1]-x[2]))<0.1&&x[0]<=0.677) return '|'; */
+ if( x[0] > 0.667 ) return '.';
+ if( x[1] > 0.667 ) return '(';
+ if( x[2] > 0.667 ) return ')';
+ if( (x[1]+x[2]) > x[0] ) {
+ if( (x[1]/(x[1]+x[2])) > 0.667) return '{';
+ if( (x[2]/(x[1]+x[2])) > 0.667) return '}';
+ else return '|';
+ }
+ if( x[0] > (x[1]+x[2]) ) return ',';
+ return ':';
+}
+
+PUBLIC void bppm_to_structure(char *structure, FLT_OR_DBL *p, unsigned int length){
+ int i, j;
+ int *index = get_iindx(length);
+ float P[3]; /* P[][0] unpaired, P[][1] upstream p, P[][2] downstream p */
+
+ for( j=1; j<=length; j++ ) {
+ P[0] = 1.0;
+ P[1] = P[2] = 0.0;
+ for( i=1; i<j; i++) {
+ P[2] += p[index[i]-j]; /* j is paired downstream */
+ P[0] -= p[index[i]-j]; /* j is unpaired */
+ }
+ for( i=j+1; i<=length; i++ ) {
+ P[1] += p[index[j]-i]; /* j is paired upstream */
+ P[0] -= p[index[j]-i]; /* j is unpaired */
+ }
+ structure[j-1] = bppm_symbol(P);
+ }
+ structure[length] = '\0';
+ free(index);
+}
+
+
+/*---------------------------------------------------------------------------*/
+PRIVATE void make_ptypes(const short *S, const char *structure){
+ int n,i,j,k,l, noLP;
+
+ noLP = pf_params->model_details.noLP;
+
+ n=S[0];
+ for (k=1; k<n-TURN; k++)
+ for (l=1; l<=2; l++) {
+ int type,ntype=0,otype=0;
+ i=k; j = i+TURN+l; if (j>n) continue;
+ type = pair[S[i]][S[j]];
+ while ((i>=1)&&(j<=n)) {
+ if ((i>1)&&(j<n)) ntype = pair[S[i-1]][S[j+1]];
+ if (noLP && (!otype) && (!ntype))
+ type = 0; /* i.j can only form isolated pairs */
+ qb[my_iindx[i]-j] = 0.;
+ ptype[my_iindx[i]-j] = (char) type;
+ otype = type;
+ type = ntype;
+ i--; j++;
+ }
+ }
+
+ if (struct_constrained && (structure != NULL))
+ constrain_ptypes(structure, (unsigned int)n, ptype, NULL, TURN, 1);
+}
+
+/*
+ stochastic backtracking in pf_fold arrays
+ returns random structure S with Boltzman probabilty
+ p(S) = exp(-E(S)/kT)/Z
+*/
+char *pbacktrack(char *seq){
+ double r, qt;
+ int i,j,n, start;
+ sequence = seq;
+ n = strlen(sequence);
+
+ if (init_length<1)
+ nrerror("can't backtrack without pf arrays.\n"
+ "Call pf_fold() before pbacktrack()");
+ pstruc = space((n+1)*sizeof(char));
+
+ for (i=0; i<n; i++) pstruc[i] = '.';
+
+ start = 1;
+ while (start<n) {
+ /* find i position of first pair */
+ for (i=start; i<n; i++) {
+ r = urn() * qln[i];
+ if (r > qln[i+1]*scale[1]) break; /* i is paired */
+ }
+ if (i>=n) break; /* no more pairs */
+ /* now find the pairing partner j */
+ r = urn() * (qln[i] - qln[i+1]*scale[1]);
+ for (qt=0, j=i+1; j<=n; j++) {
+ int type;
+ type = ptype[my_iindx[i]-j];
+ if (type) {
+ double qkl;
+ qkl = qb[my_iindx[i]-j];
+ if (j<n) qkl *= qln[j+1];
+ qkl *= exp_E_ExtLoop(type, (i>1) ? S1[i-1] : -1, (j<n) ? S1[j+1] : -1, pf_params);
+ qt += qkl;
+ if (qt > r) break; /* j is paired */
+ }
+ }
+ if (j==n+1) nrerror("backtracking failed in ext loop");
+ start = j+1;
+ backtrack(i,j);
+ }
+
+ return pstruc;
+}
+char *pbacktrack_circ(char *seq){
+ double r, qt;
+ int i, j, k, l, n;
+ FLT_OR_DBL expMLclosing = pf_params->expMLclosing;
+
+ sequence = seq;
+ n = strlen(sequence);
+
+ if (init_length<1)
+ nrerror("can't backtrack without pf arrays.\n"
+ "Call pf_circ_fold() before pbacktrack_circ()");
+ pstruc = space((n+1)*sizeof(char));
+
+ /* initialize pstruct with single bases */
+ for (i=0; i<n; i++) pstruc[i] = '.';
+
+ qt = 1.0*scale[n];
+ r = urn() * qo;
+
+ /* open chain? */
+ if(qt > r) return pstruc;
+
+ for(i=1; (i < n); i++){
+ for(j=i+TURN+1;(j<=n); j++){
+
+ int type, u;
+ /* 1. first check, wether we can do a hairpin loop */
+ u = n-j + i-1;
+ if (u<TURN) continue;
+
+ type = ptype[my_iindx[i]-j];
+ if (!type) continue;
+
+ type=rtype[type];
+
+ char loopseq[10];
+ if (u<7){
+ strcpy(loopseq , sequence+j-1);
+ strncat(loopseq, sequence, i);
+ }
+
+ qt += qb[my_iindx[i]-j] * exp_E_Hairpin(u, type, S1[j+1], S1[i-1], loopseq, pf_params) * scale[u];
+ /* found a hairpin? so backtrack in the enclosed part and we're done */
+ if(qt>r){ backtrack(i,j); return pstruc;}
+
+ /* 2. search for (k,l) with which we can close an interior loop */
+ for(k=j+1; (k < n); k++){
+ int ln1, lstart;
+ ln1 = k - j - 1;
+ if(ln1+i-1>MAXLOOP) break;
+
+ lstart = ln1+i-1+n-MAXLOOP;
+ if(lstart<k+TURN+1) lstart = k + TURN + 1;
+ for(l=lstart; (l <= n); l++){
+ int ln2, type2;
+ ln2 = (i - 1) + (n - l);
+ if((ln1+ln2) > MAXLOOP) continue;
+
+ type2 = ptype[my_iindx[k]-l];
+ if(!type) continue;
+ type2 = rtype[type2];
+ qt += qb[my_iindx[i]-j] * qb[my_iindx[k]-l] * exp_E_IntLoop(ln2, ln1, type2, type, S1[l+1], S1[k-1], S1[i-1], S1[j+1], pf_params) * scale[ln1 + ln2];
+ /* found an exterior interior loop? also this time, we can go straight */
+ /* forward and backtracking the both enclosed parts and we're done */
+ if(qt>r){ backtrack(i,j); backtrack(k,l); return pstruc;}
+ }
+ } /* end of kl double loop */
+ }
+ } /* end of ij double loop */
+ {
+ /* as we reach this part, we have to search for our barrier between qm and qm2 */
+ qt = 0.;
+ r = urn()*qmo;
+ for(k=TURN+2; k<n-2*TURN-3; k++){
+ qt += qm[my_iindx[1]-k] * qm2[k+1] * expMLclosing;
+ /* backtrack in qm and qm2 if we've found a valid barrier k */
+ if(qt>r){ backtrack_qm(1,k); backtrack_qm2(k+1,n); return pstruc;}
+ }
+ }
+ /* if we reach the actual end of this function, an error has occured */
+ /* cause we HAVE TO find an exterior loop or an open chain!!! */
+ nrerror("backtracking failed in exterior loop");
+ return pstruc;
+}
+
+PRIVATE void backtrack_qm(int i, int j){
+ /* divide multiloop into qm and qm1 */
+ double qmt, r;
+ int k;
+ while(j>i){
+ /* now backtrack [i ... j] in qm[] */
+ r = urn() * qm[my_iindx[i] - j];
+ qmt = qm1[jindx[j]+i]; k=i;
+ if(qmt<r)
+ for(k=i+1; k<=j; k++){
+ qmt += (qm[my_iindx[i]-(k-1)]+expMLbase[k-i])*qm1[jindx[j]+k];
+ if(qmt >= r) break;
+ }
+ if(k>j) nrerror("backtrack failed in qm");
+
+ backtrack_qm1(k,j);
+
+ if(k<i+TURN) break; /* no more pairs */
+ r = urn() * (qm[my_iindx[i]-(k-1)] + expMLbase[k-i]);
+ if(expMLbase[k-i] >= r) break; /* no more pairs */
+ j = k-1;
+ }
+}
+
+PRIVATE void backtrack_qm1(int i,int j){
+ /* i is paired to l, i<l<j; backtrack in qm1 to find l */
+ int ii, l, type;
+ double qt, r;
+ r = urn() * qm1[jindx[j]+i];
+ ii = my_iindx[i];
+ for (qt=0., l=i+TURN+1; l<=j; l++) {
+ type = ptype[ii-l];
+ if (type)
+ qt += qb[ii-l] * exp_E_MLstem(type, S1[i-1], S1[l+1], pf_params) * expMLbase[j-l];
+ if (qt>=r) break;
+ }
+ if (l>j) nrerror("backtrack failed in qm1");
+ backtrack(i,l);
+}
+
+PRIVATE void backtrack_qm2(int k, int n){
+ double qom2t, r;
+ int u;
+ r= urn()*qm2[k];
+ /* we have to search for our barrier u between qm1 and qm1 */
+ for (qom2t = 0.,u=k+TURN+1; u<n-TURN-1; u++){
+ qom2t += qm1[jindx[u]+k]*qm1[jindx[n]+(u+1)];
+ if(qom2t > r) break;
+ }
+ if(u==n-TURN) nrerror("backtrack failed in qm2");
+ backtrack_qm1(k,u);
+ backtrack_qm1(u+1,n);
+}
+
+PRIVATE void backtrack(int i, int j){
+ int noGUclosure = pf_params->model_details.noGUclosure;
+
+ do {
+ double r, qbt1;
+ int k, l, type, u, u1;
+
+ pstruc[i-1] = '('; pstruc[j-1] = ')';
+
+ r = urn() * qb[my_iindx[i]-j];
+ type = ptype[my_iindx[i]-j];
+ u = j-i-1;
+ /*hairpin contribution*/
+ if (((type==3)||(type==4))&&noGUclosure) qbt1 = 0;
+ else
+ qbt1 = exp_E_Hairpin(u, type, S1[i+1], S1[j-1], sequence+i-1, pf_params)*
+ scale[u+2]; /* add scale[u+2] */
+
+ if (qbt1>=r) return; /* found the hairpin we're done */
+
+ for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++) {
+ u1 = k-i-1;
+ for (l=MAX2(k+TURN+1,j-1-MAXLOOP+u1); l<j; l++) {
+ int type_2;
+ type_2 = ptype[my_iindx[k]-l];
+ if (type_2) {
+ type_2 = rtype[type_2];
+ /* add *scale[u1+u2+2] */
+ qbt1 += qb[my_iindx[k]-l] * (scale[u1+j-l+1] *
+ exp_E_IntLoop(u1, j-l-1, type, type_2,
+ S1[i+1], S1[j-1], S1[k-1], S1[l+1], pf_params));
+ }
+ if (qbt1 > r) break;
+ }
+ if (qbt1 > r) break;
+ }
+ if (l<j) {
+ i=k; j=l;
+ }
+ else break;
+ } while (1);
+
+ /* backtrack in multi-loop */
+ {
+ double r, qt;
+ int k, ii, jj;
+
+ i++; j--;
+ /* find the first split index */
+ ii = my_iindx[i]; /* ii-j=[i,j] */
+ jj = jindx[j]; /* jj+i=[j,i] */
+ for (qt=0., k=i+1; k<j; k++) qt += qm[ii-(k-1)]*qm1[jj+k];
+ r = urn() * qt;
+ for (qt=0., k=i+1; k<j; k++) {
+ qt += qm[ii-(k-1)]*qm1[jj+k];
+ if (qt>=r) break;
+ }
+ if (k>=j) nrerror("backtrack failed, can't find split index ");
+
+ backtrack_qm1(k, j);
+
+ j = k-1;
+ backtrack_qm(i,j);
+ }
+}
+
+PUBLIC void assign_plist_from_pr(plist **pl, FLT_OR_DBL *probs, int length, double cut_off){
+ int i, j, n, count, *index;
+ count = 0;
+ n = 2;
+
+ index = get_iindx(length);
+
+ /* first guess of the size needed for pl */
+ *pl = (plist *)space(n*length*sizeof(plist));
+
+ for (i=1; i<length; i++) {
+ for (j=i+1; j<=length; j++) {
+ /* skip all entries below the cutoff */
+ if (probs[index[i]-j] < cut_off) continue;
+ /* do we need to allocate more memory? */
+ if (count == n * length - 1){
+ n *= 2;
+ *pl = (plist *)xrealloc(*pl, n * length * sizeof(plist));
+ }
+ (*pl)[count].i = i;
+ (*pl)[count].j = j;
+ (*pl)[count].p = probs[index[i] - j];
+ (*pl)[count++].type = 0;
+ }
+ }
+ /* mark the end of pl */
+ (*pl)[count].i = 0;
+ (*pl)[count].j = 0;
+ (*pl)[count].p = 0.;
+ (*pl)[count++].type = 0;
+ /* shrink memory to actual size needed */
+ *pl = (plist *)xrealloc(*pl, count * sizeof(plist));
+
+ free(index);
+}
+
+/* this doesn't work if free_pf_arrays() is called before */
+PUBLIC void assign_plist_gquad_from_pr( plist **pl,
+ int length,
+ double cut_off){
+
+ int i, j, k, n, count, *index;
+ count = 0;
+ n = 2;
+
+ if(!probs){ *pl = NULL; return;}
+
+ index = get_iindx(length);
+
+ /* first guess of the size needed for pl */
+ *pl = (plist *)space(n*length*sizeof(plist));
+
+ for (i=1; i<length; i++) {
+ for (j=i+1; j<=length; j++) {
+ /* skip all entries below the cutoff */
+ if (probs[index[i]-j] < cut_off) continue;
+
+ /* do we need to allocate more memory? */
+ if (count == n * length - 1){
+ n *= 2;
+ *pl = (plist *)xrealloc(*pl, n * length * sizeof(plist));
+ }
+
+ /* check for presence of gquadruplex */
+ if((S[i] == 3) && (S[j] == 3)){
+ /* add probability of a gquadruplex at position (i,j)
+ for dot_plot
+ */
+ (*pl)[count].i = i;
+ (*pl)[count].j = j;
+ (*pl)[count].p = probs[index[i] - j];
+ (*pl)[count++].type = 1;
+ /* now add the probabilies of it's actual pairing patterns */
+ plist *inner, *ptr;
+ inner = get_plist_gquad_from_pr(S, i, j, G, probs, scale, pf_params);
+ for(ptr=inner; ptr->i != 0; ptr++){
+ if (count == n * length - 1){
+ n *= 2;
+ *pl = (plist *)xrealloc(*pl, n * length * sizeof(plist));
+ }
+ /* check if we've already seen this pair */
+ for(k = 0; k < count; k++)
+ if(((*pl)[k].i == ptr->i) && ((*pl)[k].j == ptr->j))
+ break;
+ (*pl)[k].i = ptr->i;
+ (*pl)[k].j = ptr->j;
+ (*pl)[k].type = 0;
+ if(k == count){
+ (*pl)[k].p = ptr->p;
+ count++;
+ } else
+ (*pl)[k].p += ptr->p;
+ }
+ } else {
+ (*pl)[count].i = i;
+ (*pl)[count].j = j;
+ (*pl)[count].p = probs[index[i] - j];
+ (*pl)[count++].type = 0;
+ }
+ }
+ }
+ /* mark the end of pl */
+ (*pl)[count].i = 0;
+ (*pl)[count].j = 0;
+ (*pl)[count++].p = 0.;
+ /* shrink memory to actual size needed */
+ *pl = (plist *)xrealloc(*pl, count * sizeof(plist));
+ free(index);
+}
+
+/* this doesn't work if free_pf_arrays() is called before */
+PUBLIC char *get_centroid_struct_gquad_pr( int length,
+ double *dist){
+
+ /* compute the centroid structure of the ensemble, i.e. the strutcure
+ with the minimal average distance to all other structures
+ <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
+ Thus, the centroid is simply the structure containing all pairs with
+ p_ij>0.5 */
+ int i,j, k;
+ double p;
+ char *centroid;
+ int *my_iindx = get_iindx(length);
+
+ if (probs == NULL)
+ nrerror("get_centroid_struct_pr: probs==NULL!");
+
+ *dist = 0.;
+ centroid = (char *) space((length+1)*sizeof(char));
+ for (i=0; i<length; i++) centroid[i]='.';
+ for (i=1; i<=length; i++)
+ for (j=i+TURN+1; j<=length; j++) {
+ if ((p=probs[my_iindx[i]-j])>0.5) {
+ /* check for presence of gquadruplex */
+ if((S[i] == 3) && (S[j] == 3)){
+ int L, l[3];
+ get_gquad_pattern_pf(S, i, j, pf_params, &L, l);
+ for(k=0;k<L;k++){
+ centroid[i+k-1]\
+ = centroid[i+k+L+l[0]-1]\
+ = centroid[i+k+2*L+l[0]+l[1]-1]\
+ = centroid[i+k+3*L+l[0]+l[1]+l[2]-1]\
+ = '+';
+ }
+ /* skip everything within the gquad */
+ i = j; j = j+TURN+1;
+ *dist += (1-p); /* right? */
+ break;
+ } else {
+ centroid[i-1] = '(';
+ centroid[j-1] = ')';
+ }
+ *dist += (1-p);
+ } else
+ *dist += p;
+ }
+ free(my_iindx);
+ centroid[length] = '\0';
+ return centroid;
+}
+
+/* this function is a threadsafe replacement for centroid() */
+PUBLIC char *get_centroid_struct_pl(int length, double *dist, plist *pl){
+ /* compute the centroid structure of the ensemble, i.e. the strutcure
+ with the minimal average distance to all other structures
+ <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
+ Thus, the centroid is simply the structure containing all pairs with
+ p_ij>0.5 */
+ int i;
+ char *centroid;
+
+ if (pl==NULL)
+ nrerror("get_centroid_struct: pl==NULL!");
+
+ *dist = 0.;
+ centroid = (char *) space((length+1)*sizeof(char));
+ for (i=0; i<length; i++) centroid[i]='.';
+ for (i=0; pl[i].i>0; i++){
+ if ((pl[i].p)>0.5) {
+ centroid[pl[i].i-1] = '(';
+ centroid[pl[i].j-1] = ')';
+ *dist += (1-pl[i].p);
+ } else
+ *dist += pl[i].p;
+ }
+ centroid[length] = '\0';
+ return centroid;
+}
+
+/* this function is a threadsafe replacement for centroid() */
+PUBLIC char *get_centroid_struct_pr(int length, double *dist, FLT_OR_DBL *probs){
+ /* compute the centroid structure of the ensemble, i.e. the strutcure
+ with the minimal average distance to all other structures
+ <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
+ Thus, the centroid is simply the structure containing all pairs with
+ p_ij>0.5 */
+ int i,j;
+ double p;
+ char *centroid;
+ int *index = get_iindx(length);
+
+ if (probs == NULL)
+ nrerror("get_centroid_struct_pr: probs==NULL!");
+
+ *dist = 0.;
+ centroid = (char *) space((length+1)*sizeof(char));
+ for (i=0; i<length; i++) centroid[i]='.';
+ for (i=1; i<=length; i++)
+ for (j=i+TURN+1; j<=length; j++) {
+ if ((p=probs[index[i]-j])>0.5) {
+ centroid[i-1] = '(';
+ centroid[j-1] = ')';
+ *dist += (1-p);
+ } else
+ *dist += p;
+ }
+ free(index);
+ centroid[length] = '\0';
+ return centroid;
+}
+
+PUBLIC plist *stackProb(double cutoff){
+ plist *pl;
+ int i,j,plsize=256;
+ int num = 0;
+
+ if (probs==NULL)
+ nrerror("probs==NULL. You need to call pf_fold() before stackProb()");
+
+ int length = S[0];
+ int *index = get_iindx(length);
+
+ pl = (plist *) space(plsize*sizeof(plist));
+
+ for (i=1; i<length; i++)
+ for (j=i+TURN+3; j<=length; j++) {
+ double p;
+ if((p=probs[index[i]-j]) < cutoff) continue;
+ if (qb[index[i+1]-(j-1)]<FLT_MIN) continue;
+ p *= qb[index[i+1]-(j-1)]/qb[index[i]-j];
+ p *= exp_E_IntLoop(0,0,ptype[index[i]-j],rtype[ptype[index[i+1]-(j-1)]],
+ 0,0,0,0, pf_params)*scale[2];/* add *scale[u1+u2+2] */
+ if (p>cutoff) {
+ pl[num].i = i;
+ pl[num].j = j;
+ pl[num++].p = p;
+ if (num>=plsize) {
+ plsize *= 2;
+ pl = xrealloc(pl, plsize*sizeof(plist));
+ }
+ }
+ }
+ pl[num].i=0;
+ free(index);
+ return pl;
+}
+
+/*-------------------------------------------------------------------------*/
+/* make arrays used for pf_fold available to other routines */
+PUBLIC int get_pf_arrays( short **S_p,
+ short **S1_p,
+ char **ptype_p,
+ FLT_OR_DBL **qb_p,
+ FLT_OR_DBL **qm_p,
+ FLT_OR_DBL **q1k_p,
+ FLT_OR_DBL **qln_p){
+
+ if(qb == NULL) return(0); /* check if pf_fold() has been called */
+ *S_p = S; *S1_p = S1; *ptype_p = ptype;
+ *qb_p = qb; *qm_p = qm;
+ *q1k_p = q1k; *qln_p = qln;
+ return(1); /* success */
+}
+
+/* get the free energy of a subsequence from the q[] array */
+PUBLIC double get_subseq_F(int i, int j){
+ if (!q)
+ nrerror("call pf_fold() to fill q[] array before calling get_subseq_F()");
+ return ((-log(q[my_iindx[i]-j])-(j-i+1)*log(pf_params->pf_scale))*pf_params->kT/1000.0);
+}
+
+
+PUBLIC double mean_bp_distance(int length){
+ return mean_bp_distance_pr(length, probs);
+}
+
+PUBLIC double mean_bp_distance_pr(int length, FLT_OR_DBL *p){
+ /* compute the mean base pair distance in the thermodynamic ensemble */
+ /* <d> = \sum_{a,b} p_a p_b d(S_a,S_b)
+ this can be computed from the pair probs p_ij as
+ <d> = \sum_{ij} p_{ij}(1-p_{ij}) */
+ int i,j;
+ double d=0;
+ int *index = get_iindx((unsigned int) length);
+
+ if (p==NULL)
+ nrerror("p==NULL. You need to supply a valid probability matrix for mean_bp_distance_pr()");
+
+ for (i=1; i<=length; i++)
+ for (j=i+TURN+1; j<=length; j++)
+ d += p[index[i]-j] * (1-p[index[i]-j]);
+
+ free(index);
+ return 2*d;
+}
+
+PUBLIC FLT_OR_DBL *export_bppm(void){
+ return probs;
+}
+
+/*###########################################*/
+/*# deprecated functions below #*/
+/*###########################################*/
+
+/* this function is deprecated since it is not threadsafe */
+PUBLIC char *centroid(int length, double *dist) {
+ /* compute the centroid structure of the ensemble, i.e. the strutcure
+ with the minimal average distance to all other structures
+ <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
+ Thus, the centroid is simply the structure containing all pairs with
+ p_ij>0.5 */
+ int i,j;
+ double p;
+ char *centroid;
+
+ if (pr==NULL)
+ nrerror("pr==NULL. You need to call pf_fold() before centroid()");
+
+ *dist = 0.;
+ centroid = (char *) space((length+1)*sizeof(char));
+ for (i=0; i<length; i++) centroid[i]='.';
+ for (i=1; i<=length; i++)
+ for (j=i+TURN+1; j<=length; j++) {
+ if ((p=pr[my_iindx[i]-j])>0.5) {
+ centroid[i-1] = '(';
+ centroid[j-1] = ')';
+ *dist += (1-p);
+ } else
+ *dist += p;
+ }
+ return centroid;
+}
+
+
+/* This function is deprecated since it uses the global array pr for calculations */
+PUBLIC double mean_bp_dist(int length) {
+ /* compute the mean base pair distance in the thermodynamic ensemble */
+ /* <d> = \sum_{a,b} p_a p_b d(S_a,S_b)
+ this can be computed from the pair probs p_ij as
+ <d> = \sum_{ij} p_{ij}(1-p_{ij}) */
+ int i,j;
+ double d=0;
+
+ if (pr==NULL)
+ nrerror("pr==NULL. You need to call pf_fold() before mean_bp_dist()");
+
+ for (i=1; i<=length; i++)
+ for (j=i+TURN+1; j<=length; j++)
+ d += pr[my_iindx[i]-j] * (1-pr[my_iindx[i]-j]);
+ return 2*d;
+}
+
+/*----------------------------------------------------------------------*/
+PUBLIC double expHairpinEnergy(int u, int type, short si1, short sj1,
+ const char *string) {
+/* compute Boltzmann weight of a hairpin loop, multiply by scale[u+2] */
+ double q, kT;
+ kT = pf_params->kT; /* kT in cal/mol */
+ if(u <= 30)
+ q = pf_params->exphairpin[u];
+ else
+ q = pf_params->exphairpin[30] * exp( -(pf_params->lxc*log( u/30.))*10./kT);
+ if ((tetra_loop)&&(u==4)) {
+ char tl[7]={0}, *ts;
+ strncpy(tl, string, 6);
+ if ((ts=strstr(pf_params->Tetraloops, tl)))
+ return (pf_params->exptetra[(ts-pf_params->Tetraloops)/7]);
+ }
+ if ((tetra_loop)&&(u==6)) {
+ char tl[9]={0}, *ts;
+ strncpy(tl, string, 6);
+ if ((ts=strstr(pf_params->Hexaloops, tl)))
+ return (pf_params->exphex[(ts-pf_params->Hexaloops)/9]);
+ }
+ if (u==3) {
+ char tl[6]={0}, *ts;
+ strncpy(tl, string, 5);
+ if ((ts=strstr(pf_params->Triloops, tl)))
+ return (pf_params->exptri[(ts-pf_params->Triloops)/6]);
+ if (type>2)
+ q *= pf_params->expTermAU;
+ }
+ else /* no mismatches for tri-loops */
+ q *= pf_params->expmismatchH[type][si1][sj1];
+
+ return q;
+}
+PUBLIC double expLoopEnergy(int u1, int u2, int type, int type2,
+ short si1, short sj1, short sp1, short sq1) {
+/* compute Boltzmann weight of interior loop,
+ multiply by scale[u1+u2+2] for scaling */
+ double z=0;
+ int no_close = 0;
+
+ if ((no_closingGU) && ((type2==3)||(type2==4)||(type==2)||(type==4)))
+ no_close = 1;
+
+ if ((u1==0) && (u2==0)) /* stack */
+ z = pf_params->expstack[type][type2];
+ else if (no_close==0) {
+ if ((u1==0)||(u2==0)) { /* bulge */
+ int u;
+ u = (u1==0)?u2:u1;
+ z = pf_params->expbulge[u];
+ if (u2+u1==1) z *= pf_params->expstack[type][type2];
+ else {
+ if (type>2) z *= pf_params->expTermAU;
+ if (type2>2) z *= pf_params->expTermAU;
+ }
+ }
+ else { /* interior loop */
+ if (u1+u2==2) /* size 2 is special */
+ z = pf_params->expint11[type][type2][si1][sj1];
+ else if ((u1==1) && (u2==2))
+ z = pf_params->expint21[type][type2][si1][sq1][sj1];
+ else if ((u1==2) && (u2==1))
+ z = pf_params->expint21[type2][type][sq1][si1][sp1];
+ else if ((u1==2) && (u2==2))
+ z = pf_params->expint22[type][type2][si1][sp1][sq1][sj1];
+ else if (((u1==2)&&(u2==3))||((u1==3)&&(u2==2))){ /*2-3 is special*/
+ z = pf_params->expinternal[5]*
+ pf_params->expmismatch23I[type][si1][sj1]*
+ pf_params->expmismatch23I[type2][sq1][sp1];
+ z *= pf_params->expninio[2][1];
+ }
+ else if ((u1==1)||(u2==1)) { /*1-n is special*/
+ z = pf_params->expinternal[u1+u2]*
+ pf_params->expmismatch1nI[type][si1][sj1]*
+ pf_params->expmismatch1nI[type2][sq1][sp1];
+ z *= pf_params->expninio[2][abs(u1-u2)];
+ }
+ else {
+ z = pf_params->expinternal[u1+u2]*
+ pf_params->expmismatchI[type][si1][sj1]*
+ pf_params->expmismatchI[type2][sq1][sp1];
+ z *= pf_params->expninio[2][abs(u1-u2)];
+ }
+ }
+ }
+ return z;
+}
+
+PUBLIC void init_pf_circ_fold(int length){
+/* DO NOTHING */
+}
+
+PUBLIC void init_pf_fold(int length){
+/* DO NOTHING */
+}
+
+