3 c Ivo L Hofacker, Vienna RNA package
5 /* Last changed Time-stamp: <2009-06-18 14:04:21 ivo> */
10 #include <float.h> /* #defines DBL_EPSILON */
12 #include "fold_vars.h"
17 /* compute an MEA structure, i.e. the structure maximising
18 EA = \sum_{(i,j) \in S} 2\gamma p_{i,j} + \sum_{i is unpaired} p^u_i
20 This can be computed by a variant of the Nussinov recursion:
21 M(i,j) = min(M(i,j-1)+pu[j], min_k M(i,k-1)+C(k,j)
22 C(i,j) = 2*gamma*p_ij + M(i+1,j-1)
24 Just for fun, we implement it as a sparse DP algorithm.
25 At any time we store only the current and previous row of M.
26 The C matrix is implemented as a sparse matrix:
27 For each j we store in C[j] a list of values (i, MEA([i..j])), containing
28 the MEA over all structures closed by (i,j).
29 The list is sparse since only C values where C(i,j)==M(i,j) can
30 contribute to the optimal solution.
33 typedef struct Litem {
39 unsigned size; /* allocated space */
44 PRIVATE int comp_plist(const void *a, const void *b);
45 PRIVATE plist *prune_sort(plist *p, double *pu, int n, double gamma, short *S, int gq);
46 PRIVATE void pushC(List *c, int i, double a);
57 PRIVATE void mea_backtrack(const struct MEAdat *bdat, int i, int j, int paired, short *S, pf_paramT *pf);
59 PUBLIC float MEA(plist *p, char *structure, double gamma) {
60 return MEA_seq(p, NULL, structure, gamma, NULL);
63 PUBLIC float MEA_seq(plist *p, const char *sequence, char *structure, double gamma, pf_paramT *pf){
72 double MEA, *Mi, *Mi1, *tmp, *pu;
75 n = strlen(structure);
76 for (i=0; i<n; i++) structure[i] = '.';
80 S = encode_sequence(sequence, 1);
83 with_gquad = pf->model_details.gquad;
85 pu = space(sizeof(double)*(n+1));
86 pp = pl = prune_sort(p, pu, n, gamma, S, with_gquad);
88 C = (List*) space((n+1)*(sizeof(List)));
90 Mi = (double *) space((n+1)*sizeof(double));
91 Mi1 = (double *) space((n+1)*sizeof(double));
95 for (j=i+1; j<=n; j++) {
97 Mi[j] = Mi[j-1] + pu[j];
98 for (li=C[j].list; li<C[j].list+C[j].nelem; li++) {
99 EA = li->A + Mi[(li->i) -1];
100 Mi[j] = MAX2(Mi[j], EA);
102 if (pp->i == i && pp->j ==j) {
103 EA = 2*gamma*pp->p + Mi1[j-1];
106 pushC(&C[j], i, EA); /* only push into C[j] list if optimal */
112 tmp = Mi1; Mi1 = Mi; Mi = tmp;
117 bdat.structure = structure; bdat.gamma = gamma;
118 bdat.C = C; bdat.Mi=Mi1; bdat.pl=pl; bdat.pu = pu;
119 mea_backtrack(&bdat, 1, n, 0, S, pf);
120 free(Mi); free(Mi1); free(pl); free(pu);
122 if (C[i].list) free(C[i].list);
128 PRIVATE int comp_plist(const void *a, const void *b) {
134 if (di!=0) return di;
135 return (A->j - B->j);
139 PRIVATE plist *prune_sort(plist *p, double *pu, int n, double gamma, short *S, int gq){
141 produce a list containing all base pairs with
142 2*gamma*p_ij > p^u_i + p^u_j
143 already sorted to be in the order we need them within the DP
145 unsigned size, i, nump = 0;
146 plist *pp, *pc, *pc2;
148 for (i=1; i<=n; i++) pu[i]=1.;
150 for (pc=p; pc->i >0; pc++) {
152 if(!S) nrerror("no sequence information available in MEA gquad!");
155 /* now remove all cases where i/j are within a gquad */
156 for(pc2=p; pc2->i > 0; pc2++){
157 if(S[pc2->i] != 3) continue;
158 if(S[pc2->j] != 3) continue;
159 /* check whether pc->i is enclosed in gquad */
163 /* check whether pc->j is enclosed in gquad */
174 pp = space(sizeof(plist)*(n+1));
175 for (pc=p; pc->i >0; pc++) {
176 if (pc->i > n) nrerror("mismatch between plist and structure in MEA()");
177 if (pc->p*2*gamma > pu[pc->i] + pu[pc->j]) {
178 if (nump+1 >= size) {
180 pp = xrealloc(pp, size*sizeof(plist));
185 pp[nump].i = pp[nump].j = pp[nump].p = 0;
186 qsort(pp, nump, sizeof(plist), comp_plist);
190 PRIVATE void pushC(List *c, int i, double a) {
191 if (c->nelem+1>=c->size) {
192 c->size = MAX2(8,c->size*sqrt(2));
193 c->list = xrealloc(c->list, sizeof(Litem)*c->size);
195 c->list[c->nelem].i = i;
196 c->list[c->nelem].A = a;
200 PRIVATE void mea_backtrack(const struct MEAdat *bdat, int i, int j, int pair, short *S, pf_paramT *pf){
201 /* backtrack structure for the interval [i..j] */
202 /* recursively calls itself, recomputes the necessary parts of the M matrix */
209 gq = pf->model_details.gquad;
218 /* if pair == 1, insert pair and re-compute Mi values */
219 /* else Mi is already filled */
221 if((S[i] == 3) && (S[j] == 3)){
223 get_gquad_pattern_pf(S, i, j, pf, &L, l);
225 bdat->structure[i+k-1]\
226 = bdat->structure[i+k+L+l[0]-1]\
227 = bdat->structure[i+k+2*L+l[0]+l[1]-1]\
228 = bdat->structure[i+k+3*L+l[0]+l[1]+l[2]-1]\
233 bdat->structure[i-1] = '(';
234 bdat->structure[j-1] = ')';
236 /* We've done this before in MEA() but didn't keep the results */
237 Mi[i-1]=0; Mi[i]=pu[i];
238 for (k=i+1; k<=j; k++) {
239 Mi[k] = Mi[k-1] + pu[k];
240 for (li=C[k].list; li<C[k].list+C[k].nelem && li->i >= i; li++) {
242 EA = li->A + Mi[(li->i) -1];
243 Mi[k] = MAX2(Mi[k], EA);
248 bdat->structure[i-1] = '(';
249 bdat->structure[j-1] = ')';
251 /* We've done this before in MEA() but didn't keep the results */
252 Mi[i-1]=0; Mi[i]=pu[i];
253 for (k=i+1; k<=j; k++) {
254 Mi[k] = Mi[k-1] + pu[k];
255 for (li=C[k].list; li<C[k].list+C[k].nelem && li->i >= i; li++) {
257 EA = li->A + Mi[(li->i) -1];
258 Mi[k] = MAX2(Mi[k], EA);
264 prec = DBL_EPSILON * Mi[j];
265 /* Mi values are filled, do the backtrace */
266 while (j>i && Mi[j] <= Mi[j-1] + pu[j] + prec) {
267 bdat->structure[j-1]='.';
270 for (li=C[j].list; li<C[j].list + C[j].nelem && li->i >= i; li++) {
271 if (Mi[j] <= li->A + Mi[(li->i) -1] + prec) {
272 if (li->i > i+3) mea_backtrack(bdat, i, (li->i)-1, 0, S, pf);
273 mea_backtrack(bdat, li->i, j, 1, S, pf);
277 if (fail && j>i) nrerror("backtrack failed for MEA()");