1 /* Last changed Time-stamp: <2007-09-04 11:16:01 ivo> */
4 RNA secondary structure prediction
5 with maximum distance base pairs
7 c Ivo Hofacker, Peter Stadler
19 #include "energy_par.h"
20 #include "fold_vars.h"
23 #include "loop_energies.h"
29 #include "svm_utils.h"
38 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
39 #define NEW_NINIO 1 /* new asymetry penalty */
41 #define MAXSECTORS 500 /* dimension for a backtrack array */
42 #define LOCALITY 0. /* locality parameter for base-pairs */
45 #################################
47 #################################
51 #################################
53 #################################
55 PRIVATE paramT *P = NULL;
56 PRIVATE int **c = NULL; /* energy array, given that i-j pair */
57 PRIVATE int *cc = NULL; /* linear array for calculating canonical structures */
58 PRIVATE int *cc1 = NULL; /* " " */
59 PRIVATE int *f3 = NULL; /* energy of 5' end */
60 PRIVATE int **fML = NULL; /* multi-loop auxiliary energy array */
61 PRIVATE int *Fmi = NULL; /* holds row i of fML (avoids jumps in memory) */
62 PRIVATE int *DMLi = NULL; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
63 PRIVATE int *DMLi1 = NULL; /* MIN(fML[i+1,k]+fML[k+1,j]) */
64 PRIVATE int *DMLi2 = NULL; /* MIN(fML[i+2,k]+fML[k+1,j]) */
65 PRIVATE char **ptype = NULL; /* precomputed array of pair types */
66 PRIVATE short *S = NULL, *S1 = NULL;
67 PRIVATE unsigned int length;
68 PRIVATE char *prev = NULL;
70 PRIVATE struct svm_model *avg_model = NULL;
71 PRIVATE struct svm_model *sd_model = NULL;
74 PRIVATE int with_gquad = 0;
75 PRIVATE int **ggg = NULL;
81 #pragma omp threadprivate(P, c, cc, cc1, f3, fML, Fmi, DMLi, DMLi1, DMLi2, ptype, S, S1, length, sd_model, avg_model, ggg, with_gquad)
83 #pragma omp threadprivate(P, c, cc, cc1, f3, fML, Fmi, DMLi, DMLi1, DMLi2, ptype, S, S1, length, ggg, with_gquad)
89 #################################
90 # PRIVATE FUNCTION DECLARATIONS #
91 #################################
93 PRIVATE void initialize_Lfold(int length, int maxdist);
94 PRIVATE void update_fold_params(void);
95 PRIVATE void get_arrays(unsigned int size, int maxdist);
96 PRIVATE void free_arrays(int maxdist);
97 PRIVATE void make_ptypes(const short *S, int i, int maxdist, int n);
98 PRIVATE char *backtrack(const char *sequence, int start, int maxdist);
99 PRIVATE int fill_arrays(const char *sequence, int maxdist, int zsc, double min_z);
102 #################################
103 # BEGIN OF FUNCTION DEFINITIONS #
104 #################################
107 /*--------------------------------------------------------------------------*/
108 PRIVATE void initialize_Lfold(int length, int maxdist){
110 if (length<1) nrerror("initialize_Lfold: argument must be greater 0");
111 get_arrays((unsigned) length, maxdist);
112 update_fold_params();
115 /*--------------------------------------------------------------------------*/
116 PRIVATE void get_arrays(unsigned int size, int maxdist){
118 c = (int **) space(sizeof(int *) *(size+1));
119 fML = (int **) space(sizeof(int *) *(size+1));
120 ptype = (char **) space(sizeof(char *)*(size+1));
121 f3 = (int *) space(sizeof(int) *(size+2)); /* has to be one longer */
122 cc = (int *) space(sizeof(int) *(maxdist+5));
123 cc1 = (int *) space(sizeof(int) *(maxdist+5));
124 Fmi = (int *) space(sizeof(int) *(maxdist+5));
125 DMLi = (int *) space(sizeof(int) *(maxdist+5));
126 DMLi1 = (int *) space(sizeof(int) *(maxdist+5));
127 DMLi2 = (int *) space(sizeof(int) *(maxdist+5));
129 for (i=size; (i>(int)size-maxdist-5) && (i>=0); i--) {
130 c[i] = (int *) space(sizeof(int)*(maxdist+5));
132 for (i=size; (i>(int)size-maxdist-5) && (i>=0); i--) {
133 fML[i] = (int *) space(sizeof(int)*(maxdist+5));
135 for (i=size; (i>(int)size-maxdist-5) && (i>=0); i--) {
136 ptype[i] = (char *) space(sizeof(char)*(maxdist+5));
140 /*--------------------------------------------------------------------------*/
142 PRIVATE void free_arrays(int maxdist){
144 for (i=0; (i<maxdist+5) && (i<=length); i++){
161 for (i=0; (i<maxdist+5) && (i<=length); i++){
168 f3 = cc = cc1 = Fmi = DMLi = DMLi1 = DMLi2 = NULL;
173 /*--------------------------------------------------------------------------*/
175 PUBLIC float Lfold(const char *string, char *structure, int maxdist){
176 return Lfoldz(string, structure, maxdist, 0, 0.0);
179 PUBLIC float Lfoldz(const char *string, char *structure, int maxdist, int zsc, double min_z){
182 length = (int) strlen(string);
183 if (maxdist>length) maxdist = length;
184 initialize_Lfold(length, maxdist);
185 if (fabs(P->temperature - temperature)>1e-6) update_fold_params();
187 with_gquad = P->model_details.gquad;
188 S = encode_sequence(string, 0);
189 S1 = encode_sequence(string, 1);
191 for (i=length; i>=(int)length-(int)maxdist-4 && i>0; i--)
192 make_ptypes(S, i, maxdist, length);
194 #ifdef USE_SVM /*svm*/
196 avg_model = svm_load_model_string(avg_model_string);
197 sd_model = svm_load_model_string(sd_model_string);
201 energy = fill_arrays(string, maxdist, zsc, min_z);
203 #ifdef USE_SVM /*svm*/
205 svm_destroy_model(avg_model);
206 svm_destroy_model(sd_model);
211 free_arrays(maxdist);
213 return (float) energy/100.;
216 PRIVATE int fill_arrays(const char *string, int maxdist, int zsc, double min_z) {
217 /* fill "c", "fML" and "f3" arrays and return optimal energy */
219 int i, j, k, length, energy;
221 int no_close, type, type_2, tt;
225 length = (int) strlen(string);
227 for (j=0; j<maxdist+5; j++)
228 Fmi[j]=DMLi[j]=DMLi1[j]=DMLi2[j]=INF;
229 for (j=length; j>length-maxdist-4; j--) {
230 for (i=(length-maxdist-4>0)?length-maxdist-4:1 ; i<j; i++)
231 c[i][j-i] = fML[i][j-i] = INF;
236 ggg = get_gquad_L_matrix(S, length - maxdist - 4, maxdist, ggg, P);
239 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
240 for (j = i+TURN+1; j <= length && j <= i+maxdist; j++) {
242 type = ptype[i][j-i];
244 no_close = (((type==3)||(type==4))&&no_closingGU);
246 if (type) { /* we have a pair */
247 int new_c=0, stackEnergy=INF;
248 /* hairpin ----------------------------------------------*/
250 new_c = (no_close) ? FORBIDDEN : E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], string+i-1, P);
252 /*--------------------------------------------------------
253 check for elementary structures involving more than one
255 --------------------------------------------------------*/
257 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++){
258 int minq = j-i+p-MAXLOOP-2;
259 if (minq<p+1+TURN) minq = p+1+TURN;
260 for (q = minq; q < j; q++) {
261 type_2 = ptype[p][q-p];
263 if (type_2==0) continue;
264 type_2 = rtype[type_2];
267 if (no_close||(type_2==3)||(type_2==4))
268 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
270 energy = E_IntLoop(p-i-1, j-q-1, type, type_2, S1[i+1], S1[j-1], S1[p-1], S1[q+1],P);
271 new_c = MIN2(new_c, energy + c[p][q-p]);
272 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
276 /* multi-loop decomposition ------------------------*/
278 decomp = DMLi1[j-1-(i+1)];
282 case 0: decomp += E_MLstem(tt, -1, -1, P);
285 case 2: decomp += E_MLstem(tt, S1[j-1], S1[i+1], P);
287 /* normal dangles, aka dangles = 1 */
288 default: decomp += E_MLstem(tt, -1, -1, P);
289 decomp = MIN2(decomp, DMLi2[j-1-(i+2)] + E_MLstem(tt, -1, S1[i+1], P) + P->MLbase);
290 decomp = MIN2(decomp, DMLi2[j-2-(i+2)] + E_MLstem(tt, S1[j-1], S1[i+1], P) + 2*P->MLbase);
291 decomp = MIN2(decomp, DMLi1[j-2-(i+1)] + E_MLstem(tt, S1[j-1], -1, P) + P->MLbase);
294 new_c = MIN2(new_c, decomp + P->MLclosing);
297 /* coaxial stacking of (i.j) with (i+1.k) or (k+1.j-1) */
301 for (k = i+2+TURN; k < j-2-TURN; k++) {
302 type_2 = ptype[i+1][k-i-1]; type_2 = rtype[type_2];
304 decomp = MIN2(decomp, c[i+1][k-i-1]+P->stack[type][type_2]+
306 type_2 = ptype[k+1][j-1-k-1]; type_2 = rtype[type_2];
308 decomp = MIN2(decomp, c[k+1][j-1-k-1]+P->stack[type][type_2]+
311 /* no TermAU penalty if coax stack */
312 decomp += 2*P->MLintern[1] + P->MLclosing;
313 new_c = MIN2(new_c, decomp);
317 /* include all cases where a g-quadruplex may be enclosed by base pair (i,j) */
320 energy = E_GQuad_IntLoop_L(i, j, type, S1, ggg, maxdist, P);
321 new_c = MIN2(new_c, energy);
325 new_c = MIN2(new_c, cc1[j-1-(i+1)]+stackEnergy);
328 c[i][j-i] = cc1[j-1-(i+1)]+stackEnergy;
332 } /* end >> if (pair) << */
334 else c[i][j-i] = INF;
336 /* done with c[i,j], now compute fML[i,j] */
337 /* free ends ? -----------------------------------------*/
341 case 0: new_fML = fML[i+1][j-i-1] + P->MLbase;
342 new_fML = MIN2(new_fML, fML[i][j-1-i] + P->MLbase);
343 new_fML = MIN2(new_fML, c[i][j-i] + E_MLstem(type, -1, -1, P));
346 case 2: new_fML = fML[i+1][j-i-1] + P->MLbase;
347 new_fML = MIN2(fML[i][j-1-i] + P->MLbase, new_fML);
348 new_fML = MIN2(new_fML, c[i][j-i] + E_MLstem(type, (i>1) ? S1[i-1] : -1, (j<length) ? S1[j+1] : -1, P));
350 /* normal dangles, aka dangles = 1 */
351 default: /* i unpaired */
352 new_fML = fML[i+1][j-i-1] + P->MLbase;
354 new_fML = MIN2(new_fML, fML[i][j-1-i] + P->MLbase);
356 if(type) new_fML = MIN2(new_fML, c[i][j-i] + E_MLstem(type, -1, -1, P));
358 tt = ptype[i+1][j-i-1];
359 if(tt) new_fML = MIN2(new_fML, c[i+1][j-i-1] + E_MLstem(tt, S1[i], -1, P) + P->MLbase);
361 tt = ptype[i][j-1-i];
362 if(tt) new_fML = MIN2(new_fML, c[i][j-1-i] + E_MLstem(tt, -1, S1[j], P) + P->MLbase);
364 tt = ptype[i+1][j-1-i-1];
365 if(tt) new_fML = MIN2(new_fML, c[i+1][j-1-i-1] + E_MLstem(tt, S1[i], S1[j], P) + 2*P->MLbase);
370 new_fML = MIN2(new_fML, ggg[i][j - i] + E_MLstem(0, -1, -1, P));
373 /* modular decomposition -------------------------------*/
374 for (decomp = INF, k = i+1+TURN; k <= j-2-TURN; k++)
375 decomp = MIN2(decomp, Fmi[k-i]+fML[k+1][j-k-1]);
377 DMLi[j-i] = decomp; /* store for use in ML decompositon */
378 new_fML = MIN2(new_fML, decomp);
380 /* coaxial stacking */
382 /* additional ML decomposition as two coaxially stacked helices */
383 for (decomp = INF, k = i+1+TURN; k <= j-2-TURN; k++) {
384 type = ptype[i][k-i]; type = rtype[type];
385 type_2 = ptype[k+1][j-k-1]; type_2 = rtype[type_2];
387 decomp = MIN2(decomp,
388 c[i][k-i]+c[k+1][j-k-1]+P->stack[type][type_2]);
391 decomp += 2*P->MLintern[1]; /* no TermAU penalty if coax stack */
393 /* This is needed for Y shaped ML loops with coax stacking of
394 interior pairts, but backtracking will fail if activated */
395 DMLi[j-i] = MIN2(DMLi[j-i], decomp);
396 DMLi[j-i] = MIN2(DMLi[j-i], DMLi[j-1-i]+P->MLbase);
397 DMLi[j-i] = MIN2(DMLi[j-i], DMLi1[j-(i+1)]+P->MLbase);
398 new_fML = MIN2(new_fML, DMLi[j-i]);
400 new_fML = MIN2(new_fML, decomp);
402 fML[i][j-i] = Fmi[j-i] = new_fML; /* substring energy */
405 /* calculate energies of 5' and 3' fragments */
407 static int do_backtrack = 0, prev_i=0;
412 /* dont use dangling end and mismatch contributions at all */
413 case 0: for(j=i+TURN+1; j<length && j<=i+maxdist; j++){
414 type = ptype[i][j-i];
417 f3[i] = MIN2(f3[i], f3[j+1] + ggg[i][j-i]);
421 f3[i] = MIN2(f3[i], f3[j+1] + c[i][j-i] + E_ExtLoop(type, -1, -1, P));
423 if(length<=i+maxdist){
427 f3[i] = MIN2(f3[i], ggg[i][j-i]);
430 type = ptype[i][j-i];
432 f3[i] = MIN2(f3[i], c[i][j-i] + E_ExtLoop(type, -1, -1, P));
435 /* always use dangles on both sides */
436 case 2: for(j=i+TURN+1; j<length && j<=i+maxdist; j++){
437 type = ptype[i][j-i];
440 if(ggg[i][j-i] != INF)
441 f3[i] = MIN2(f3[i], f3[j+1] + ggg[i][j-i]);
445 f3[i] = MIN2(f3[i], f3[j+1] + c[i][j-i] + E_ExtLoop(type, (i>1) ? S1[i-1] : -1, S1[j+1], P));
447 if(length<=i+maxdist){
451 f3[i] = MIN2(f3[i], ggg[i][j-i]);
454 type = ptype[i][j-i];
456 f3[i] = MIN2(f3[i], c[i][j-i] + E_ExtLoop(type, (i>1) ? S1[i-1] : -1, -1, P));
459 /* normal dangles, aka dangles = 1 */
460 default: for(j=i+TURN+1; j<length && j<=i+maxdist; j++){
461 type = ptype[i][j-i];
464 f3[i] = MIN2(f3[i], f3[j+1] + ggg[i][j-i]);
468 f3[i] = MIN2(f3[i], f3[j+1] + c[i][j-i] + E_ExtLoop(type, -1, -1, P));
469 f3[i] = MIN2(f3[i], ((j+2<=length) ? f3[j+2] : 0) + c[i][j-i] + E_ExtLoop(type, -1, S1[j+1], P));
471 type = ptype[i+1][j-i-1];
473 f3[i] = MIN2(f3[i], f3[j+1] + c[i+1][j-i-1] + E_ExtLoop(type, S1[i], -1, P));
474 f3[i] = MIN2(f3[i], ((j + 1 < length) ? f3[j+2] : 0) + c[i+1][j-i-1] + E_ExtLoop(type, S1[i], S1[j+1], P));
477 if(length<=i+maxdist){
481 f3[i] = MIN2(f3[i], ggg[i][j-i]);
484 type = ptype[i][j-i];
486 f3[i] = MIN2(f3[i], c[i][j-i] + E_ExtLoop(type, -1, -1, P));
487 type = ptype[i+1][j-i-1];
489 f3[i] = MIN2(f3[i], c[i+1][j-i-1] + E_ExtLoop(type, S1[i], -1, P));
492 } /* switch(dangles)... */
494 /* backtrack partial structure */
495 if (f3[i] < f3[i+1]) do_backtrack=1;
496 else if (do_backtrack) {
497 int pairpartner; /*i+1?? is paired with pairpartner*/
503 /*start "short" backtrack*/
506 while(fij==f3[lind+1]) lind++;
509 for (pairpartner = lind + TURN; pairpartner <= lind + maxdist; pairpartner++){
510 type = ptype[lind][pairpartner-lind];
513 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, -1, P);
514 if(fij == cc + f3[pairpartner + 1])
517 else if(with_gquad) {
518 cc = ggg[lind][pairpartner-lind];
519 if(fij == cc + f3[pairpartner + 1])
525 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, (lind > 1) ? S1[lind-1] : -1, (pairpartner < length) ? S1[pairpartner+1] : -1, P);
526 if(fij == cc + f3[pairpartner + 1])
530 cc = ggg[lind][pairpartner-lind];
531 if(fij == cc + f3[pairpartner + 1])
537 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, -1, P);
538 if(fij == cc + f3[pairpartner + 1]){
542 else if(pairpartner < length){
543 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, S1[pairpartner+1], P);
544 if(fij == cc + f3[pairpartner + 2]){
551 cc = ggg[lind][pairpartner-lind];
552 if(fij == cc + f3[pairpartner + 1])
556 type = ptype[lind+1][pairpartner-lind-1];
558 cc = c[lind+1][pairpartner-(lind+1)] + E_ExtLoop(type, S1[lind], -1, P);
559 if(fij == cc + f3[pairpartner+1]){
563 else if(pairpartner < length){
564 cc = c[lind+1][pairpartner-(lind+1)] + E_ExtLoop(type, S1[lind], S1[pairpartner+1], P);
565 if(fij == cc + f3[pairpartner+2])
573 if (!traced2) nrerror("backtrack failed in short backtrack 1");
577 double average_free_energy;
578 double sd_free_energy;
580 int *AUGC = get_seq_composition(S, lind-1, MIN2((pairpartner+1),length));
582 average_free_energy = avg_regression(AUGC[0], AUGC[1], AUGC[2], AUGC[3], AUGC[4], avg_model, &info_avg);
585 double min_sd = minimal_sd(AUGC[0],AUGC[1],AUGC[2],AUGC[3],AUGC[4]);
586 difference=(fij-f3[pairpartner+1])/100.-average_free_energy;
587 if ( difference - ( min_z * min_sd ) <= 0.0001 ) {
588 sd_free_energy = sd_regression(AUGC[0],AUGC[1],AUGC[2],AUGC[3],AUGC[4],sd_model);
589 my_z=difference/sd_free_energy;
591 ss = backtrack(string, lind , pairpartner+1);
593 if ((i+strlen(ss)<prev_i+strlen(prev)) ||
594 strncmp(ss+prev_i-i,prev,strlen(prev))) { /* ss does not contain prev */
596 printf(".%s (%6.2f) %4d z= %.3f\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)-1])/100., prev_i-1, prevz);
598 printf("%s (%6.2f) %4d z=%.3f\n ", prev, (f3[prev_i]-f3[prev_i+strlen(prev)])/100., prev_i, prevz);
602 prev=ss; prev_i = lind; prevz=my_z;
612 /* original code for Lfold*/
613 ss = backtrack(string, lind , pairpartner+1);
615 if ((i+strlen(ss)<prev_i+strlen(prev)) || strncmp(ss+prev_i-i,prev,strlen(prev))){
616 /* ss does not contain prev */
618 printf(".%s (%6.2f) %4d\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)-1])/100., prev_i-1);
620 printf("%s (%6.2f) %4d\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)])/100., prev_i);
633 printf(".%s (%6.2f) %4d z= %.2f\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)-1])/100., prev_i-1, prevz);
635 printf("%s (%6.2f) %4dz= %.2f \n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)])/100., prev_i, prevz);
636 free(prev); prev=NULL;
640 printf(".%s (%6.2f) %4d\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)-1])/100., prev_i-1);
642 printf("%s (%6.2f) %4d\n", prev, (f3[prev_i]-f3[prev_i+strlen(prev)])/100., prev_i);
644 } else if (f3[i]<0)do_backtrack=1;
647 int pairpartner; /*i+1?? is paired with pairpartner*/
649 double average_free_energy;
650 double sd_free_energy;
656 while(fij==f3[lind+1]) lind++;
658 for(pairpartner = lind + TURN; pairpartner <= lind + maxdist; pairpartner++){
659 type = ptype[lind][pairpartner-lind];
662 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, -1, P);
663 if(fij == cc + f3[pairpartner + 1])
667 cc = ggg[lind][pairpartner-lind];
668 if(fij == cc + f3[pairpartner + 1])
674 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, (lind > 1) ? S1[lind-1] : -1, (pairpartner < length) ? S1[pairpartner+1] : -1, P);
675 if(fij == cc + f3[pairpartner + 1])
679 cc = ggg[lind][pairpartner-lind];
680 if(fij == cc + f3[pairpartner + 1])
686 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, -1, P);
687 if(fij == cc + f3[pairpartner + 1]){
691 else if(pairpartner < length){
692 cc = c[lind][pairpartner-lind] + E_ExtLoop(type, -1, S1[pairpartner + 1], P);
693 if(fij == cc + f3[pairpartner + 1]){
700 cc = ggg[lind][pairpartner-lind];
701 if(fij == cc + f3[pairpartner + 1])
705 type = ptype[lind+1][pairpartner-lind-1];
707 cc = c[lind+1][pairpartner-(lind+1)] + E_ExtLoop(type, S1[lind], -1, P);
708 if(fij == cc + f3[pairpartner+1]){
712 else if (pairpartner < length){
713 cc = c[lind+1][pairpartner-(lind+1)] + E_ExtLoop(type, S1[lind], S1[pairpartner+1], P);
714 if(fij == cc + f3[pairpartner + 2]){
723 if (!traced2) nrerror("backtrack failed in short backtrack 2");
727 int *AUGC = get_seq_composition(S, lind-1, MIN2((pairpartner+1),length));
728 average_free_energy = avg_regression(AUGC[0],AUGC[1],AUGC[2],AUGC[3],AUGC[4],avg_model,&info_avg);
731 double min_sd = minimal_sd(AUGC[0],AUGC[1],AUGC[2],AUGC[3],AUGC[4]);
732 difference=(fij-f3[pairpartner+1])/100.-average_free_energy;
733 if ( difference - ( min_z * min_sd ) <= 0.0001 ) {
734 sd_free_energy = sd_regression(AUGC[0],AUGC[1],AUGC[2],AUGC[3],AUGC[4],sd_model);
735 my_z=difference/sd_free_energy;
737 ss = backtrack(string, lind , pairpartner+1);
738 printf("%s (%6.2f) %4d z= %.2f\n", ss, (f3[lind]-f3[lind+strlen(ss)-1])/100., lind, my_z);
746 ss = backtrack(string, lind , pairpartner+1);
748 printf("%s (%6.2f) %4d\n", ss, (f3[lind]-f3[lind+strlen(ss)-1])/100., 1);
750 printf("%s (%6.2f) %4d\n", ss, (f3[lind]-f3[lind+strlen(ss)])/100., 1);
758 int ii, *FF; /* rotate the auxilliary arrays */
759 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
760 FF = cc1; cc1=cc; cc=FF;
761 for (j=0; j< maxdist+5; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
762 if (i+maxdist+4<=length) {
763 c[i-1] = c[i+maxdist+4]; c[i+maxdist+4] = NULL;
764 fML[i-1] = fML[i+maxdist+4]; fML[i+maxdist+4]=NULL;
765 ptype[i-1] = ptype[i+maxdist+4]; ptype[i+maxdist+4] = NULL;
767 make_ptypes(S, i-1, maxdist, length);
770 ggg = get_gquad_L_matrix(S, i - 1, maxdist, ggg, P);
773 for (ii=0; ii<maxdist+5; ii++) {
784 PRIVATE char *backtrack(const char *string, int start, int maxdist){
785 /*------------------------------------------------------------------
786 trace back through the "c", "f3" and "fML" arrays to get the
787 base pairing list. No search for equivalent structures is done.
788 This is fast, since only few structure elements are recalculated.
789 ------------------------------------------------------------------*/
790 sect sector[MAXSECTORS]; /* backtracking sectors */
791 int i, j, k, energy, new, no_close, type, type_2, tt, s=0;
794 /* length = strlen(string); */
795 sector[++s].i = start;
796 sector[s].j = MIN2(length, maxdist+1);
797 sector[s].ml = (backtrack_type=='M') ? 1 : ((backtrack_type=='C')?2:0);
799 structure = (char *) space((MIN2(length-start, maxdist)+3)*sizeof(char));
800 for (i=0; i<=MIN2(length-start, maxdist); i++) structure[i] = '-';
803 int ml, fij, cij, traced, i1, j1, d3, d5, mm, mm5, mm3, mm53, p, q, jj=0, gq=0;
804 int canonical = 1; /* (i,j) closes a canonical structure */
807 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
808 occur in the fML- (1) or in the f-array (0) */
810 structure[i-start] = '(';
811 structure[j-start] = ')';
815 if (j < i+TURN+1) continue; /* no more pairs in this interval */
817 fij = (ml)? fML[i][j-i] : f3[i];
819 if (ml == 0) { /* backtrack in f3 */
821 if (fij == f3[i+1]) {
827 /* i or i+1 is paired. Find pairing partner */
829 case 0: for(traced = 0, k=j; k>i+TURN; k--){
832 if(fij == ggg[i][k-i] + f3[k+1]){
833 /* found the decomposition */
834 traced = i; jj = k + 1; gq = 1;
840 type = ptype[i][k-i];
842 if(fij == c[i][k-i] + E_ExtLoop(type, -1, -1, P) + f3[k+1]){
848 case 2: for(traced = 0, k=j; k>i+TURN; k--){
851 if(fij == ggg[i][k-i] + f3[k+1]){
852 /* found the decomposition */
853 traced = i; jj = k + 1; gq = 1;
859 type = ptype[i][k-i];
861 if(fij == c[i][k-i] + E_ExtLoop(type, (i>1) ? S1[i-1] : -1, (k<length) ? S1[k+1] : -1, P) + f3[k+1]){
867 default: for(traced = 0,k=j; k>i+TURN; k--){
870 if(fij == ggg[i][k-i] + f3[k+1]){
871 /* found the decomposition */
872 traced = i; jj = k + 1; gq = 1;
878 type = ptype[i+1][k-(i+1)];
880 if(fij == c[i+1][k-(i+1)] + E_ExtLoop(type, S1[i], -1, P) + f3[k+1]){
884 if(fij == c[i+1][k-(i+1)] + E_ExtLoop(type, S1[i], S1[k+1], P) + f3[k+2]){
890 type = ptype[i][k-i];
892 if(fij == c[i][k-i] + E_ExtLoop(type, -1, -1, P) + f3[k+1]){
896 if(fij == c[i][k-i] + E_ExtLoop(type, -1, S1[k+1], P) + f3[k+2]){
905 } /* switch(dangles)...*/
907 if (!traced) nrerror("backtrack failed in f3");
908 if (j==length) { /* backtrack only one component, unless j==length */
915 if(with_gquad && gq){
916 /* goto backtrace of gquadruplex */
920 structure[i-start] = '('; structure[j-start] = ')';
921 if (((jj==j+2) || (dangles==2)) && (j < length)) structure[j+1-start] = '.';
924 else { /* trace back in fML array */
925 if (fML[i][j-1-i]+P->MLbase == fij) { /* 3' end is unpaired */
931 if (fML[i+1][j-(i+1)]+P->MLbase == fij) { /* 5' end is unpaired */
939 if(fij == ggg[i][j-i] + E_MLstem(0, -1, -1, P)){
940 /* go to backtracing of quadruplex */
946 case 0: tt = ptype[i][j-i];
947 if(fij == c[i][j-i] + E_MLstem(tt, -1, -1, P)){
948 structure[i-start] = '(';
949 structure[j-start] = ')';
953 case 2: tt = ptype[i][j-i];
954 if(fij == c[i][j-i] + E_MLstem(tt, (i>1) ? S1[i-1] : -1, (j < length) ? S1[j+1] : -1, P)){
955 structure[i-start] = '(';
956 structure[j-start] = ')';
960 default: tt = ptype[i][j-i];
961 if(fij == c[i][j-i] + E_MLstem(tt, -1, -1, P)){
962 structure[i-start] = '(';
963 structure[j-start] = ')';
966 tt = ptype[i+1][j-(i+1)];
967 if(fij == c[i+1][j-(i+1)] + E_MLstem(tt, S1[i], -1, P) + P->MLbase){
968 structure[++i-start] = '(';
969 structure[j-start] = ')';
972 tt = ptype[i][j-1-i];
973 if(fij == c[i][j-1-i] + E_MLstem(tt, -1, S1[j], P) + P->MLbase){
974 structure[i-start] = '(';
975 structure[--j-start] = ')';
978 tt = ptype[i+1][j-1-(i+1)];
979 if(fij == c[i+1][j-1-(i+1)] + E_MLstem(tt, S1[i], S1[j], P) + 2*P->MLbase){
980 structure[++i-start] = '(';
981 structure[--j-start] = ')';
985 } /* switch(dangles)... */
987 /* modular decomposition */
988 for (k = i+1+TURN; k <= j-2-TURN; k++)
989 if (fij == (fML[i][k-i]+fML[k+1][j-(k+1)]))
992 if ((dangles==3)&&(k>j-2-TURN)) { /* must be coax stack */
994 for (k = i+1+TURN; k <= j-2-TURN; k++) {
995 type = ptype[i][k-i]; type= rtype[type];
996 type_2 = ptype[k+1][j-(k+1)]; type_2= rtype[type_2];
998 if (fij == c[i][k-i]+c[k+1][j-(k+1)]+P->stack[type][type_2]+
1007 sector[++s].i = k+1;
1011 if (k>j-2-TURN) nrerror("backtrack failed in fML");
1017 /*----- begin of "repeat:" -----*/
1018 if (canonical) cij = c[i][j-i];
1020 type = ptype[i][j-i];
1024 if (cij == c[i][j-i]) {
1025 /* (i.j) closes canonical structures, thus
1026 (i+1.j-1) must be a pair */
1027 type_2 = ptype[i+1][j-1-(i+1)]; type_2 = rtype[type_2];
1028 cij -= P->stack[type][type_2];
1029 structure[i+1-start] = '('; structure[j-1-start] = ')';
1037 no_close = (((type==3)||(type==4))&&no_closingGU);
1039 if (cij == FORBIDDEN) continue;
1041 if (cij == E_Hairpin(j-i-1, type, S1[i+1], S1[j-1],string+i-1, P))
1044 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
1046 minq = j-i+p-MAXLOOP-2;
1047 if (minq<p+1+TURN) minq = p+1+TURN;
1048 for (q = j-1; q >= minq; q--) {
1050 type_2 = ptype[p][q-p];
1051 if (type_2==0) continue;
1052 type_2 = rtype[type_2];
1054 if (no_close||(type_2==3)||(type_2==4))
1055 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
1057 /* energy = oldLoopEnergy(i, j, p, q, type, type_2); */
1058 energy = E_IntLoop(p-i-1, j-q-1, type, type_2, S1[i+1], S1[j-1], S1[p-1], S1[q+1],P);
1060 new = energy+c[p][q-p];
1061 traced = (cij == new);
1063 structure[p-start] = '(';
1064 structure[q-start] = ')';
1071 /* end of repeat: --------------------------------------------------*/
1073 /* (i.j) must close a multi-loop */
1079 The case that is handled here actually resembles something like
1080 an interior loop where the enclosing base pair is of regular
1081 kind and the enclosed pair is not a canonical one but a g-quadruplex
1082 that should then be decomposed further...
1084 if(backtrack_GQuad_IntLoop_L(cij, i, j, type, S, ggg, maxdist, &p, &q, P)){
1090 sector[s+1].ml = sector[s+2].ml = 1;
1093 case 0: mm = P->MLclosing + E_MLstem(tt, -1, -1, P);
1094 for(k = i+2+TURN; k < j-2-TURN; k++){
1095 if(cij == fML[i+1][k-(i+1)] + fML[k+1][j-1-(k+1)] + mm)
1099 case 2: mm = P->MLclosing + E_MLstem(tt, S1[j-1], S1[i+1], P);
1100 for(k = i+2+TURN; k < j-2-TURN; k++){
1101 if(cij == fML[i+1][k-(i+1)] + fML[k+1][j-1-(k+1)] + mm)
1105 default: mm = P->MLclosing + E_MLstem(tt, -1, -1, P);
1106 mm5 = P->MLclosing + E_MLstem(tt, S1[j-1], -1, P) + P->MLbase;
1107 mm3 = P->MLclosing + E_MLstem(tt, -1, S1[i+1], P) + P->MLbase;
1108 mm53 = P->MLclosing + E_MLstem(tt, S1[j-1], S1[i+1], P) + 2*P->MLbase;
1109 for(k = i+2+TURN; k < j-2-TURN; k++){
1110 if(cij == fML[i+1][k-(i+1)] + fML[k+1][j-1-(k+1)] + mm)
1112 else if(cij == fML[i+2][k-(i+2)] + fML[k+1][j-1-(k+1)] + mm3){
1116 else if(cij == fML[i+1][k-(i+1)] + fML[k+1][j-2-(k+1)] + mm5){
1120 else if(cij == fML[i+2][k-(i+2)] + fML[k+1][j-2-(k+1)] + mm53){
1125 /* coaxial stacking of (i.j) with (i+1.k) or (k.j-1) */
1126 /* use MLintern[1] since coax stacked pairs don't get TerminalAU */
1129 type_2 = ptype[i+1][k-(i+1)]; type_2 = rtype[type_2];
1131 en = c[i+1][k-(i+1)]+P->stack[type][type_2]+fML[k+1][j-1-(k+1)];
1132 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1138 type_2 = ptype[k+1][j-1-(k+1)]; type_2 = rtype[type_2];
1140 en = c[k+1][j-1-(k+1)]+P->stack[type][type_2]+fML[i+1][k-(i+1)];
1141 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1149 } /* switch(dangles)... */
1151 if (k<=j-3-TURN) { /* found the decomposition */
1154 sector[++s].i = k+1;
1158 /* Y shaped ML loops fon't work yet */
1160 /* (i,j) must close a Y shaped ML loop with coax stacking */
1161 if (cij == fML[i+1][j-2-(i+2)] + mm + d3 + d5 + P->MLbase + P->MLbase) {
1164 } else if (cij == fML[i+1][j-2-(i+1)] + mm + d5 + P->MLbase)
1166 else if (cij == fML[i+2][j-1-(i+2)] + mm + d3 + P->MLbase)
1168 else /* last chance */
1169 if (cij != fML[i+1][j-1-(i+1)] + mm + P->MLbase)
1170 fprintf(stderr, "backtracking failed in repeat");
1171 /* if we arrive here we can express cij via fML[i1,j1]+dangles */
1177 nrerror("backtracking failed in repeat");
1180 continue; /* this is a workarround to not accidentally proceed in the following block */
1184 now we do some fancy stuff to backtrace the stacksize and linker lengths
1185 of the g-quadruplex that should reside within position i,j
1191 get_gquad_pattern_mfe(S, i, j, P, &L, l);
1193 /* fill the G's of the quadruplex into the structure string */
1195 structure[i+a-start] = '+';
1196 structure[i+L+l[0]+a-start] = '+';
1197 structure[i+L+l[0]+L+l[1]+a-start] = '+';
1198 structure[i+L+l[0]+L+l[1]+L+l[2]+a-start] = '+';
1200 goto repeat_gquad_exit;
1202 nrerror("backtracking failed in repeat_gquad");
1209 for (i=strlen(structure)-1; i>0 && structure[i] == '-'; i--)
1210 structure[i] = '\0';
1212 if (structure[i]=='-') structure[i]='.';
1218 PRIVATE void update_fold_params(void){
1220 P = scale_parameters();
1224 /*---------------------------------------------------------------------------*/
1226 PRIVATE void make_ptypes(const short *S, int i, int maxdist, int n) {
1229 for (k=TURN+1; k<maxdist; k++) {
1232 type = pair[S[i]][S[j]];
1233 if (noLonelyPairs && type) {
1234 if (!ptype[i+1][j-1-i-1])
1235 if (j==n || i==1 || (!pair[S[i-1]][S[j+1]])) type=0;