1 /* Last changed Time-stamp: <2008-12-03 17:44:38 ivo> */
4 RNA secondary structure prediction
6 c Ivo Hofacker, Chrisoph Flamm
7 original implementation by
22 #include "energy_par.h"
23 #include "fold_vars.h"
28 #include "loop_energies.h"
38 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
39 #define NEW_NINIO 1 /* new asymetry penalty */
40 #define MAXSECTORS 500 /* dimension for a backtrack array */
41 #define LOCALITY 0. /* locality parameter for base-pairs */
43 #define TURN 0 /* reset minimal base pair span for intermolecular pairings */
44 #define TURN2 3 /* used by zukersubopt */
45 #define SAME_STRAND(I,J) (((I)>=cut_point)||((J)<cut_point))
48 #################################
50 #################################
55 #################################
57 #################################
60 PRIVATE float mfe1, mfe2; /* minimum free energies of the monomers */
61 PRIVATE int *indx = NULL; /* index for moving in the triangle matrices c[] and fMl[]*/
62 PRIVATE int *c = NULL; /* energy array, given that i-j pair */
63 PRIVATE int *cc = NULL; /* linear array for calculating canonical structures */
64 PRIVATE int *cc1 = NULL; /* " " */
65 PRIVATE int *f5 = NULL; /* energy of 5' end */
66 PRIVATE int *fc = NULL; /* energy from i to cutpoint (and vice versa if i>cut) */
67 PRIVATE int *fML = NULL; /* multi-loop auxiliary energy array */
68 PRIVATE int *fM1 = NULL; /* second ML array, only for subopt */
69 PRIVATE int *Fmi = NULL; /* holds row i of fML (avoids jumps in memory) */
70 PRIVATE int *DMLi = NULL; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
71 PRIVATE int *DMLi1 = NULL; /* MIN(fML[i+1,k]+fML[k+1,j]) */
72 PRIVATE int *DMLi2 = NULL; /* MIN(fML[i+2,k]+fML[k+1,j]) */
73 PRIVATE char *ptype = NULL; /* precomputed array of pair types */
74 PRIVATE short *S = NULL, *S1 = NULL;
75 PRIVATE paramT *P = NULL;
76 PRIVATE int init_length = -1;
77 PRIVATE int zuker = 0; /* Do Zuker style suboptimals? */
78 PRIVATE sect sector[MAXSECTORS]; /* stack for backtracking */
80 PRIVATE bondT *base_pair2 = NULL;
81 PRIVATE int *BP; /* contains the structure constrainsts: BP[i]
82 -1: | = base must be paired
83 -2: < = base must be paired with j<i
84 -3: > = base must be paired with j>i
85 -4: x = base must not pair
86 positive int: base is paired with int */
87 PRIVATE int struct_constrained = 0;
88 PRIVATE int with_gquad = 0;
90 PRIVATE int *ggg = NULL; /* minimum free energies of the gquadruplexes */
94 #pragma omp threadprivate(mfe1, mfe2, indx, c, cc, cc1, f5, fc, fML, fM1, Fmi, DMLi, DMLi1, DMLi2,\
95 ptype, S, S1, P, zuker, sector, length, base_pair2, BP, struct_constrained,\
101 #################################
102 # PRIVATE FUNCTION DECLARATIONS #
103 #################################
106 PRIVATE void init_cofold(int length, paramT *parameters);
107 PRIVATE void get_arrays(unsigned int size);
108 /* PRIVATE void scale_parameters(void); */
109 PRIVATE void make_ptypes(const short *S, const char *structure);
110 PRIVATE void backtrack(const char *sequence);
111 PRIVATE int fill_arrays(const char *sequence);
112 PRIVATE void free_end(int *array, int i, int start);
115 #################################
116 # BEGIN OF FUNCTION DEFINITIONS #
117 #################################
120 /*--------------------------------------------------------------------------*/
121 PRIVATE void init_cofold(int length, paramT *parameters){
124 /* Explicitly turn off dynamic threads */
128 if (length<1) nrerror("init_cofold: argument must be greater 0");
130 get_arrays((unsigned) length);
133 indx = get_indx((unsigned) length);
135 update_cofold_params_par(parameters);
138 /*--------------------------------------------------------------------------*/
140 PRIVATE void get_arrays(unsigned int size){
141 if(size >= (unsigned int)sqrt((double)INT_MAX))
142 nrerror("get_arrays@cofold.c: sequence length exceeds addressable range");
144 c = (int *) space(sizeof(int)*((size*(size+1))/2+2));
145 fML = (int *) space(sizeof(int)*((size*(size+1))/2+2));
147 fM1 = (int *) space(sizeof(int)*((size*(size+1))/2+2));
149 ptype = (char *) space(sizeof(char)*((size*(size+1))/2+2));
150 f5 = (int *) space(sizeof(int)*(size+2));
151 fc = (int *) space(sizeof(int)*(size+2));
152 cc = (int *) space(sizeof(int)*(size+2));
153 cc1 = (int *) space(sizeof(int)*(size+2));
154 Fmi = (int *) space(sizeof(int)*(size+1));
155 DMLi = (int *) space(sizeof(int)*(size+1));
156 DMLi1 = (int *) space(sizeof(int)*(size+1));
157 DMLi2 = (int *) space(sizeof(int)*(size+1));
159 base_pair2 = (bondT *) space(sizeof(bondT)*(1+size/2+200)); /* add a guess of how many G's may be involved in a G quadruplex */
162 /*--------------------------------------------------------------------------*/
164 PUBLIC void free_co_arrays(void){
172 if(ptype) free(ptype);
174 if(base_pair2) free(base_pair2);
177 if(DMLi1) free(DMLi1);
178 if(DMLi2) free(DMLi2);
182 indx = c = fML = f5 = cc = cc1 = fc = fM1 = Fmi = DMLi = DMLi1 = DMLi2 = ggg = NULL;
190 /*--------------------------------------------------------------------------*/
192 PUBLIC void export_cofold_arrays_gq( int **f5_p,
201 /* make the DP arrays available to routines such as subopt() */
202 *f5_p = f5; *c_p = c;
203 *fML_p = fML; *fM1_p = fM1;
205 *indx_p = indx; *ptype_p = ptype;
209 PUBLIC void export_cofold_arrays( int **f5_p,
217 /* make the DP arrays available to routines such as subopt() */
218 *f5_p = f5; *c_p = c;
219 *fML_p = fML; *fM1_p = fM1;
220 *indx_p = indx; *ptype_p = ptype;
224 /*--------------------------------------------------------------------------*/
226 PUBLIC float cofold(const char *string, char *structure) {
227 return cofold_par(string, structure, NULL, fold_constrained);
230 PUBLIC float cofold_par(const char *string,
235 int i, length, energy, bonus=0, bonus_cnt=0;
239 struct_constrained = is_constrained;
240 length = (int) strlen(string);
243 /* always init everything since all global static variables are uninitialized when entering a thread */
244 init_cofold(length, parameters);
246 if(parameters) init_cofold(length, parameters);
247 else if (length>init_length) init_cofold(length, parameters);
248 else if (fabs(P->temperature - temperature)>1e-6) update_cofold_params_par(parameters);
251 with_gquad = P->model_details.gquad;
252 S = encode_sequence(string, 0);
253 S1 = encode_sequence(string, 1);
254 S1[0] = S[0]; /* store length at pos. 0 */
256 BP = (int *)space(sizeof(int)*(length+2));
257 make_ptypes(S, structure);
259 energy = fill_arrays(string);
264 parenthesis_structure(structure, base_pair2, length);
266 letter_structure(structure, base_pair2, length);
270 * Backward compatibility:
271 * This block may be removed if deprecated functions
272 * relying on the global variable "base_pair" vanish from within the package!
274 base_pair = base_pair2;
277 if(base_pair) free(base_pair);
278 base_pair = (bondT *)space(sizeof(bondT) * (1+length/2));
279 memcpy(base_pair, base_pair2, sizeof(bondT) * (1+length/2));
283 /* check constraints */
284 for(i=1;i<=length;i++) {
285 if((BP[i]<0)&&(BP[i]>-4)) {
287 if((BP[i]==-3)&&(structure[i-1]==')')) bonus++;
288 if((BP[i]==-2)&&(structure[i-1]=='(')) bonus++;
289 if((BP[i]==-1)&&(structure[i-1]!='.')) bonus++;
295 for(l=1; l<=base_pair2[0].i; l++)
296 if(base_pair2[l].i != base_pair2[l].j)
297 if((i==base_pair2[l].i)&&(BP[i]==base_pair2[l].j)) bonus++;
301 if (bonus_cnt>bonus) fprintf(stderr,"\ncould not enforce all constraints\n");
304 free(S); free(S1); free(BP);
306 energy += bonus; /*remove bonus energies from result */
308 if (backtrack_type=='C')
309 return (float) c[indx[length]+1]/100.;
310 else if (backtrack_type=='M')
311 return (float) fML[indx[length]+1]/100.;
313 return (float) energy/100.;
316 PRIVATE int fill_arrays(const char *string) {
317 /* fill "c", "fML" and "f5" arrays and return optimal energy */
319 int i, j, k, length, energy;
320 int decomp, new_fML, max_separation;
321 int no_close, type, type_2, tt, maxj;
323 int dangle_model = P->model_details.dangles;
324 int noGUclosure = P->model_details.noGUclosure;
325 int noLP = P->model_details.noLP;
327 length = (int) strlen(string);
329 max_separation = (int) ((1.-LOCALITY)*(double)(length-2)); /* not in use */
332 ggg = get_gquad_matrix(S, P);
334 for (j=1; j<=length; j++) {
335 Fmi[j]=DMLi[j]=DMLi1[j]=DMLi2[j]=INF;
339 for (j = 1; j<=length; j++)
340 for (i=1; i<=j; i++) {
341 c[indx[j]+i] = fML[indx[j]+i] = INF;
342 if (uniq_ML) fM1[indx[j]+i] = INF;
345 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
347 maxj=(zuker)? (MIN2(i+cut_point-1,length)):length;
348 for (j = i+TURN+1; j <= maxj; j++) {
354 /* enforcing structure constraints */
355 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
356 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
357 if ((BP[i]==-4)||(BP[j]==-4)) type=0;
359 no_close = (((type==3)||(type==4))&&noGUclosure&&(bonus==0));
361 if (j-i-1 > max_separation) type = 0; /* forces locality degree */
363 if (type) { /* we have a pair */
364 int new_c=0, stackEnergy=INF;
366 si = SAME_STRAND(i, i+1) ? S1[i+1] : -1;
367 sj = SAME_STRAND(j-1, j) ? S1[j-1] : -1;
368 /* hairpin ----------------------------------------------*/
370 if (SAME_STRAND(i,j)) {
371 if (no_close) new_c = FORBIDDEN;
373 new_c = E_Hairpin(j-i-1, type, si, sj, string+i-1, P);
377 new_c += E_ExtLoop(rtype[type], sj, si, P);
379 new_c += E_ExtLoop(rtype[type], -1, -1, P);
381 /*--------------------------------------------------------
382 check for elementary structures involving more than one
384 --------------------------------------------------------*/
386 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
387 int minq = j-i+p-MAXLOOP-2;
388 if (minq<p+1+TURN) minq = p+1+TURN;
389 for (q = minq; q < j; q++) {
390 type_2 = ptype[indx[q]+p];
392 if (type_2==0) continue;
393 type_2 = rtype[type_2];
396 if (no_close||(type_2==3)||(type_2==4))
397 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
399 if (SAME_STRAND(i,p) && SAME_STRAND(q,j))
400 energy = E_IntLoop(p-i-1, j-q-1, type, type_2, si, sj, S1[p-1], S1[q+1], P);
402 energy = E_IntLoop_Co(rtype[type], rtype[type_2],
410 new_c = MIN2(energy+c[indx[q]+p], new_c);
411 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
416 /* multi-loop decomposition ------------------------*/
422 if((si >= 0) && (sj >= 0)){
425 MLenergy = P->MLclosing;
426 switch(dangle_model){
427 case 0: MLenergy += decomp + E_MLstem(tt, -1, -1, P);
429 case 2: MLenergy += decomp + E_MLstem(tt, sj, si, P);
431 default: decomp += E_MLstem(tt, -1, -1, P);
432 decomp = MIN2(decomp, DMLi1[j-2] + E_MLstem(tt, sj, -1, P) + P->MLbase);
433 decomp = MIN2(decomp, DMLi2[j-1] + E_MLstem(tt, -1, si, P) + P->MLbase);
434 decomp = MIN2(decomp, DMLi2[j-2] + E_MLstem(tt, sj, si, P) + 2*P->MLbase);
438 new_c = MIN2(new_c, MLenergy);
441 if (!SAME_STRAND(i,j)) { /* cut is somewhere in the multiloop*/
442 decomp = fc[i+1] + fc[j-1];
444 switch(dangle_model){
445 case 0: decomp += E_ExtLoop(tt, -1, -1, P);
447 case 2: decomp += E_ExtLoop(tt, sj, si, P);
449 default: decomp += E_ExtLoop(tt, -1, -1, P);
450 decomp = MIN2(decomp, fc[i+2] + fc[j-2] + E_ExtLoop(tt, sj, si, P));
451 decomp = MIN2(decomp, fc[i+2] + fc[j-1] + E_ExtLoop(tt, -1, si, P));
452 decomp = MIN2(decomp, fc[i+1] + fc[j-2] + E_ExtLoop(tt, sj, -1, P));
455 new_c = MIN2(new_c, decomp);
457 } /* end >> if (!no_close) << */
459 /* coaxial stacking of (i.j) with (i+1.k) or (k+1.j-1) */
461 if (dangle_model==3) {
463 for (k = i+2+TURN; k < j-2-TURN; k++) {
464 type_2 = ptype[indx[k]+i+1]; type_2 = rtype[type_2];
466 decomp = MIN2(decomp, c[indx[k]+i+1]+P->stack[type][type_2]+
468 type_2 = ptype[indx[j-1]+k+1]; type_2 = rtype[type_2];
470 decomp = MIN2(decomp, c[indx[j-1]+k+1]+P->stack[type][type_2]+
473 /* no TermAU penalty if coax stack */
474 decomp += 2*P->MLintern[1] + P->MLclosing;
475 new_c = MIN2(new_c, decomp);
479 /* include all cases where a g-quadruplex may be enclosed by base pair (i,j) */
480 if (!no_close && SAME_STRAND(i,j)) {
482 energy = E_GQuad_IntLoop(i, j, type, S1, ggg, indx, P);
483 new_c = MIN2(new_c, energy);
487 new_c = MIN2(new_c, cc1[j-1]+stackEnergy);
488 cc[j] = new_c + bonus;
490 if (SAME_STRAND(i,i+1) && SAME_STRAND(j-1,j))
491 c[ij] = cc1[j-1]+stackEnergy+bonus;
492 else /* currently we don't allow stacking over the cut point */
498 } /* end >> if (pair) << */
503 /* done with c[i,j], now compute fML[i,j] */
504 /* free ends ? -----------------------------------------*/
506 if (SAME_STRAND(i-1,i)) {
507 if (SAME_STRAND(i,i+1)) new_fML = fML[ij+1]+P->MLbase;
508 if (SAME_STRAND(j-1,j)) new_fML = MIN2(fML[indx[j-1]+i]+P->MLbase, new_fML);
509 if (SAME_STRAND(j,j+1)) {
511 if(dangle_model == 2) energy += E_MLstem(type,(i>1) ? S1[i-1] : -1, (j<length) ? S1[j+1] : -1, P);
512 else energy += E_MLstem(type, -1, -1, P);
513 new_fML = MIN2(new_fML, energy);
516 if(SAME_STRAND(j-1,j)) fM1[ij] = MIN2(energy, fM1[indx[j-1]+i] + P->MLbase);
519 if (dangle_model%2==1) { /* normal dangles */
520 if (SAME_STRAND(i,i+1)) {
521 tt = ptype[ij+1]; /* i+1,j */
522 new_fML = MIN2(new_fML, c[ij+1] + P->MLbase + E_MLstem(tt, S1[i], -1, P));
524 if (SAME_STRAND(j-1,j)) {
525 tt = ptype[indx[j-1]+i]; /* i,j-1 */
526 new_fML = MIN2(new_fML, c[indx[j-1]+i] + P->MLbase + E_MLstem(tt, -1, S1[j], P));
528 if ((SAME_STRAND(j-1,j))&&(SAME_STRAND(i,i+1))) {
529 tt = ptype[indx[j-1]+i+1]; /* i+1,j-1 */
530 new_fML = MIN2(new_fML, c[indx[j-1]+i+1] + 2*P->MLbase + E_MLstem(tt, S1[i], S1[j], P));
536 if(SAME_STRAND(i, j))
537 new_fML = MIN2(new_fML, ggg[indx[j] + i] + E_MLstem(0, -1, -1, P));
540 /* modular decomposition -------------------------------*/
543 int stopp; /*loop 1 up to cut, then loop 2*/
544 stopp=(cut_point>0)? (cut_point):(j-2-TURN);
545 for (decomp=INF, k = i+1+TURN; k<stopp; k++)
546 decomp = MIN2(decomp, Fmi[k]+fML[indx[j]+k+1]);
548 for (;k <= j-2-TURN;k++)
549 decomp = MIN2(decomp, Fmi[k]+fML[indx[j]+k+1]);
551 DMLi[j] = decomp; /* store for use in ML decompositon */
552 new_fML = MIN2(new_fML,decomp);
554 /* coaxial stacking */
555 if (dangle_model==3) {
557 stopp=(cut_point>0)? (cut_point):(j-2-TURN);
558 /* additional ML decomposition as two coaxially stacked helices */
559 for (decomp = INF, k = i+1+TURN; k<stopp; k++) {
560 type = ptype[indx[k]+i]; type = rtype[type];
561 type_2 = ptype[indx[j]+k+1]; type_2 = rtype[type_2];
563 decomp = MIN2(decomp,
564 c[indx[k]+i]+c[indx[j]+k+1]+P->stack[type][type_2]);
567 for (;k <= j-2-TURN; k++) {
568 type = ptype[indx[k]+i]; type = rtype[type];
569 type_2 = ptype[indx[j]+k+1]; type_2 = rtype[type_2];
571 decomp = MIN2(decomp,
572 c[indx[k]+i]+c[indx[j]+k+1]+P->stack[type][type_2]);
575 decomp += 2*P->MLintern[1];
578 /* This is needed for Y shaped ML loops with coax stacking of
579 interior pairs, but backtracking will fail if activated */
580 DMLi[j] = MIN2(DMLi[j], decomp);
581 if (SAME_STRAND(j-1,j)) DMLi[j] = MIN2(DMLi[j], DMLi[j-1]+P->MLbase);
582 if (SAME_STRAND(i,i+1)) DMLi[j] = MIN2(DMLi[j], DMLi1[j]+P->MLbase);
583 new_fML = MIN2(new_fML, DMLi[j]);
585 new_fML = MIN2(new_fML, decomp);
588 fML[ij] = Fmi[j] = new_fML; /* substring energy */
593 for (j=i; j<=maxj; j++)
594 free_end(fc, j, cut_point);
596 free_end(fc,i,cut_point-1);
600 int *FF; /* rotate the auxilliary arrays */
601 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
602 FF = cc1; cc1=cc; cc=FF;
603 for (j=1; j<=maxj; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
607 /* calculate energies of 5' and 3' fragments */
609 for (i=1; i<=length; i++)
613 mfe1=f5[cut_point-1];
615 /* add DuplexInit, check whether duplex*/
616 for (i=cut_point; i<=length; i++) {
617 f5[i]=MIN2(f5[i]+P->DuplexInit, fc[i]+fc[1]);
622 if (cut_point<1) mfe1=mfe2=energy;
626 PRIVATE void backtrack_co(const char *string, int s, int b /* b=0: start new structure, b \ne 0: add to existing structure */) {
628 /*------------------------------------------------------------------
629 trace back through the "c", "fc", "f5" and "fML" arrays to get the
630 base pairing list. No search for equivalent structures is done.
631 This is fast, since only few structure elements are recalculated.
632 ------------------------------------------------------------------*/
634 int i, j, k, length, energy, new;
635 int no_close, type, type_2, tt;
637 int dangle_model = P->model_details.dangles;
638 int noGUclosure = P->model_details.noGUclosure;
639 int noLP = P->model_details.noLP;
643 length = strlen(string);
646 sector[s].j = length;
647 sector[s].ml = (backtrack_type=='M') ? 1 : ((backtrack_type=='C')?2:0);
650 int ml, fij, fi, cij, traced, i1, j1, mm, p, q, jj=0, gq=0;
651 int canonical = 1; /* (i,j) closes a canonical structure */
654 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
655 occur in the fML- (1) or in the f-array (0) */
657 base_pair2[++b].i = i;
662 if (j < i+TURN+1) continue; /* no more pairs in this interval */
665 if (ml==0) {fij = f5[j]; fi = f5[j-1];}
666 else if (ml==1) {fij = fML[indx[j]+i]; fi = fML[indx[j-1]+i]+P->MLbase;}
669 fi = (ml==3) ? INF : fc[j-1];
671 if (fij == fi) { /* 3' end is unpaired */
678 if (ml==0 || ml==4) { /* backtrack in f5 or fc[i=cut,j>cut] */
680 ff = (ml==4) ? fc : f5;
681 switch(dangle_model){
682 case 0: /* j or j-1 is paired. Find pairing partner */
683 for (k=j-TURN-1,traced=0; k>=i; k--) {
687 if(fij == ff[k-1] + ggg[indx[j]+k]){
688 /* found the decomposition */
689 traced = j; jj = k - 1; gq = 1;
694 type = ptype[indx[j]+k];
697 if(!SAME_STRAND(k,j)) cc += P->DuplexInit;
698 if(fij == ff[k-1] + cc + E_ExtLoop(type, -1, -1, P)){
699 traced = j; jj = k-1;
707 case 2: /* j or j-1 is paired. Find pairing partner */
708 for (k=j-TURN-1,traced=0; k>=i; k--) {
712 if(fij == ff[k-1] + ggg[indx[j]+k]){
713 /* found the decomposition */
714 traced = j; jj = k - 1; gq = 1;
719 type = ptype[indx[j]+k];
722 if(!SAME_STRAND(k,j)) cc += P->DuplexInit;
723 if(fij == ff[k-1] + cc + E_ExtLoop(type, (k>1) && SAME_STRAND(k-1,k) ? S1[k-1] : -1, (j<length) && SAME_STRAND(j,j+1) ? S1[j+1] : -1, P)){
724 traced = j; jj = k-1;
731 default: for(k=j-TURN-1,traced=0; k>=i; k--){
733 type = ptype[indx[j]+k];
736 if(fij == ff[k-1] + ggg[indx[j]+k]){
737 /* found the decomposition */
738 traced = j; jj = k - 1; gq = 1;
745 if(!SAME_STRAND(k,j)) cc += P->DuplexInit;
746 if(fij == ff[k-1] + cc + E_ExtLoop(type, -1, -1, P)){
747 traced = j; jj = k-1; break;
749 if((k>1) && SAME_STRAND(k-1,k))
750 if(fij == ff[k-2] + cc + E_ExtLoop(type, S1[k-1], -1, P)){
751 traced=j; jj=k-2; break;
755 type = ptype[indx[j-1]+k];
756 if(type && SAME_STRAND(j-1,j)){
758 if (!SAME_STRAND(k,j-1)) cc += P->DuplexInit; /*???*/
759 if (fij == cc + ff[k-1] + E_ExtLoop(type, -1, S1[j], P)){
760 traced=j-1; jj = k-1; break;
763 if (fij == ff[k-2] + cc + E_ExtLoop(type, SAME_STRAND(k-1,k) ? S1[k-1] : -1, S1[j], P)){
764 traced=j-1; jj=k-2; break;
772 if (!traced) nrerror("backtrack failed in f5 (or fc)");
779 if(with_gquad && gq){
780 /* goto backtrace of gquadruplex */
784 base_pair2[++b].i = i;
788 else if (ml==3) { /* backtrack in fc[i<cut,j=cut-1] */
789 if (fc[i] == fc[i+1]) { /* 5' end is unpaired */
795 /* i or i+1 is paired. Find pairing partner */
796 switch(dangle_model){
797 case 0: for (k=i+TURN+1, traced=0; k<=j; k++){
799 type = ptype[indx[k]+i];
801 if(fc[i] == fc[k+1] + c[indx[k]+i] + E_ExtLoop(type, -1, -1, P)){
808 case 2: for (k=i+TURN+1, traced=0; k<=j; k++){
810 type = ptype[indx[k]+i];
812 if(fc[i] == fc[k+1] + c[indx[k]+i] + E_ExtLoop(type,(i>1 && SAME_STRAND(i-1,i)) ? S1[i-1] : -1, SAME_STRAND(k,k+1) ? S1[k+1] : -1, P)){
819 default: for(k=i+TURN+1, traced=0; k<=j; k++){
821 type = ptype[indx[k]+i];
823 if(fc[i] == fc[k+1] + c[indx[k]+i] + E_ExtLoop(type, -1, -1, P)){
826 else if(fc[i] == fc[k+2] + c[indx[k]+i] + E_ExtLoop(type, -1, SAME_STRAND(k,k+1) ? S1[k+1] : -1, P)){
827 traced = i; jj=k+2; break;
830 type = ptype[indx[k]+i+1];
832 if(fc[i] == fc[k+1] + c[indx[k]+i+1] + E_ExtLoop(type, SAME_STRAND(i, i+1) ? S1[i] : -1, -1, P)){
836 if(fc[i] == fc[k+2] + c[indx[k]+i+1] + E_ExtLoop(type, SAME_STRAND(i, i+1) ? S1[i] : -1, SAME_STRAND(k, k+1) ? S1[k+1] : -1, P)){
837 traced = i+1; jj=k+2; break;
845 if (!traced) nrerror("backtrack failed in fc[] 5' of cut");
852 base_pair2[++b].i = i;
857 else { /* true multi-loop backtrack in fML */
858 if (fML[indx[j]+i+1]+P->MLbase == fij) { /* 5' end is unpaired */
866 if(fij == ggg[indx[j]+i] + E_MLstem(0, -1, -1, P)){
867 /* go to backtracing of quadruplex */
872 tt = ptype[indx[j]+i];
874 switch(dangle_model){
875 case 0: if(fij == cij + E_MLstem(tt, -1, -1, P)){
876 base_pair2[++b].i = i;
881 case 2: if(fij == cij + E_MLstem(tt, (i>1) ? S1[i-1] : -1, (j<length) ? S1[j+1] : -1, P)){
882 base_pair2[++b].i = i;
887 default: if(fij == cij + E_MLstem(tt, -1, -1, P)){
888 base_pair2[++b].i = i;
892 tt = ptype[indx[j]+i+1];
893 if(fij == c[indx[j]+i+1] + P->MLbase + E_MLstem(tt, S1[i], -1, P)){
895 base_pair2[++b].i = i;
899 tt = ptype[indx[j-1]+i];
900 if(fij == c[indx[j-1]+i] + P->MLbase + E_MLstem(tt, -1, S1[j], P)){
902 base_pair2[++b].i = i;
906 tt = ptype[indx[j-1]+i+1];
907 if(fij == c[indx[j-1]+i+1] + 2*P->MLbase + E_MLstem(tt, S1[i], S1[j], P)){
909 base_pair2[++b].i = i;
916 /* find next component of multiloop */
917 for (k = i+1+TURN; k <= j-2-TURN; k++)
918 if (fij == (fML[indx[k]+i]+fML[indx[j]+k+1]))
921 if ((dangle_model==3)&&(k>j-2-TURN)) { /* must be coax stack */
923 for (k = i+1+TURN; k <= j-2-TURN; k++) {
924 type = ptype[indx[k]+i]; type= rtype[type];
925 type_2 = ptype[indx[j]+k+1]; type_2= rtype[type_2];
927 if (fij == c[indx[k]+i]+c[indx[j]+k+1]+P->stack[type][type_2]+
940 if (k>j-2-TURN) nrerror("backtrack failed in fML");
946 /*----- begin of "repeat:" -----*/
947 if (canonical) cij = c[indx[j]+i];
948 type = ptype[indx[j]+i];
952 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
953 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
956 if (cij == c[indx[j]+i]) {
957 /* (i.j) closes canonical structures, thus
958 (i+1.j-1) must be a pair */
959 type_2 = ptype[indx[j-1]+i+1]; type_2 = rtype[type_2];
960 cij -= P->stack[type][type_2] + bonus;
961 base_pair2[++b].i = i+1;
962 base_pair2[b].j = j-1;
970 no_close = (((type==3)||(type==4))&&noGUclosure&&(bonus==0));
971 if (SAME_STRAND(i,j)) {
973 if (cij == FORBIDDEN) continue;
975 if (cij == E_Hairpin(j-i-1, type, S1[i+1], S1[j-1],string+i-1, P)+bonus)
980 if(cij == E_ExtLoop(rtype[type], SAME_STRAND(j-1,j) ? S1[j-1] : -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P)) continue;
982 else if(cij == E_ExtLoop(rtype[type], -1, -1, P)) continue;
985 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
987 minq = j-i+p-MAXLOOP-2;
988 if (minq<p+1+TURN) minq = p+1+TURN;
989 for (q = j-1; q >= minq; q--) {
991 type_2 = ptype[indx[q]+p];
992 if (type_2==0) continue;
993 type_2 = rtype[type_2];
995 if (no_close||(type_2==3)||(type_2==4))
996 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
998 /* energy = oldLoopEnergy(i, j, p, q, type, type_2); */
999 if (SAME_STRAND(i,p) && SAME_STRAND(q,j))
1000 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
1001 S1[i+1], S1[j-1], S1[p-1], S1[q+1], P);
1003 energy = E_IntLoop_Co(rtype[type], rtype[type_2], i, j, p, q, cut_point, S1[i+1], S1[j-1], S1[p-1], S1[q+1], dangle_model, P);
1006 new = energy+c[indx[q]+p]+bonus;
1007 traced = (cij == new);
1009 base_pair2[++b].i = p;
1010 base_pair2[b].j = q;
1017 /* end of repeat: --------------------------------------------------*/
1019 /* (i.j) must close a fake or true multi-loop */
1026 The case that is handled here actually resembles something like
1027 an interior loop where the enclosing base pair is of regular
1028 kind and the enclosed pair is not a canonical one but a g-quadruplex
1029 that should then be decomposed further...
1031 if(SAME_STRAND(i,j)){
1032 if(backtrack_GQuad_IntLoop(cij, i, j, type, S, ggg, indx, &p, &q, P)){
1039 /* fake multi-loop */
1040 if(!SAME_STRAND(i,j)){
1043 decomp = fc[i1] + fc[j1];
1044 switch(dangle_model){
1045 case 0: if(cij == decomp + E_ExtLoop(tt, -1, -1, P)){
1049 case 2: if(cij == decomp + E_ExtLoop(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P)){
1054 default: if(cij == decomp + E_ExtLoop(tt, -1, -1, P)){
1057 else if(cij == fc[i+2] + fc[j-1] + E_ExtLoop(tt, -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P)){
1060 else if(cij == fc[i+1] + fc[j-2] + E_ExtLoop(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, -1, P)){
1063 else if(cij == fc[i+2] + fc[j-2] + E_ExtLoop(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P)){
1070 sector[s].j = cut_point-1;
1072 sector[++s].i = cut_point;
1079 /* true multi-loop */
1080 mm = bonus + P->MLclosing;
1081 sector[s+1].ml = sector[s+2].ml = 1;
1082 int ml0 = E_MLstem(tt, -1, -1, P);
1083 int ml5 = E_MLstem(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, -1, P);
1084 int ml3 = E_MLstem(tt, -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P);
1085 int ml53 = E_MLstem(tt, SAME_STRAND(j-1,j) ? S1[j-1] : -1, SAME_STRAND(i,i+1) ? S1[i+1] : -1, P);
1086 for (traced = 0, k = i+2+TURN; k < j-2-TURN; k++) {
1087 switch(dangle_model){
1088 case 0: /* no dangles */
1089 if(cij == mm + fML[indx[k]+i+1] + fML[indx[j-1]+k+1] + ml0)
1092 case 2: /*double dangles */
1093 if(cij == mm + fML[indx[k]+i+1] + fML[indx[j-1]+k+1] + ml53)
1096 default: /* normal dangles */
1097 if(cij == mm + fML[indx[k]+i+1] + fML[indx[j-1]+k+1] + ml0){
1101 else if (cij == fML[indx[k]+i+2] + fML[indx[j-1]+k+1] + ml3 + mm + P->MLbase){
1105 else if (cij == fML[indx[k]+i+1] + fML[indx[j-2]+k+1] + ml5 + mm + P->MLbase){
1106 traced = i1 = i+1; j1 = j-2;
1109 else if (cij == fML[indx[k]+i+2] + fML[indx[j-2]+k+1] + ml53 + mm + 2*P->MLbase){
1110 traced = i1 = i+2; j1 = j-2;
1116 /* coaxial stacking of (i.j) with (i+1.k) or (k.j-1) */
1117 /* use MLintern[1] since coax stacked pairs don't get TerminalAU */
1118 if (dangle_model==3) {
1120 type_2 = ptype[indx[k]+i+1]; type_2 = rtype[type_2];
1122 en = c[indx[k]+i+1]+P->stack[type][type_2]+fML[indx[j-1]+k+1];
1123 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1129 type_2 = ptype[indx[j-1]+k+1]; type_2 = rtype[type_2];
1131 en = c[indx[j-1]+k+1]+P->stack[type][type_2]+fML[indx[k]+i+1];
1132 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1139 if (k<=j-3-TURN) { /* found the decomposition */
1142 sector[++s].i = k+1;
1146 /* Y shaped ML loops don't work yet */
1147 if (dangle_model==3) {
1148 /* (i,j) must close a Y shaped ML loop with coax stacking */
1149 if (cij == fML[indx[j-2]+i+2] + mm + d3 + d5 + P->MLbase + P->MLbase) {
1152 } else if (cij == fML[indx[j-2]+i+1] + mm + d5 + P->MLbase)
1154 else if (cij == fML[indx[j-1]+i+2] + mm + d3 + P->MLbase)
1156 else /* last chance */
1157 if (cij != fML[indx[j-1]+i+1] + mm + P->MLbase)
1158 fprintf(stderr, "backtracking failed in repeat");
1159 /* if we arrive here we can express cij via fML[i1,j1]+dangles */
1165 nrerror("backtracking failed in repeat");
1168 continue; /* this is a workarround to not accidentally proceed in the following block */
1172 now we do some fancy stuff to backtrace the stacksize and linker lengths
1173 of the g-quadruplex that should reside within position i,j
1179 get_gquad_pattern_mfe(S, i, j, P, &L, l);
1181 /* fill the G's of the quadruplex into base_pair2 */
1183 base_pair2[++b].i = i+a;
1184 base_pair2[b].j = i+a;
1185 base_pair2[++b].i = i+L+l[0]+a;
1186 base_pair2[b].j = i+L+l[0]+a;
1187 base_pair2[++b].i = i+L+l[0]+L+l[1]+a;
1188 base_pair2[b].j = i+L+l[0]+L+l[1]+a;
1189 base_pair2[++b].i = i+L+l[0]+L+l[1]+L+l[2]+a;
1190 base_pair2[b].j = i+L+l[0]+L+l[1]+L+l[2]+a;
1192 goto repeat_gquad_exit;
1194 nrerror("backtracking failed in repeat_gquad");
1199 } /* end >> while (s>0) << */
1201 base_pair2[0].i = b; /* save the total number of base pairs */
1204 PRIVATE void free_end(int *array, int i, int start) {
1205 int inc, type, energy, length, j, left, right;
1206 int dangle_model = P->model_details.dangles;
1208 inc = (i>start)? 1:-1;
1211 if (i==start) array[i]=0;
1212 else array[i] = array[i-inc];
1214 left = start; right=i;
1216 left = i; right = start;
1219 for (j=start; inc*(i-j)>TURN; j+=inc) {
1222 if (i>j) { ii = j; jj = i;} /* inc>0 */
1223 else { ii = i; jj = j;} /* inc<0 */
1224 type = ptype[indx[jj]+ii];
1225 if (type) { /* i is paired with j */
1226 si = (ii>1) && SAME_STRAND(ii-1,ii) ? S1[ii-1] : -1;
1227 sj = (jj<length) && SAME_STRAND(jj,jj+1) ? S1[jj+1] : -1;
1228 energy = c[indx[jj]+ii];
1229 switch(dangle_model){
1231 array[i] = MIN2(array[i], array[j-inc] + energy + E_ExtLoop(type, -1, -1, P));
1234 array[i] = MIN2(array[i], array[j-inc] + energy + E_ExtLoop(type, si, sj, P));
1237 array[i] = MIN2(array[i], array[j-inc] + energy + E_ExtLoop(type, -1, -1, P));
1240 array[i] = MIN2(array[i], array[j-2] + energy + E_ExtLoop(type, si, -1, P));
1243 array[i] = MIN2(array[i], array[j+2] + energy + E_ExtLoop(type, -1, sj, P));
1249 if(SAME_STRAND(ii, jj))
1250 array[i] = MIN2(array[i], array[j-inc] + ggg[indx[jj]+ii]);
1253 if (dangle_model%2==1) {
1254 /* interval ends in a dangle (i.e. i-inc is paired) */
1255 if (i>j) { ii = j; jj = i-1;} /* inc>0 */
1256 else { ii = i+1; jj = j;} /* inc<0 */
1257 type = ptype[indx[jj]+ii];
1258 if (!type) continue;
1260 si = (ii > left) && SAME_STRAND(ii-1,ii) ? S1[ii-1] : -1;
1261 sj = (jj < right) && SAME_STRAND(jj,jj+1) ? S1[jj+1] : -1;
1262 energy = c[indx[jj]+ii];
1264 array[i] = MIN2(array[i], array[j - inc] + energy + E_ExtLoop(type, -1, sj, P));
1266 array[i] = MIN2(array[i], array[j - inc] + energy + E_ExtLoop(type, si, -1, P));
1267 if(j!= start){ /* dangle_model on both sides */
1268 array[i] = MIN2(array[i], array[j-2*inc] + energy + E_ExtLoop(type, si, sj, P));
1274 PUBLIC void update_cofold_params(void){
1275 update_cofold_params_par(NULL);
1278 PUBLIC void update_cofold_params_par(paramT *parameters){
1282 P = get_parameter_copy(parameters);
1285 set_model_details(&md);
1286 P = get_scaled_parameters(temperature, md);
1289 if (init_length < 0) init_length=0;
1292 /*---------------------------------------------------------------------------*/
1294 PRIVATE void make_ptypes(const short *S, const char *structure) {
1296 int noLP = P->model_details.noLP;
1299 for (k=1; k<n-TURN; k++)
1300 for (l=1; l<=2; l++) {
1301 int type,ntype=0,otype=0;
1302 i=k; j = i+TURN+l; if (j>n) continue;
1303 type = pair[S[i]][S[j]];
1304 while ((i>=1)&&(j<=n)) {
1305 if ((i>1)&&(j<n)) ntype = pair[S[i-1]][S[j+1]];
1306 if (noLP && (!otype) && (!ntype))
1307 type = 0; /* i.j can only form isolated pairs */
1308 ptype[indx[j]+i] = (char) type;
1315 if (struct_constrained && (structure != NULL))
1316 constrain_ptypes(structure, (unsigned int)n, ptype, BP, TURN, 0);
1319 PUBLIC void get_monomere_mfes(float *e1, float *e2) {
1320 /*exports monomere free energies*/
1325 PRIVATE void backtrack(const char *sequence) {
1326 /*routine to call backtrack_co from 1 to n, backtrack type??*/
1327 backtrack_co(sequence, 0,0);
1330 PRIVATE int comp_pair(const void *A, const void *B) {
1335 ex = c[indx[x->j]+x->i]+c[indx[x->i+length]+x->j];
1336 ey = c[indx[y->j]+y->i]+c[indx[y->i+length]+y->j];
1337 if (ex>ey) return 1;
1338 if (ex<ey) return -1;
1339 return (indx[x->j]+x->i - indx[y->j]+y->i);
1342 PUBLIC SOLUTION *zukersubopt(const char *string) {
1343 return zukersubopt_par(string, NULL);
1346 PUBLIC SOLUTION *zukersubopt_par(const char *string, paramT *parameters){
1348 /* Compute zuker suboptimal. Here, we're abusing the cofold() code
1349 "double" sequence, compute dimerarray entries, track back every base pair.
1350 This is slightly wasteful compared to the normal solution */
1352 char *doubleseq, *structure, *mfestructure, **todo;
1353 int i, j, counter, num_pairs, psize, p;
1355 SOLUTION *zukresults;
1358 num_pairs = counter = 0;
1360 length = (int)strlen(string);
1361 doubleseq = (char *)space((2*length+1)*sizeof(char));
1362 mfestructure = (char *) space((unsigned) 2*length+1);
1363 structure = (char *) space((unsigned) 2*length+1);
1364 zukresults = (SOLUTION *)space(((length*(length-1))/2)*sizeof(SOLUTION));
1365 mfestructure[0] = '\0';
1366 BP = (int *)space(sizeof(int)*(2*length+2));
1368 /* double the sequence */
1369 strcpy(doubleseq,string);
1370 strcat(doubleseq,string);
1371 cut_point = length + 1;
1373 /* get mfe and do forward recursion */
1375 /* always init everything since all global static variables are uninitialized when entering a thread */
1376 init_cofold(2 * length, parameters);
1378 if(parameters) init_cofold(2 * length, parameters);
1379 else if ((2 * length) > init_length) init_cofold(2 * length, parameters);
1380 else if (fabs(P->temperature - temperature)>1e-6) update_cofold_params_par(parameters);
1384 S = encode_sequence(doubleseq, 0);
1385 S1 = encode_sequence(doubleseq, 1);
1386 S1[0] = S[0]; /* store length at pos. 0 */
1387 make_ptypes(S, NULL); /* no constraint folding possible (yet?) with zukersubopt */
1389 (void)fill_arrays(doubleseq);
1392 pairlist = (bondT *) space(sizeof(bondT)*(psize+1));
1393 todo = (char **) space(sizeof(char *)*(length+1));
1394 for (i=1; i<length; i++) {
1395 todo[i] = (char *) space(sizeof(char)*(length+1));
1398 /* Make a list of all base pairs */
1399 for (i=1; i<length; i++) {
1400 for (j=i+TURN2+1/*??*/; j<=length; j++) {
1401 if (ptype[indx[j]+i]==0) continue;
1402 if (num_pairs>=psize) {
1403 psize = 1.2*psize + 32;
1404 pairlist = xrealloc(pairlist, sizeof(bondT)*(psize+1));
1406 pairlist[num_pairs].i = i;
1407 pairlist[num_pairs++].j = j;
1411 qsort(pairlist, num_pairs, sizeof(bondT), comp_pair);
1413 for (p=0; p<num_pairs; p++) {
1421 backtrack_co(doubleseq, 1,0);
1423 sector[1].j = i + length;
1425 backtrack_co(doubleseq, 1,base_pair2[0].i);
1426 energy = c[indx[j]+i]+c[indx[i+length]+j];
1427 parenthesis_zuker(structure, base_pair2, length);
1428 zukresults[counter].energy = energy;
1429 zukresults[counter++].structure = strdup(structure);
1430 for (k = 1; k <= base_pair2[0].i; k++) { /* mark all pairs in structure as done */
1434 if (x>length) x-=length;
1435 if (y>length) y-=length;
1438 temp=x; x=y; y=temp;
1446 for (i=1; i<length; i++)
1453 free(S); free(S1); free(BP);
1458 /*###########################################*/
1459 /*# deprecated functions below #*/
1460 /*###########################################*/
1462 PUBLIC void initialize_cofold(int length){ /* DO NOTHING */ }