5 RNA secondary structure prediction
7 c Ivo Hofacker, Chrisoph Flamm
8 original implementation by
10 g-quadruplex support and threadsafety
25 #include "energy_par.h"
26 #include "fold_vars.h"
29 #include "loop_energies.h"
30 #include "data_structures.h"
40 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
41 #define NEW_NINIO 1 /* new asymetry penalty */
42 #define MAXSECTORS 500 /* dimension for a backtrack array */
43 #define LOCALITY 0. /* locality parameter for base-pairs */
45 #define SAME_STRAND(I,J) (((I)>=cut_point)||((J)<cut_point))
48 #################################
50 #################################
52 PUBLIC int logML = 0; /* if nonzero use logarithmic ML energy in energy_of_struct */
53 PUBLIC int uniq_ML = 0; /* do ML decomposition uniquely (for subopt) */
54 PUBLIC int cut_point = -1; /* set to first pos of second seq for cofolding */
55 PUBLIC int eos_debug = 0; /* verbose info from energy_of_struct */
58 #################################
60 #################################
62 PRIVATE int *indx = NULL; /* index for moving in the triangle matrices c[] and fMl[]*/
63 PRIVATE int *c = NULL; /* energy array, given that i-j pair */
64 PRIVATE int *cc = NULL; /* linear array for calculating canonical structures */
65 PRIVATE int *cc1 = NULL; /* " " */
66 PRIVATE int *f5 = NULL; /* energy of 5' end */
67 PRIVATE int *f53 = NULL; /* energy of 5' end with 3' nucleotide not available for mismatches */
68 PRIVATE int *fML = NULL; /* multi-loop auxiliary energy array */
69 PRIVATE int *fM1 = NULL; /* second ML array, only for subopt */
70 PRIVATE int *fM2 = NULL; /* fM2 = multiloop region with exactly two stems, extending to 3' end */
71 PRIVATE int *Fmi = NULL; /* holds row i of fML (avoids jumps in memory) */
72 PRIVATE int *DMLi = NULL; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
73 PRIVATE int *DMLi1 = NULL; /* MIN(fML[i+1,k]+fML[k+1,j]) */
74 PRIVATE int *DMLi2 = NULL; /* MIN(fML[i+2,k]+fML[k+1,j]) */
75 PRIVATE int *DMLi_a = NULL; /* DMLi_a[j] holds min energy for at least two multiloop stems in [i,j], where j is available for dangling onto a surrounding stem */
76 PRIVATE int *DMLi_o = NULL; /* DMLi_o[j] holds min energy for at least two multiloop stems in [i,j], where j is unavailable for dangling onto a surrounding stem */
77 PRIVATE int *DMLi1_a = NULL;
78 PRIVATE int *DMLi1_o = NULL;
79 PRIVATE int *DMLi2_a = NULL;
80 PRIVATE int *DMLi2_o = NULL;
81 PRIVATE int Fc, FcH, FcI, FcM; /* parts of the exterior loop energies */
82 PRIVATE sect sector[MAXSECTORS]; /* stack of partial structures for backtracking */
83 PRIVATE char *ptype = NULL; /* precomputed array of pair types */
84 PRIVATE short *S = NULL, *S1 = NULL;
85 PRIVATE paramT *P = NULL;
86 PRIVATE int init_length = -1;
87 PRIVATE int *BP = NULL; /* contains the structure constrainsts: BP[i]
88 -1: | = base must be paired
89 -2: < = base must be paired with j<i
90 -3: > = base must be paired with j>i
91 -4: x = base must not pair
92 positive int: base is paired with int */
93 PRIVATE short *pair_table = NULL; /* needed by energy of struct */
94 PRIVATE bondT *base_pair2 = NULL; /* this replaces base_pair from fold_vars.c */
95 PRIVATE int circular = 0;
96 PRIVATE int struct_constrained = 0;
97 PRIVATE int with_gquad = 0;
99 PRIVATE int *ggg = NULL; /* minimum free energies of the gquadruplexes */
103 #pragma omp threadprivate(indx, c, cc, cc1, f5, f53, fML, fM1, fM2, Fmi,\
104 DMLi, DMLi1, DMLi2, DMLi_a, DMLi_o, DMLi1_a, DMLi1_o, DMLi2_a, DMLi2_o,\
106 sector, ptype, S, S1, P, init_length, BP, pair_table, base_pair2, circular, struct_constrained,\
112 #################################
113 # PRIVATE FUNCTION DECLARATIONS #
114 #################################
116 PRIVATE void get_arrays(unsigned int size);
117 PRIVATE int stack_energy(int i, const char *string, int verbostiy_level);
118 PRIVATE int energy_of_extLoop_pt(int i, short *pair_table);
119 PRIVATE int energy_of_ml_pt(int i, short *pt);
120 PRIVATE int ML_Energy(int i, int is_extloop);
121 PRIVATE void make_ptypes(const short *S, const char *structure);
122 PRIVATE void backtrack(const char *sequence, int s);
123 PRIVATE int fill_arrays(const char *sequence);
124 PRIVATE void fill_arrays_circ(const char *string, int *bt);
125 PRIVATE void init_fold(int length, paramT *parameters);
126 /* needed by cofold/eval */
127 PRIVATE int cut_in_loop(int i);
129 /* deprecated functions */
131 int oldLoopEnergy(int i, int j, int p, int q, int type, int type_2);
132 int LoopEnergy(int n1, int n2, int type, int type_2, int si1, int sj1, int sp1, int sq1);
133 int HairpinE(int size, int type, int si1, int sj1, const char *string);
137 #################################
138 # BEGIN OF FUNCTION DEFINITIONS #
139 #################################
142 /* allocate memory for folding process */
143 PRIVATE void init_fold(int length, paramT *parameters){
146 /* Explicitly turn off dynamic threads */
150 if (length<1) nrerror("initialize_fold: argument must be greater 0");
152 get_arrays((unsigned) length);
155 indx = get_indx((unsigned)length);
157 update_fold_params_par(parameters);
160 /*--------------------------------------------------------------------------*/
162 PRIVATE void get_arrays(unsigned int size){
163 if(size >= (unsigned int)sqrt((double)INT_MAX))
164 nrerror("get_arrays@fold.c: sequence length exceeds addressable range");
166 c = (int *) space(sizeof(int)*((size*(size+1))/2+2));
167 fML = (int *) space(sizeof(int)*((size*(size+1))/2+2));
169 fM1 = (int *) space(sizeof(int)*((size*(size+1))/2+2));
171 ptype = (char *)space(sizeof(char)*((size*(size+1))/2+2));
172 f5 = (int *) space(sizeof(int)*(size+2));
173 f53 = (int *) space(sizeof(int)*(size+2));
174 cc = (int *) space(sizeof(int)*(size+2));
175 cc1 = (int *) space(sizeof(int)*(size+2));
176 Fmi = (int *) space(sizeof(int)*(size+1));
177 DMLi = (int *) space(sizeof(int)*(size+1));
178 DMLi1 = (int *) space(sizeof(int)*(size+1));
179 DMLi2 = (int *) space(sizeof(int)*(size+1));
181 DMLi_a = (int *) space(sizeof(int)*(size+1));
182 DMLi_o = (int *) space(sizeof(int)*(size+1));
183 DMLi1_a = (int *) space(sizeof(int)*(size+1));
184 DMLi1_o = (int *) space(sizeof(int)*(size+1));
185 DMLi2_a = (int *) space(sizeof(int)*(size+1));
186 DMLi2_o = (int *) space(sizeof(int)*(size+1));
188 base_pair2 = (bondT *) space(sizeof(bondT)*(1+size/2+200)); /* add a guess of how many G's may be involved in a G quadruplex */
190 /* extra array(s) for circfold() */
191 if(circular) fM2 = (int *) space(sizeof(int)*(size+2));
194 /*--------------------------------------------------------------------------*/
196 PUBLIC void free_arrays(void){
204 if(ptype) free(ptype);
207 if(base_pair2) free(base_pair2);
210 if(DMLi1) free(DMLi1);
211 if(DMLi2) free(DMLi2);
212 if(DMLi_a) free(DMLi_a);
213 if(DMLi_o) free(DMLi_o);
214 if(DMLi1_a) free(DMLi1_a);
215 if(DMLi1_o) free(DMLi1_o);
216 if(DMLi2_a) free(DMLi2_a);
217 if(DMLi2_o) free(DMLi2_o);
221 indx = c = fML = f5 = f53 = cc = cc1 = fM1 = fM2 = Fmi = DMLi = DMLi1 = DMLi2 = ggg = NULL;
222 DMLi_a = DMLi_o = DMLi1_a = DMLi1_o = DMLi2_a = DMLi2_o = NULL;
230 /*--------------------------------------------------------------------------*/
232 PUBLIC void export_fold_arrays( int **f5_p,
238 /* make the DP arrays available to routines such as subopt() */
247 PUBLIC void export_fold_arrays_par( int **f5_p,
254 export_fold_arrays(f5_p, c_p, fML_p, fM1_p, indx_p,ptype_p);
258 PUBLIC void export_circfold_arrays( int *Fc_p,
269 /* make the DP arrays available to routines such as subopt() */
283 PUBLIC void export_circfold_arrays_par( int *Fc_p,
295 export_circfold_arrays(Fc_p, FcH_p, FcI_p, FcM_p, fM2_p, f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
298 /*--------------------------------------------------------------------------*/
301 PUBLIC float fold(const char *string, char *structure){
302 return fold_par(string, structure, NULL, fold_constrained, 0);
305 PUBLIC float circfold(const char *string, char *structure){
306 return fold_par(string, structure, NULL, fold_constrained, 1);
309 PUBLIC float fold_par(const char *string,
315 int i, length, energy, bonus, bonus_cnt, s;
320 circular = is_circular;
321 struct_constrained = is_constrained;
322 length = (int) strlen(string);
325 init_fold(length, parameters);
327 if (parameters) init_fold(length, parameters);
328 else if (length>init_length) init_fold(length, parameters);
329 else if (fabs(P->temperature - temperature)>1e-6) update_fold_params();
332 with_gquad = P->model_details.gquad;
333 S = encode_sequence(string, 0);
334 S1 = encode_sequence(string, 1);
335 BP = (int *)space(sizeof(int)*(length+2));
337 make_ptypes(S, structure);
339 energy = fill_arrays(string);
342 fill_arrays_circ(string, &s);
345 backtrack(string, s);
348 parenthesis_structure(structure, base_pair2, length);
350 letter_structure(structure, base_pair2, length);
354 * Backward compatibility:
355 * This block may be removed if deprecated functions
356 * relying on the global variable "base_pair" vanishs from within the package!
358 base_pair = base_pair2;
361 if(base_pair) free(base_pair);
362 base_pair = (bondT *)space(sizeof(bondT) * (1+length/2));
363 memcpy(base_pair, base_pair2, sizeof(bondT) * (1+length/2));
367 /* check constraints */
368 for(i=1;i<=length;i++) {
369 if((BP[i]<0)&&(BP[i]>-4)) {
371 if((BP[i]==-3)&&(structure[i-1]==')')) bonus++;
372 if((BP[i]==-2)&&(structure[i-1]=='(')) bonus++;
373 if((BP[i]==-1)&&(structure[i-1]!='.')) bonus++;
379 for(l=1; l<=base_pair2[0].i; l++)
380 if(base_pair2[l].i != base_pair2[l].j)
381 if((i==base_pair2[l].i)&&(BP[i]==base_pair2[l].j)) bonus++;
385 if (bonus_cnt>bonus) fprintf(stderr,"\ncould not enforce all constraints\n");
388 free(S); free(S1); free(BP);
390 energy += bonus; /*remove bonus energies from result */
392 if (backtrack_type=='C')
393 return (float) c[indx[length]+1]/100.;
394 else if (backtrack_type=='M')
395 return (float) fML[indx[length]+1]/100.;
397 return (float) energy/100.;
401 *** fill "c", "fML" and "f5" arrays and return optimal energy
403 PRIVATE int fill_arrays(const char *string) {
405 int i, j, k, length, energy, en, mm5, mm3;
406 int decomp, new_fML, max_separation;
407 int no_close, type, type_2, tt;
410 int dangle_model, noGUclosure, with_gquads;
412 dangle_model = P->model_details.dangles;
413 noGUclosure = P->model_details.noGUclosure;
415 length = (int) strlen(string);
417 max_separation = (int) ((1.-LOCALITY)*(double)(length-2)); /* not in use */
420 ggg = get_gquad_matrix(S, P);
423 for (j=1; j<=length; j++) {
424 Fmi[j]=DMLi[j]=DMLi1[j]=DMLi2[j]=INF;
427 for (j = 1; j<=length; j++)
428 for (i=(j>TURN?(j-TURN):1); i<j; i++) {
429 c[indx[j]+i] = fML[indx[j]+i] = INF;
430 if (uniq_ML) fM1[indx[j]+i] = INF;
433 if (length <= TURN) return 0;
435 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
437 for (j = i+TURN+1; j <= length; j++) {
438 int p, q, ij, jj, ee;
439 int minq, maxq, l1, up, c0, c1, c2, c3;
445 /* enforcing structure constraints */
446 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
447 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
448 if ((BP[i]==-4)||(BP[j]==-4)) type=0;
450 no_close = (((type==3)||(type==4))&&noGUclosure&&(bonus==0));
452 if (j-i-1 > max_separation) type = 0; /* forces locality degree */
454 if (type) { /* we have a pair */
455 int new_c=0, stackEnergy=INF;
456 /* hairpin ----------------------------------------------*/
458 new_c = (no_close) ? FORBIDDEN : E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], string+i-1, P);
460 /*--------------------------------------------------------
461 check for elementary structures involving more than one
463 --------------------------------------------------------*/
465 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
466 minq = j-i+p-MAXLOOP-2;
467 if (minq<p+1+TURN) minq = p+1+TURN;
468 for (q = minq; q < j; q++) {
469 type_2 = ptype[indx[q]+p];
471 if (type_2==0) continue;
472 type_2 = rtype[type_2];
475 if (no_close||(type_2==3)||(type_2==4))
476 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
478 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
479 S1[i+1], S1[j-1], S1[p-1], S1[q+1], P);
481 ee = energy+c[indx[q]+p];
482 new_c = MIN2(new_c, ee);
483 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
487 /* multi-loop decomposition ------------------------*/
493 switch(dangle_model){
495 case 0: decomp += E_MLstem(tt, -1, -1, P);
499 case 2: decomp += E_MLstem(tt, S1[j-1], S1[i+1], P);
502 /* normal dangles, aka dangles = 1 || 3 */
503 default: decomp += E_MLstem(tt, -1, -1, P);
504 decomp = MIN2(decomp, DMLi2[j-1] + E_MLstem(tt, -1, S1[i+1], P) + P->MLbase);
505 decomp = MIN2(decomp, DMLi2[j-2] + E_MLstem(tt, S1[j-1], S1[i+1], P) + 2*P->MLbase);
506 decomp = MIN2(decomp, DMLi1[j-2] + E_MLstem(tt, S1[j-1], -1, P) + P->MLbase);
509 MLenergy = decomp + P->MLclosing;
510 new_c = MIN2(new_c, MLenergy);
513 /* coaxial stacking of (i.j) with (i+1.k) or (k+1.j-1) */
515 if (dangle_model==3) {
517 for (k = i+2+TURN; k < j-2-TURN; k++) {
518 type_2 = rtype[ptype[indx[k]+i+1]];
520 decomp = MIN2(decomp, c[indx[k]+i+1]+P->stack[type][type_2]+fML[indx[j-1]+k+1]);
521 type_2 = rtype[ptype[indx[j-1]+k+1]];
523 decomp = MIN2(decomp, c[indx[j-1]+k+1]+P->stack[type][type_2]+fML[indx[k]+i+1]);
525 /* no TermAU penalty if coax stack */
526 decomp += 2*P->MLintern[1] + P->MLclosing;
527 new_c = MIN2(new_c, decomp);
531 /* include all cases where a g-quadruplex may be enclosed by base pair (i,j) */
534 energy = E_GQuad_IntLoop(i, j, type, S1, ggg, indx, P);
535 new_c = MIN2(new_c, energy);
539 new_c = MIN2(new_c, cc1[j-1]+stackEnergy);
540 cc[j] = new_c + bonus;
542 c[ij] = cc1[j-1]+stackEnergy+bonus;
546 } /* end >> if (pair) << */
550 /* done with c[i,j], now compute fML[i,j] and fM1[i,j] */
552 /* (i,j) + MLstem ? */
556 switch(dangle_model){
557 case 2: new_fML += E_MLstem(type, (i==1) ? S1[length] : S1[i-1], S1[j+1], P);
559 default: new_fML += E_MLstem(type, -1, -1, P);
565 new_fML = MIN2(new_fML, ggg[indx[j] + i] + E_MLstem(0, -1, -1, P));
569 fM1[ij] = MIN2(fM1[indx[j-1]+i] + P->MLbase, new_fML);
572 /* free ends ? -----------------------------------------*/
573 /* we must not just extend 3'/5' end by unpaired nucleotides if
574 * dangle_model == 1, this could lead to d5+d3 contributions were
575 * mismatch must be taken!
577 switch(dangle_model){
579 case 0: new_fML = MIN2(new_fML, fML[ij+1]+P->MLbase);
580 new_fML = MIN2(fML[indx[j-1]+i]+P->MLbase, new_fML);
584 case 2: new_fML = MIN2(new_fML, fML[ij+1]+P->MLbase);
585 new_fML = MIN2(fML[indx[j-1]+i]+P->MLbase, new_fML);
588 /* normal dangles, aka dangle_model = 1 || 3 */
589 default: mm5 = ((i>1) || circular) ? S1[i] : -1;
590 mm3 = ((j<length) || circular) ? S1[j] : -1;
591 new_fML = MIN2(new_fML, fML[ij+1] + P->MLbase);
592 new_fML = MIN2(new_fML, fML[indx[j-1]+i] + P->MLbase);
594 if(tt) new_fML = MIN2(new_fML, c[ij+1] + E_MLstem(tt, mm5, -1, P) + P->MLbase);
595 tt = ptype[indx[j-1]+i];
596 if(tt) new_fML = MIN2(new_fML, c[indx[j-1]+i] + E_MLstem(tt, -1, mm3, P) + P->MLbase);
597 tt = ptype[indx[j-1]+i+1];
598 if(tt) new_fML = MIN2(new_fML, c[indx[j-1]+i+1] + E_MLstem(tt, mm5, mm3, P) + 2*P->MLbase);
602 /* modular decomposition -------------------------------*/
603 for (decomp = INF, k = i + 1 + TURN; k <= j - 2 - TURN; k++)
604 decomp = MIN2(decomp, Fmi[k]+fML[indx[j]+k+1]);
605 DMLi[j] = decomp; /* store for use in ML decompositon */
606 new_fML = MIN2(new_fML,decomp);
608 /* coaxial stacking */
609 if (dangle_model==3) {
610 /* additional ML decomposition as two coaxially stacked helices */
611 for (decomp = INF, k = i+1+TURN; k <= j-2-TURN; k++) {
612 type = ptype[indx[k]+i]; type = rtype[type];
613 type_2 = ptype[indx[j]+k+1]; type_2 = rtype[type_2];
615 decomp = MIN2(decomp,
616 c[indx[k]+i]+c[indx[j]+k+1]+P->stack[type][type_2]);
619 decomp += 2*P->MLintern[1]; /* no TermAU penalty if coax stack */
621 /* This is needed for Y shaped ML loops with coax stacking of
622 interior pairts, but backtracking will fail if activated */
623 DMLi[j] = MIN2(DMLi[j], decomp);
624 DMLi[j] = MIN2(DMLi[j], DMLi[j-1]+P->MLbase);
625 DMLi[j] = MIN2(DMLi[j], DMLi1[j]+P->MLbase);
626 new_fML = MIN2(new_fML, DMLi[j]);
628 new_fML = MIN2(new_fML, decomp);
630 fML[ij] = Fmi[j] = new_fML; /* substring energy */
635 int *FF; /* rotate the auxilliary arrays */
636 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
637 FF = cc1; cc1=cc; cc=FF;
638 for (j=1; j<=length; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
642 /* calculate energies of 5' and 3' fragments */
645 /* duplicated code may be faster than conditions inside loop ;) */
646 switch(dangle_model){
647 /* dont use dangling end and mismatch contributions at all */
648 case 0: for(j=TURN+2; j<=length; j++){
650 for (i=j-TURN-1; i>1; i--){
653 f5[j] = MIN2(f5[j], f5[i-1] + ggg[indx[j]+i]);
656 type = ptype[indx[j]+i];
659 f5[j] = MIN2(f5[j], f5[i-1] + en + E_ExtLoop(type, -1, -1, P));
663 f5[j] = MIN2(f5[j], ggg[indx[j]+1]);
666 type=ptype[indx[j]+1];
669 f5[j] = MIN2(f5[j], en + E_ExtLoop(type, -1, -1, P));
673 /* always use dangles on both sides */
674 case 2: for(j=TURN+2; j<length; j++){
676 for (i=j-TURN-1; i>1; i--){
679 f5[j] = MIN2(f5[j], f5[i-1] + ggg[indx[j]+i]);
682 type = ptype[indx[j]+i];
685 f5[j] = MIN2(f5[j], f5[i-1] + en + E_ExtLoop(type, S1[i-1], S1[j+1], P));
689 f5[j] = MIN2(f5[j], ggg[indx[j]+1]);
692 type=ptype[indx[j]+1];
695 f5[j] = MIN2(f5[j], en + E_ExtLoop(type, -1, S1[j+1], P));
697 f5[length] = f5[length-1];
698 for (i=length-TURN-1; i>1; i--){
701 f5[length] = MIN2(f5[length], f5[i-1] + ggg[indx[length]+i]);
704 type = ptype[indx[length]+i];
706 en = c[indx[length]+i];
707 f5[length] = MIN2(f5[length], f5[i-1] + en + E_ExtLoop(type, S1[i-1], -1, P));
711 f5[length] = MIN2(f5[length], ggg[indx[length]+1]);
714 type=ptype[indx[length]+1];
716 en = c[indx[length]+1];
717 f5[length] = MIN2(f5[length], en + E_ExtLoop(type, -1, -1, P));
722 /* normal dangles, aka dangle_model = 1 || 3 */
723 default: for(j=TURN+2; j<=length; j++){
725 for (i=j-TURN-1; i>1; i--){
728 f5[j] = MIN2(f5[j], f5[i-1] + ggg[indx[j]+i]);
731 type = ptype[indx[j]+i];
734 f5[j] = MIN2(f5[j], f5[i-1] + en + E_ExtLoop(type, -1, -1, P));
735 f5[j] = MIN2(f5[j], f5[i-2] + en + E_ExtLoop(type, S1[i-1], -1, P));
737 type = ptype[indx[j-1]+i];
740 f5[j] = MIN2(f5[j], f5[i-1] + en + E_ExtLoop(type, -1, S1[j], P));
741 f5[j] = MIN2(f5[j], f5[i-2] + en + E_ExtLoop(type, S1[i-1], S1[j], P));
746 f5[j] = MIN2(f5[j], ggg[indx[j]+1]);
749 type = ptype[indx[j]+1];
750 if(type) f5[j] = MIN2(f5[j], c[indx[j]+1] + E_ExtLoop(type, -1, -1, P));
751 type = ptype[indx[j-1]+1];
752 if(type) f5[j] = MIN2(f5[j], c[indx[j-1]+1] + E_ExtLoop(type, -1, S1[j], P));
758 #include "circfold.inc"
761 *** trace back through the "c", "f5" and "fML" arrays to get the
762 *** base pairing list. No search for equivalent structures is done.
763 *** This is fast, since only few structure elements are recalculated.
766 *** If s>0 then s items have been already pushed onto the sector stack
768 PRIVATE void backtrack(const char *string, int s) {
769 int i, j, ij, k, l1, mm5, mm3, length, energy, en, new;
770 int no_close, type, type_2, tt, minq, maxq, c0, c1, c2, c3;
773 int dangle_model = P->model_details.dangles;
775 length = strlen(string);
778 sector[s].j = length;
779 sector[s].ml = (backtrack_type=='M') ? 1 : ((backtrack_type=='C')? 2: 0);
782 int ml, fij, fi, cij, traced, i1, j1, p, q, jj=0, gq=0;
783 int canonical = 1; /* (i,j) closes a canonical structure */
786 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
787 occur in the fML- (1) or in the f-array (0) */
789 base_pair2[++b].i = i;
794 else if(ml==7) { /* indicates that i,j are enclosing a gquadruplex */
795 /* actually, do something here */
798 if (j < i+TURN+1) continue; /* no more pairs in this interval */
800 fij = (ml == 1)? fML[indx[j]+i] : f5[j];
801 fi = (ml == 1)?(fML[indx[j-1]+i]+P->MLbase): f5[j-1];
803 if (fij == fi) { /* 3' end is unpaired */
810 if (ml == 0) { /* backtrack in f5 */
811 switch(dangle_model){
812 case 0: /* j is paired. Find pairing partner */
813 for(k=j-TURN-1,traced=0; k>=1; k--){
816 if(fij == f5[k-1] + ggg[indx[j]+k]){
817 /* found the decomposition */
818 traced = j; jj = k - 1; gq = 1;
823 type = ptype[indx[j]+k];
825 if(fij == E_ExtLoop(type, -1, -1, P) + c[indx[j]+k] + f5[k-1]){
832 case 2: mm3 = (j<length) ? S1[j+1] : -1;
833 for(k=j-TURN-1,traced=0; k>=1; k--){
836 if(fij == f5[k-1] + ggg[indx[j]+k]){
837 /* found the decomposition */
838 traced = j; jj = k - 1; gq = 1;
843 type = ptype[indx[j]+k];
845 if(fij == E_ExtLoop(type, (k>1) ? S1[k-1] : -1, mm3, P) + c[indx[j]+k] + f5[k-1]){
852 default: for(traced = 0, k=j-TURN-1; k>1; k--){
855 if(fij == f5[k-1] + ggg[indx[j]+k]){
856 /* found the decomposition */
857 traced = j; jj = k - 1; gq = 1;
862 type = ptype[indx[j] + k];
865 if(fij == f5[k-1] + en + E_ExtLoop(type, -1, -1, P)){
870 if(fij == f5[k-2] + en + E_ExtLoop(type, S1[k-1], -1, P)){
876 type = ptype[indx[j-1] + k];
878 en = c[indx[j-1] + k];
879 if(fij == f5[k-1] + en + E_ExtLoop(type, -1, S1[j], P)){
884 if(fij == f5[k-2] + en + E_ExtLoop(type, S1[k-1], S1[j], P)){
894 if(fij == ggg[indx[j]+1]){
895 /* found the decomposition */
896 traced = j; jj = 0; gq = 1;
901 type = ptype[indx[j]+1];
903 if(fij == c[indx[j]+1] + E_ExtLoop(type, -1, -1, P)){
909 type = ptype[indx[j-1]+1];
911 if(fij == c[indx[j-1]+1] + E_ExtLoop(type, -1, S1[j], P)){
922 fprintf(stderr, "%s\n", string);
923 nrerror("backtrack failed in f5");
925 /* push back the remaining f5 portion */
930 /* trace back the base pair found */
933 if(with_gquad && gq){
934 /* goto backtrace of gquadruplex */
938 base_pair2[++b].i = i;
942 else { /* trace back in fML array */
943 if (fML[indx[j]+i+1]+P->MLbase == fij) { /* 5' end is unpaired */
953 if(fij == ggg[ij] + E_MLstem(0, -1, -1, P)){
954 /* go to backtracing of quadruplex */
961 switch(dangle_model){
962 case 0: if(fij == en + E_MLstem(tt, -1, -1, P)){
963 base_pair2[++b].i = i;
969 case 2: if(fij == en + E_MLstem(tt, S1[i-1], S1[j+1], P)){
970 base_pair2[++b].i = i;
976 default: if(fij == en + E_MLstem(tt, -1, -1, P)){
977 base_pair2[++b].i = i;
982 if(fij == c[ij+1] + E_MLstem(tt, S1[i], -1, P) + P->MLbase){
983 base_pair2[++b].i = ++i;
987 tt = ptype[indx[j-1]+i];
988 if(fij == c[indx[j-1]+i] + E_MLstem(tt, -1, S1[j], P) + P->MLbase){
989 base_pair2[++b].i = i;
990 base_pair2[b].j = --j;
993 tt = ptype[indx[j-1]+i+1];
994 if(fij == c[indx[j-1]+i+1] + E_MLstem(tt, S1[i], S1[j], P) + 2*P->MLbase){
995 base_pair2[++b].i = ++i;
996 base_pair2[b].j = --j;
1002 for(k = i + 1 + TURN; k <= j - 2 - TURN; k++)
1003 if(fij == (fML[indx[k]+i]+fML[indx[j]+k+1]))
1006 if ((dangle_model==3)&&(k > j - 2 - TURN)) { /* must be coax stack */
1008 for (k = i+1+TURN; k <= j - 2 - TURN; k++) {
1009 type = rtype[ptype[indx[k]+i]];
1010 type_2 = rtype[ptype[indx[j]+k+1]];
1012 if (fij == c[indx[k]+i]+c[indx[j]+k+1]+P->stack[type][type_2]+
1020 sector[++s].i = k+1;
1024 if (k>j-2-TURN) nrerror("backtrack failed in fML");
1030 /*----- begin of "repeat:" -----*/
1032 if (canonical) cij = c[ij];
1037 if (struct_constrained) {
1038 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
1039 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
1043 /* (i.j) closes canonical structures, thus
1044 (i+1.j-1) must be a pair */
1045 type_2 = ptype[indx[j-1]+i+1]; type_2 = rtype[type_2];
1046 cij -= P->stack[type][type_2] + bonus;
1047 base_pair2[++b].i = i+1;
1048 base_pair2[b].j = j-1;
1056 no_close = (((type==3)||(type==4))&&no_closingGU&&(bonus==0));
1058 if (cij == FORBIDDEN) continue;
1060 if (cij == E_Hairpin(j-i-1, type, S1[i+1], S1[j-1],string+i-1, P)+bonus)
1063 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
1064 minq = j-i+p-MAXLOOP-2;
1065 if (minq<p+1+TURN) minq = p+1+TURN;
1066 for (q = j-1; q >= minq; q--) {
1068 type_2 = ptype[indx[q]+p];
1069 if (type_2==0) continue;
1070 type_2 = rtype[type_2];
1072 if (no_close||(type_2==3)||(type_2==4))
1073 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
1075 /* energy = oldLoopEnergy(i, j, p, q, type, type_2); */
1076 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
1077 S1[i+1], S1[j-1], S1[p-1], S1[q+1], P);
1079 new = energy+c[indx[q]+p]+bonus;
1080 traced = (cij == new);
1082 base_pair2[++b].i = p;
1083 base_pair2[b].j = q;
1090 /* end of repeat: --------------------------------------------------*/
1092 /* (i.j) must close a multi-loop */
1098 The case that is handled here actually resembles something like
1099 an interior loop where the enclosing base pair is of regular
1100 kind and the enclosed pair is not a canonical one but a g-quadruplex
1101 that should then be decomposed further...
1103 if(backtrack_GQuad_IntLoop(cij, i, j, type, S, ggg, indx, &p, &q, P)){
1109 sector[s+1].ml = sector[s+2].ml = 1;
1111 switch(dangle_model){
1112 case 0: en = cij - E_MLstem(tt, -1, -1, P) - P->MLclosing - bonus;
1113 for(k = i+2+TURN; k < j-2-TURN; k++){
1114 if(en == fML[indx[k]+i+1] + fML[indx[j-1]+k+1])
1119 case 2: en = cij - E_MLstem(tt, S1[j-1], S1[i+1], P) - P->MLclosing - bonus;
1120 for(k = i+2+TURN; k < j-2-TURN; k++){
1121 if(en == fML[indx[k]+i+1] + fML[indx[j-1]+k+1])
1126 default: for(k = i+2+TURN; k < j-2-TURN; k++){
1127 en = cij - P->MLclosing - bonus;
1128 if(en == fML[indx[k]+i+1] + fML[indx[j-1]+k+1] + E_MLstem(tt, -1, -1, P)){
1131 else if(en == fML[indx[k]+i+2] + fML[indx[j-1]+k+1] + E_MLstem(tt, -1, S1[i+1], P) + P->MLbase){
1135 else if(en == fML[indx[k]+i+1] + fML[indx[j-2]+k+1] + E_MLstem(tt, S1[j-1], -1, P) + P->MLbase){
1139 else if(en == fML[indx[k]+i+2] + fML[indx[j-2]+k+1] + E_MLstem(tt, S1[j-1], S1[i+1], P) + 2*P->MLbase){
1144 /* coaxial stacking of (i.j) with (i+1.k) or (k.j-1) */
1145 /* use MLintern[1] since coax stacked pairs don't get TerminalAU */
1146 if(dangle_model == 3){
1147 type_2 = rtype[ptype[indx[k]+i+1]];
1149 en = c[indx[k]+i+1]+P->stack[type][type_2]+fML[indx[j-1]+k+1];
1150 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1157 type_2 = rtype[ptype[indx[j-1]+k+1]];
1159 en = c[indx[j-1]+k+1]+P->stack[type][type_2]+fML[indx[k]+i+1];
1160 if (cij == en+2*P->MLintern[1]+P->MLclosing) {
1171 if (k<=j-3-TURN) { /* found the decomposition */
1174 sector[++s].i = k+1;
1178 /* Y shaped ML loops fon't work yet */
1179 if (dangle_model==3) {
1180 d5 = P->dangle5[tt][S1[j-1]];
1181 d3 = P->dangle3[tt][S1[i+1]];
1182 /* (i,j) must close a Y shaped ML loop with coax stacking */
1183 if (cij == fML[indx[j-2]+i+2] + mm + d3 + d5 + P->MLbase + P->MLbase) {
1186 } else if (cij == fML[indx[j-2]+i+1] + mm + d5 + P->MLbase)
1188 else if (cij == fML[indx[j-1]+i+2] + mm + d3 + P->MLbase)
1190 else /* last chance */
1191 if (cij != fML[indx[j-1]+i+1] + mm + P->MLbase)
1192 fprintf(stderr, "backtracking failed in repeat");
1193 /* if we arrive here we can express cij via fML[i1,j1]+dangles */
1199 nrerror("backtracking failed in repeat");
1202 continue; /* this is a workarround to not accidentally proceed in the following block */
1206 now we do some fancy stuff to backtrace the stacksize and linker lengths
1207 of the g-quadruplex that should reside within position i,j
1213 get_gquad_pattern_mfe(S, i, j, P, &L, l);
1215 /* fill the G's of the quadruplex into base_pair2 */
1217 base_pair2[++b].i = i+a;
1218 base_pair2[b].j = i+a;
1219 base_pair2[++b].i = i+L+l[0]+a;
1220 base_pair2[b].j = i+L+l[0]+a;
1221 base_pair2[++b].i = i+L+l[0]+L+l[1]+a;
1222 base_pair2[b].j = i+L+l[0]+L+l[1]+a;
1223 base_pair2[++b].i = i+L+l[0]+L+l[1]+L+l[2]+a;
1224 base_pair2[b].j = i+L+l[0]+L+l[1]+L+l[2]+a;
1226 goto repeat_gquad_exit;
1228 nrerror("backtracking failed in repeat_gquad");
1233 } /* end of infinite while loop */
1235 base_pair2[0].i = b; /* save the total number of base pairs */
1238 PUBLIC char *backtrack_fold_from_pair(char *sequence, int i, int j) {
1244 S = encode_sequence(sequence, 0);
1245 S1 = encode_sequence(sequence, 1);
1246 backtrack(sequence, 1);
1247 structure = (char *) space((strlen(sequence)+1)*sizeof(char));
1248 parenthesis_structure(structure, base_pair2, strlen(sequence));
1253 /*---------------------------------------------------------------------------*/
1255 PUBLIC void letter_structure(char *structure, bondT *bp, int length){
1257 char alpha[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1259 for (n = 0; n < length; structure[n++] = ' ');
1260 structure[length] = '\0';
1262 for (n = 0, k = 1; k <= bp[0].i; k++) {
1265 if (x-1 > 0 && y+1 <= length) {
1266 if (structure[x-2] != ' ' && structure[y] == structure[x-2]) {
1267 structure[x-1] = structure[x-2];
1268 structure[y-1] = structure[x-1];
1272 if (structure[x] != ' ' && structure[y-2] == structure[x]) {
1273 structure[x-1] = structure[x];
1274 structure[y-1] = structure[x-1];
1278 structure[x-1] = alpha[n-1];
1279 structure[y-1] = alpha[n-1];
1283 /*---------------------------------------------------------------------------*/
1285 PUBLIC void parenthesis_structure(char *structure, bondT *bp, int length){
1288 for (n = 0; n < length; structure[n++] = '.');
1289 structure[length] = '\0';
1291 for (k = 1; k <= bp[0].i; k++){
1293 if(bp[k].i == bp[k].j){ /* Gquad bonds are marked as bp[i].i == bp[i].j */
1294 structure[bp[k].i-1] = '+';
1295 } else { /* the following ones are regular base pairs */
1296 structure[bp[k].i-1] = '(';
1297 structure[bp[k].j-1] = ')';
1302 PUBLIC void parenthesis_zuker(char *structure, bondT *bp, int length){
1305 for (k = 0; k < length; structure[k++] = '.');
1306 structure[length] = '\0';
1308 for (k = 1; k <= bp[0].i; k++) {
1311 if (i>length) i-=length;
1312 if (j>length) j-=length;
1314 temp=i; i=j; j=temp;
1316 if(i == j){ /* Gquad bonds are marked as bp[i].i == bp[i].j */
1317 structure[i-1] = '+';
1318 } else { /* the following ones are regular base pairs */
1319 structure[i-1] = '(';
1320 structure[j-1] = ')';
1326 /*---------------------------------------------------------------------------*/
1328 PUBLIC void update_fold_params(void){
1329 update_fold_params_par(NULL);
1332 PUBLIC void update_fold_params_par(paramT *parameters){
1335 P = get_parameter_copy(parameters);
1338 set_model_details(&md);
1339 P = get_scaled_parameters(temperature, md);
1342 if (init_length < 0) init_length=0;
1345 /*---------------------------------------------------------------------------*/
1346 PUBLIC float energy_of_structure(const char *string, const char *structure, int verbosity_level){
1347 return energy_of_struct_par(string, structure, NULL, verbosity_level);
1350 PUBLIC float energy_of_struct_par(const char *string,
1351 const char *structure,
1353 int verbosity_level){
1357 update_fold_params_par(parameters);
1359 if (strlen(structure)!=strlen(string))
1360 nrerror("energy_of_struct: string and structure have unequal length");
1362 /* save the S and S1 pointers in case they were already in use */
1364 S = encode_sequence(string, 0);
1365 S1 = encode_sequence(string, 1);
1367 pair_table = make_pair_table(structure);
1369 energy = energy_of_structure_pt(string, pair_table, S, S1, verbosity_level);
1374 return (float) energy/100.;
1377 /* returns a correction term that may be added to the energy retrieved
1378 from energy_of_struct_par() to correct misinterpreted loops. This
1379 correction is necessary since energy_of_struct_par() will forget
1380 about the existance of gquadruplexes and just treat them as unpaired
1385 PRIVATE int en_corr_of_loop_gquad(int i,
1388 const char *structure,
1393 int pos, energy, p, q, r, s, u, type, type2;
1398 while((pos = parse_gquad(structure + q-1, &L, l)) > 0){
1400 p = q - 4*L - l[0] - l[1] - l[2] + 1;
1402 /* we've found the first g-quadruplex at position [p,q] */
1403 energy += E_gquad(L, l, P);
1404 /* check if it's enclosed in a base pair */
1405 if(loop_idx[p] == 0){ q++; continue; /* g-quad in exterior loop */}
1407 energy += E_MLstem(0, -1, -1, P);
1408 /* find its enclosing pair */
1409 int num_elem, num_g, elem_i, elem_j, up_mis;
1415 /* seek for first pairing base located 5' of the g-quad */
1416 for(r = p - 1; !pt[r] && (r >= i); r--);
1417 if(r < i) nrerror("this should not happen");
1419 if(r < pt[r]){ /* found the enclosing pair */
1426 /* seek for next pairing base 5' of r */
1427 for(; !pt[r] && (r >= i); r--);
1428 if(r < i) nrerror("so nich");
1429 if(r < pt[r]){ /* found the enclosing pair */
1432 /* hop over stems and unpaired nucleotides */
1433 while((r > pt[r]) && (r >= i)){
1434 if(pt[r]){ r = pt[r]; num_elem++;}
1437 if(r < i) nrerror("so nich");
1438 s = pt[r]; /* found the enclosing pair */
1441 /* now we have the enclosing pair (r,s) */
1444 /* we know everything about the 5' part of this loop so check the 3' part */
1446 if(structure[u-1] == '.') u++;
1447 else if (structure[u-1] == '+'){ /* found another gquad */
1448 pos = parse_gquad(structure + u - 1, &L, l);
1450 energy += E_gquad(L, l, P) + E_MLstem(0, -1, -1, P);
1455 } else { /* we must have found a stem */
1456 if(!(u < pt[u])) nrerror("wtf!");
1457 num_elem++; elem_i = u; elem_j = pt[u];
1458 energy += en_corr_of_loop_gquad(u, pt[u], string, structure, pt, loop_idx, s1);
1462 if(u!=s) nrerror("what the hell");
1463 else{ /* we are done since we've found no other 3' structure element */
1465 /* g-quad was misinterpreted as hairpin closed by (r,s) */
1466 case 0: /* if(num_g == 1)
1467 if((p-r-1 == 0) || (s-q-1 == 0))
1468 nrerror("too few unpaired bases");
1470 type = pair[s1[r]][s1[s]];
1472 energy += P->mismatchI[type][s1[r+1]][s1[s-1]];
1474 energy += P->TerminalAU;
1475 energy += P->internal_loop[s - r - 1 - up_mis];
1476 energy -= E_Hairpin(s - r - 1,
1483 /* g-quad was misinterpreted as interior loop closed by (r,s) with enclosed pair (elem_i, elem_j) */
1484 case 1: type = pair[s1[r]][s1[s]];
1485 type2 = pair[s1[elem_i]][s1[elem_j]];
1486 energy += P->MLclosing
1487 + E_MLstem(rtype[type], s1[s-1], s1[r+1], P)
1488 + (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase
1489 + E_MLstem(type2, s1[elem_i-1], s1[elem_j+1], P);
1490 energy -= E_IntLoop(elem_i - r - 1,
1500 /* gquad was misinterpreted as unpaired nucleotides in a multiloop */
1501 default: energy -= (up_mis) * P->MLbase;
1511 PUBLIC float energy_of_gquad_structure( const char *string,
1512 const char *structure,
1513 int verbosity_level){
1515 int energy, gge, *loop_idx;
1519 if(P == NULL) update_fold_params();
1521 if((init_length<0)||(P==NULL)) update_fold_params();
1524 if (fabs(P->temperature - temperature)>1e-6) update_fold_params();
1526 if (strlen(structure)!=strlen(string))
1527 nrerror("energy_of_struct: string and structure have unequal length");
1529 /* save the S and S1 pointers in case they were already in use */
1531 S = encode_sequence(string, 0);
1532 S1 = encode_sequence(string, 1);
1534 /* the pair_table looses every information about the gquad position
1535 thus we have to find add the energy contributions for each loop
1536 that contains a gquad by ourself, substract all miscalculated
1537 contributions, i.e. loops that actually contain a gquad, from
1538 energy_of_structure_pt()
1540 pair_table = make_pair_table(structure);
1541 energy = energy_of_structure_pt(string, pair_table, S, S1, verbosity_level);
1543 loop_idx = make_loop_index_pt(pair_table);
1544 gge = en_corr_of_loop_gquad(1, S[0], string, structure, pair_table, loop_idx, S1);
1551 return (float) energy/100.;
1554 PUBLIC int energy_of_structure_pt(const char *string,
1558 int verbosity_level){
1559 return energy_of_struct_pt_par(string, ptable, s, s1, NULL, verbosity_level);
1562 PUBLIC int energy_of_struct_pt_par( const char *string,
1567 int verbosity_level){
1568 /* auxiliary function for kinfold,
1569 for most purposes call energy_of_struct instead */
1571 int i, length, energy;
1574 update_fold_params_par(parameters);
1576 pair_table = ptable;
1583 /* energy = backtrack_type=='M' ? ML_Energy(0, 0) : ML_Energy(0, 1); */
1584 energy = backtrack_type=='M' ? energy_of_ml_pt(0, ptable) : energy_of_extLoop_pt(0, ptable);
1585 if (verbosity_level>0)
1586 printf("External loop : %5d\n", energy);
1587 for (i=1; i<=length; i++) {
1588 if (pair_table[i]==0) continue;
1589 energy += stack_energy(i, string, verbosity_level);
1592 for (i=1; !SAME_STRAND(i,length); i++) {
1593 if (!SAME_STRAND(i,pair_table[i])) {
1594 energy+=P->DuplexInit;
1603 PUBLIC float energy_of_circ_structure(const char *string,
1604 const char *structure,
1605 int verbosity_level){
1606 return energy_of_circ_struct_par(string, structure, NULL, verbosity_level);
1609 PUBLIC float energy_of_circ_struct_par( const char *string,
1610 const char *structure,
1612 int verbosity_level){
1614 int i, j, length, energy=0, en0, degree=0, type;
1617 update_fold_params_par(parameters);
1619 int dangle_model = P->model_details.dangles;
1621 if (strlen(structure)!=strlen(string))
1622 nrerror("energy_of_struct: string and structure have unequal length");
1624 /* save the S and S1 pointers in case they were already in use */
1626 S = encode_sequence(string, 0);
1627 S1 = encode_sequence(string, 1);
1629 pair_table = make_pair_table(structure);
1633 for (i=1; i<=length; i++) {
1634 if (pair_table[i]==0) continue;
1636 energy += stack_energy(i, string, verbosity_level);
1640 if (degree==0) return 0.;
1641 for (i=1; pair_table[i]==0; i++);
1643 type=pair[S[j]][S[i]];
1644 if (type==0) type=7;
1648 for (i=1; pair_table[i]==0; i++);
1651 strcpy(loopseq , string+j-1);
1652 strncat(loopseq, string, i);
1654 si1 = (i==1)?S1[length] : S1[i-1];
1655 sj1 = (j==length)?S1[1] : S1[j+1];
1656 en0 = E_Hairpin(u, type, sj1, si1, loopseq, P);
1659 int p,q, u1,u2, si1, sq1, type_2;
1660 for (p=j+1; pair_table[p]==0; p++);
1663 u2 = i-1 + length-q;
1664 type_2 = pair[S[q]][S[p]];
1665 if (type_2==0) type_2=7;
1666 si1 = (i==1)? S1[length] : S1[i-1];
1667 sq1 = (q==length)? S1[1] : S1[q+1];
1668 en0 = E_IntLoop(u1, u2, type, type_2,
1669 S1[j+1], si1, S1[p-1], sq1,P);
1670 } else { /* degree > 2 */
1671 en0 = ML_Energy(0, 0) - P->MLintern[0];
1674 if (pair_table[1]) {
1676 type = pair[S[1]][S[j]];
1677 if (dangle_model==2)
1678 en0 += P->dangle5[type][S1[length]];
1679 else { /* dangle_model==1 */
1680 if (pair_table[length]==0) {
1681 d5 = P->dangle5[type][S1[length]];
1682 if (pair_table[length-1]!=0) {
1684 tt = pair[S[pair_table[length-1]]][S[length-1]];
1685 d3 = P->dangle3[tt][S1[length]];
1693 if (pair_table[length]) {
1694 i = pair_table[length];
1695 type = pair[S[i]][S[length]];
1696 if (dangle_model==2)
1697 en0 += P->dangle3[type][S1[1]];
1698 else { /* dangle_model==1 */
1699 if (pair_table[1]==0) {
1700 d3 = P->dangle3[type][S1[1]];
1701 if (pair_table[2]) {
1703 tt = pair[S[2]][S[pair_table[2]]];
1704 d5 = P->dangle5[tt][1];
1715 if (verbosity_level>0)
1716 printf("External loop : %5d\n", en0);
1718 /* fprintf(stderr, "ext loop degree %d tot %d\n", degree, energy); */
1721 return (float) energy/100.0;
1724 /*---------------------------------------------------------------------------*/
1725 PRIVATE int stack_energy(int i, const char *string, int verbosity_level)
1727 /* calculate energy of substructure enclosed by (i,j) */
1732 type = pair[S[i]][S[j]];
1735 if (verbosity_level>=0)
1736 fprintf(stderr,"WARNING: bases %d and %d (%c%c) can't pair!\n", i, j,
1737 string[i-1],string[j-1]);
1741 while (p<q) { /* process all stacks and interior loops */
1743 while (pair_table[++p]==0);
1744 while (pair_table[--q]==0);
1745 if ((pair_table[q]!=(short)p)||(p>q)) break;
1746 type_2 = pair[S[q]][S[p]];
1749 if (verbosity_level>=0)
1750 fprintf(stderr,"WARNING: bases %d and %d (%c%c) can't pair!\n", p, q,
1751 string[p-1],string[q-1]);
1753 /* energy += LoopEnergy(i, j, p, q, type, type_2); */
1754 if ( SAME_STRAND(i,p) && SAME_STRAND(q,j) )
1755 ee = E_IntLoop(p-i-1, j-q-1, type, type_2, S1[i+1], S1[j-1], S1[p-1], S1[q+1],P);
1757 ee = energy_of_extLoop_pt(cut_in_loop(i), pair_table);
1758 if (verbosity_level>0)
1759 printf("Interior loop (%3d,%3d) %c%c; (%3d,%3d) %c%c: %5d\n",
1760 i,j,string[i-1],string[j-1],p,q,string[p-1],string[q-1], ee);
1762 i=p; j=q; type = rtype[type_2];
1765 /* p,q don't pair must have found hairpin or multiloop */
1767 if (p>q) { /* hair pin */
1768 if (SAME_STRAND(i,j))
1769 ee = E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], string+i-1, P);
1771 ee = energy_of_extLoop_pt(cut_in_loop(i), pair_table);
1773 if (verbosity_level>0)
1774 printf("Hairpin loop (%3d,%3d) %c%c : %5d\n",
1775 i, j, string[i-1],string[j-1], ee);
1780 /* (i,j) is exterior pair of multiloop */
1782 /* add up the contributions of the substructures of the ML */
1783 energy += stack_energy(p, string, verbosity_level);
1785 /* search for next base pair in multiloop */
1786 while (pair_table[++p]==0);
1790 ii = cut_in_loop(i);
1791 ee = (ii==0) ? energy_of_ml_pt(i, pair_table) : energy_of_extLoop_pt(ii, pair_table);
1794 if (verbosity_level>0)
1795 printf("Multi loop (%3d,%3d) %c%c : %5d\n",
1796 i,j,string[i-1],string[j-1],ee);
1801 /*---------------------------------------------------------------------------*/
1806 *** Calculate the energy contribution of
1807 *** stabilizing dangling-ends/mismatches
1808 *** for all stems branching off the exterior
1811 PRIVATE int energy_of_extLoop_pt(int i, short *pair_table) {
1812 int energy, mm5, mm3;
1814 int length = (int)pair_table[0];
1816 /* helper variables for dangles == 1 case */
1817 int E3_available; /* energy of 5' part where 5' mismatch is available for current stem */
1818 int E3_occupied; /* energy of 5' part where 5' mismatch is unavailable for current stem */
1820 int dangle_model = P->model_details.dangles;
1822 /* initialize vars */
1827 if(dangle_model%2 == 1){
1832 /* seek to opening base of first stem */
1833 while(p <= length && !pair_table[p]) p++;
1837 /* p must have a pairing partner */
1838 q = (int)pair_table[p];
1839 /* get type of base pair (p,q) */
1840 tt = pair[S[p]][S[q]];
1843 switch(dangle_model){
1845 case 0: energy += E_ExtLoop(tt, -1, -1, P);
1847 /* the beloved double dangles */
1848 case 2: mm5 = ((SAME_STRAND(p-1,p)) && (p>1)) ? S1[p-1] : -1;
1849 mm3 = ((SAME_STRAND(q,q+1)) && (q<length)) ? S1[q+1] : -1;
1850 energy += E_ExtLoop(tt, mm5, mm3, P);
1856 E3_available = MIN2(E3_available, E3_occupied);
1857 E3_occupied = E3_available;
1859 mm5 = ((SAME_STRAND(p-1,p)) && (p>1) && !pair_table[p-1]) ? S1[p-1] : -1;
1860 mm3 = ((SAME_STRAND(q,q+1)) && (q<length) && !pair_table[q+1]) ? S1[q+1] : -1;
1862 E3_occupied + E_ExtLoop(tt, -1, mm3, P),
1863 E3_available + E_ExtLoop(tt, mm5, mm3, P)
1865 E3_available = MIN2(
1866 E3_occupied + E_ExtLoop(tt, -1, -1, P),
1867 E3_available + E_ExtLoop(tt, mm5, -1, P)
1873 } /* end switch dangle_model */
1874 /* seek to the next stem */
1877 while (p <= length && !pair_table[p]) p++;
1878 if(p==i) break; /* cut was in loop */
1881 if(dangle_model%2 == 1)
1882 energy = MIN2(E3_occupied, E3_available);
1888 *** i is the 5'-base of the closing pair
1890 *** since each helix can coaxially stack with at most one of its
1891 *** neighbors we need an auxiliarry variable cx_energy
1892 *** which contains the best energy given that the last two pairs stack.
1893 *** energy holds the best energy given the previous two pairs do not
1894 *** stack (i.e. the two current helices may stack)
1895 *** We don't allow the last helix to stack with the first, thus we have to
1896 *** walk around the Loop twice with two starting points and take the minimum
1898 PRIVATE int energy_of_ml_pt(int i, short *pt){
1900 int energy, cx_energy, tmp, tmp2, best_energy=INF;
1901 int i1, j, p, q, q_prev, q_prev2, u, x, type, count, mm5, mm3, tt, ld5, new_cx, dang5, dang3, dang;
1902 int mlintern[NBPAIRS+1];
1904 /* helper variables for dangles == 1|5 case */
1905 int E_mm5_available; /* energy of 5' part where 5' mismatch of current stem is available */
1906 int E_mm5_occupied; /* energy of 5' part where 5' mismatch of current stem is unavailable */
1907 int E2_mm5_available; /* energy of 5' part where 5' mismatch of current stem is available with possible 3' dangle for enclosing pair (i,j) */
1908 int E2_mm5_occupied; /* energy of 5' part where 5' mismatch of current stem is unavailable with possible 3' dangle for enclosing pair (i,j) */
1909 int dangle_model = P->model_details.dangles;
1912 nrerror("energy_of_ml_pt: i is not 5' base of a closing pair!");
1916 /* init the variables */
1922 for (x = 0; x <= NBPAIRS; x++) mlintern[x] = P->MLintern[x];
1924 /* seek to opening base of first stem */
1925 while(p <= j && !pair_table[p]) p++;
1928 switch(dangle_model){
1929 case 0: while(p < j){
1930 /* p must have a pairing partner */
1931 q = (int)pair_table[p];
1932 /* get type of base pair (p,q) */
1933 tt = pair[S[p]][S[q]];
1935 energy += E_MLstem(tt, -1, -1, P);
1936 /* seek to the next stem */
1938 q_prev = q_prev2 = q;
1939 while (p <= j && !pair_table[p]) p++;
1940 u += p - q - 1; /* add unpaired nucleotides */
1942 /* now lets get the energy of the enclosing stem */
1943 type = pair[S[j]][S[i]]; if (type==0) type=7;
1944 energy += E_MLstem(type, -1, -1, P);
1947 case 2: while(p < j){
1948 /* p must have a pairing partner */
1949 q = (int)pair_table[p];
1950 /* get type of base pair (p,q) */
1951 tt = pair[S[p]][S[q]];
1953 mm5 = (SAME_STRAND(p-1,p)) ? S1[p-1] : -1;
1954 mm3 = (SAME_STRAND(q,q+1)) ? S1[q+1] : -1;
1955 energy += E_MLstem(tt, mm5, mm3, P);
1956 /* seek to the next stem */
1958 q_prev = q_prev2 = q;
1959 while (p <= j && !pair_table[p]) p++;
1960 u += p - q - 1; /* add unpaired nucleotides */
1962 type = pair[S[j]][S[i]]; if (type==0) type=7;
1963 mm5 = ((SAME_STRAND(j-1,j)) && !pair_table[j-1]) ? S1[j-1] : -1;
1964 mm3 = ((SAME_STRAND(i,i+1)) && !pair_table[i+1]) ? S1[i+1] : -1;
1965 energy += E_MLstem(type, S1[j-1], S1[i+1], P);
1968 case 3: /* we treat helix stacking different */
1969 for (count=0; count<2; count++) { /* do it twice */
1970 ld5 = 0; /* 5' dangle energy on prev pair (type) */
1972 j = (unsigned int)pair_table[0]+1;
1973 type = 0; /* no pair */
1976 j = (unsigned int)pair_table[i];
1977 type = pair[S[j]][S[i]]; if (type==0) type=7;
1978 /* prime the ld5 variable */
1979 if (SAME_STRAND(j-1,j)) {
1980 ld5 = P->dangle5[type][S1[j-1]];
1981 if ((p=(unsigned int)pair_table[j-2]) && SAME_STRAND(j-2, j-1))
1982 if (P->dangle3[pair[S[p]][S[j-2]]][S1[j-1]]<ld5) ld5 = 0;
1986 energy = 0; cx_energy=INF;
1987 do { /* walk around the multi-loop */
1990 /* hop over unpaired positions */
1991 while (p <= (unsigned int)pair_table[0] && pair_table[p]==0) p++;
1993 /* memorize number of unpaired positions */
1995 /* get position of pairing partner */
1996 if ( p == (unsigned int)pair_table[0]+1 ){
1997 q = 0;tt = 0; /* virtual root pair */
1999 q = (unsigned int)pair_table[p];
2000 /* get type of base pair P->q */
2001 tt = pair[S[p]][S[q]]; if (tt==0) tt=7;
2004 energy += mlintern[tt];
2005 cx_energy += mlintern[tt];
2008 if ((SAME_STRAND(p-1,p))&&(p>1))
2009 dang5=P->dangle5[tt][S1[p-1]]; /* 5'dangle of pq pair */
2010 if ((SAME_STRAND(i1,i1+1))&&(i1<(unsigned int)S[0]))
2011 dang3 = P->dangle3[type][S1[i1+1]]; /* 3'dangle of previous pair */
2014 case 0: /* adjacent helices */
2016 if (SAME_STRAND(i1,p)) {
2017 new_cx = energy + P->stack[rtype[type]][rtype[tt]];
2018 /* subtract 5'dangle and TerminalAU penalty */
2019 new_cx += -ld5 - mlintern[tt]-mlintern[type]+2*mlintern[1];
2022 energy = MIN2(energy, cx_energy);
2025 case 1: /* 1 unpaired base between helices */
2026 dang = MIN2(dang3, dang5);
2027 energy = energy +dang; ld5 = dang - dang3;
2028 /* may be problem here: Suppose
2029 cx_energy>energy, cx_energy+dang5<energy
2030 and the following helices are also stacked (i.e.
2031 we'll subtract the dang5 again */
2032 if (cx_energy+dang5 < energy) {
2033 energy = cx_energy+dang5;
2036 new_cx = INF; /* no coax stacking with mismatch for now */
2038 default: /* many unpaired base between helices */
2039 energy += dang5 +dang3;
2040 energy = MIN2(energy, cx_energy + dang5);
2041 new_cx = INF; /* no coax stacking possible */
2049 best_energy = MIN2(energy, best_energy); /* don't use cx_energy here */
2050 /* fprintf(stderr, "%6.2d\t", energy); */
2051 /* skip a helix and start again */
2052 while (pair_table[p]==0) p++;
2053 if (i == (unsigned int)pair_table[p]) break;
2054 i = (unsigned int)pair_table[p];
2055 } /* end doing it twice */
2056 energy = best_energy;
2059 default: E_mm5_available = E2_mm5_available = INF;
2060 E_mm5_occupied = E2_mm5_occupied = 0;
2062 /* p must have a pairing partner */
2063 q = (int)pair_table[p];
2064 /* get type of base pair (p,q) */
2065 tt = pair[S[p]][S[q]];
2068 E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
2069 E_mm5_occupied = E_mm5_available;
2071 if(q_prev2 + 2 < p){
2072 E2_mm5_available = MIN2(E2_mm5_available, E2_mm5_occupied);
2073 E2_mm5_occupied = E2_mm5_available;
2075 mm5 = ((SAME_STRAND(p-1,p)) && !pair_table[p-1]) ? S1[p-1] : -1;
2076 mm3 = ((SAME_STRAND(q,q+1)) && !pair_table[q+1]) ? S1[q+1] : -1;
2078 E_mm5_occupied + E_MLstem(tt, -1, mm3, P),
2079 E_mm5_available + E_MLstem(tt, mm5, mm3, P)
2081 tmp = MIN2(tmp, E_mm5_available + E_MLstem(tt, -1, mm3, P));
2083 E_mm5_occupied + E_MLstem(tt, -1, -1, P),
2084 E_mm5_available + E_MLstem(tt, mm5, -1, P)
2086 E_mm5_available = MIN2(tmp2, E_mm5_available + E_MLstem(tt, -1, -1, P));
2087 E_mm5_occupied = tmp;
2090 E2_mm5_occupied + E_MLstem(tt, -1, mm3, P),
2091 E2_mm5_available + E_MLstem(tt, mm5, mm3, P)
2093 tmp = MIN2(tmp, E2_mm5_available + E_MLstem(tt, -1, mm3, P));
2095 E2_mm5_occupied + E_MLstem(tt, -1, -1, P),
2096 E2_mm5_available + E_MLstem(tt, mm5, -1, P)
2098 E2_mm5_available = MIN2(tmp2, E2_mm5_available + E_MLstem(tt, -1, -1, P));
2099 E2_mm5_occupied = tmp;
2100 /* printf("(%d,%d): \n E_o = %d, E_a = %d, E2_o = %d, E2_a = %d\n", p, q, E_mm5_occupied,E_mm5_available,E2_mm5_occupied,E2_mm5_available); */
2101 /* seek to the next stem */
2103 q_prev = q_prev2 = q;
2104 while (p <= j && !pair_table[p]) p++;
2105 u += p - q - 1; /* add unpaired nucleotides */
2107 /* now lets see how we get the minimum including the enclosing stem */
2108 type = pair[S[j]][S[i]]; if (type==0) type=7;
2109 mm5 = ((SAME_STRAND(j-1,j)) && !pair_table[j-1]) ? S1[j-1] : -1;
2110 mm3 = ((SAME_STRAND(i,i+1)) && !pair_table[i+1]) ? S1[i+1] : -1;
2112 E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
2113 E_mm5_occupied = E_mm5_available;
2115 if(q_prev2 + 2 < p){
2116 E2_mm5_available = MIN2(E2_mm5_available, E2_mm5_occupied);
2117 E2_mm5_occupied = E2_mm5_available;
2119 energy = MIN2(E_mm5_occupied + E_MLstem(type, -1, -1, P),
2120 E_mm5_available + E_MLstem(type, mm5, -1, P)
2122 energy = MIN2(energy, E_mm5_available + E_MLstem(type, -1, -1, P));
2123 energy = MIN2(energy, E2_mm5_occupied + E_MLstem(type, -1, mm3, P));
2124 energy = MIN2(energy, E2_mm5_occupied + E_MLstem(type, -1, -1, P));
2125 energy = MIN2(energy, E2_mm5_available + E_MLstem(type, mm5, mm3, P));
2126 energy = MIN2(energy, E2_mm5_available + E_MLstem(type, -1, mm3, P));
2127 energy = MIN2(energy, E2_mm5_available + E_MLstem(type, mm5, -1, P));
2128 energy = MIN2(energy, E2_mm5_available + E_MLstem(type, -1, -1, P));
2130 }/* end switch dangle_model */
2132 energy += P->MLclosing;
2133 /* logarithmic ML loop energy if logML */
2135 energy += 6*P->MLbase+(int)(P->lxc*log((double)u/6.));
2137 energy += (u*P->MLbase);
2142 /*---------------------------------------------------------------------------*/
2144 PUBLIC int loop_energy(short * ptable, short *s, short *s1, int i) {
2145 /* compute energy of a single loop closed by base pair (i,j) */
2146 int j, type, p,q, energy;
2147 short *Sold, *S1old, *ptold;
2149 ptold=pair_table; Sold = S; S1old = S1;
2150 pair_table = ptable; S = s; S1 = s1;
2152 if (i==0) { /* evaluate exterior loop */
2153 energy = energy_of_extLoop_pt(0,pair_table);
2154 pair_table=ptold; S=Sold; S1=S1old;
2158 if (j<i) nrerror("i is unpaired in loop_energy()");
2159 type = pair[S[i]][S[j]];
2163 fprintf(stderr,"WARNING: bases %d and %d (%c%c) can't pair!\n", i, j,
2164 Law_and_Order[S[i]],Law_and_Order[S[j]]);
2169 while (pair_table[++p]==0);
2170 while (pair_table[--q]==0);
2171 if (p>q) { /* Hairpin */
2172 char loopseq[8] = "";
2173 if (SAME_STRAND(i,j)) {
2176 for (u=0; i+u<=j; u++) loopseq[u] = Law_and_Order[S[i+u]];
2179 energy = E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], loopseq, P);
2181 energy = energy_of_extLoop_pt(cut_in_loop(i), pair_table);
2184 else if (pair_table[q]!=(short)p) { /* multi-loop */
2186 ii = cut_in_loop(i);
2187 energy = (ii==0) ? energy_of_ml_pt(i, pair_table) : energy_of_extLoop_pt(ii, pair_table);
2189 else { /* found interior loop */
2191 type_2 = pair[S[q]][S[p]];
2195 fprintf(stderr,"WARNING: bases %d and %d (%c%c) can't pair!\n", p, q,
2196 Law_and_Order[S[p]],Law_and_Order[S[q]]);
2198 /* energy += LoopEnergy(i, j, p, q, type, type_2); */
2199 if ( SAME_STRAND(i,p) && SAME_STRAND(q,j) )
2200 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
2201 S1[i+1], S1[j-1], S1[p-1], S1[q+1], P);
2203 energy = energy_of_extLoop_pt(cut_in_loop(i), pair_table);
2206 pair_table=ptold; S=Sold; S1=S1old;
2210 /*---------------------------------------------------------------------------*/
2213 PUBLIC float energy_of_move(const char *string, const char *structure, int m1, int m2) {
2218 if(P == NULL) update_fold_params();
2220 if((init_length<0)||(P==NULL)) update_fold_params();
2223 if (fabs(P->temperature - temperature)>1e-6) update_fold_params();
2225 if (strlen(structure)!=strlen(string))
2226 nrerror("energy_of_struct: string and structure have unequal length");
2228 /* save the S and S1 pointers in case they were already in use */
2230 S = encode_sequence(string, 0);
2231 S1 = encode_sequence(string, 1);
2233 pair_table = make_pair_table(structure);
2235 energy = energy_of_move_pt(pair_table, S, S1, m1, m2);
2240 return (float) energy/100.;
2243 /*---------------------------------------------------------------------------*/
2245 PUBLIC int energy_of_move_pt(short *pt, short *s, short *s1, int m1, int m2) {
2246 /*compute change in energy given by move (m1,m2)*/
2247 int en_post, en_pre, i,j,k,l, len;
2252 /* first find the enclosing pair i<k<l<j */
2253 for (j=l+1; j<=len; j++) {
2254 if (pt[j]<=0) continue; /* unpaired */
2255 if (pt[j]<k) break; /* found it */
2256 if (pt[j]>j) j=pt[j]; /* skip substructure */
2258 fprintf(stderr, "%d %d %d %d ", m1, m2, j, pt[j]);
2259 nrerror("illegal move or broken pair table in energy_of_move()");
2262 i = (j<=len) ? pt[j] : 0;
2263 en_pre = loop_energy(pt, s, s1, i);
2265 if (m1<0) { /*it's a delete move */
2266 en_pre += loop_energy(pt, s, s1, k);
2269 } else { /* insert move */
2272 en_post += loop_energy(pt, s, s1, k);
2274 en_post += loop_energy(pt, s, s1, i);
2275 /* restore pair table */
2283 return (en_post - en_pre);
2288 PRIVATE int cut_in_loop(int i) {
2289 /* walk around the loop; return j pos of pair after cut if
2290 cut_point in loop else 0 */
2292 p = j = pair_table[i];
2294 i = pair_table[p]; p = i+1;
2295 while ( pair_table[p]==0 ) p++;
2296 } while (p!=j && SAME_STRAND(i,p));
2297 return SAME_STRAND(i,p) ? 0 : j;
2300 /*---------------------------------------------------------------------------*/
2302 PRIVATE void make_ptypes(const short *S, const char *structure) {
2306 for (k=1; k<n-TURN; k++)
2307 for (l=1; l<=2; l++) {
2308 int type,ntype=0,otype=0;
2309 i=k; j = i+TURN+l; if (j>n) continue;
2310 type = pair[S[i]][S[j]];
2311 while ((i>=1)&&(j<=n)) {
2312 if ((i>1)&&(j<n)) ntype = pair[S[i-1]][S[j+1]];
2313 if (noLonelyPairs && (!otype) && (!ntype))
2314 type = 0; /* i.j can only form isolated pairs */
2315 ptype[indx[j]+i] = (char) type;
2322 if (struct_constrained && (structure != NULL))
2323 constrain_ptypes(structure, (unsigned int)n, ptype, BP, TURN, 0);
2326 PUBLIC void assign_plist_from_db(plist **pl, const char *struc, float pr){
2327 /* convert bracket string to plist */
2329 int i, k = 0, size, n;
2332 size = strlen(struc);
2335 pt = make_pair_table(struc);
2336 *pl = (plist *)space(n*size*sizeof(plist));
2337 for(i = 1; i < size; i++){
2342 (*pl)[k++].type = 0;
2346 gpl = get_plist_gquad_from_db(struc, pr);
2347 for(ptr = gpl; ptr->i != 0; ptr++){
2348 if (k == n * size - 1){
2350 *pl = (plist *)xrealloc(*pl, n * size * sizeof(plist));
2352 (*pl)[k].i = ptr->i;
2353 (*pl)[k].j = ptr->j;
2354 (*pl)[k].p = ptr->p;
2355 (*pl)[k++].type = ptr->type;
2362 (*pl)[k++].type = 0.;
2364 *pl = (plist *)xrealloc(*pl, k * sizeof(plist));
2368 /*###########################################*/
2369 /*# deprecated functions below #*/
2370 /*###########################################*/
2372 PUBLIC int HairpinE(int size, int type, int si1, int sj1, const char *string) {
2375 energy = (size <= 30) ? P->hairpin[size] :
2376 P->hairpin[30]+(int)(P->lxc*log((size)/30.));
2379 if (size == 4) { /* check for tetraloop bonus */
2380 char tl[7]={0}, *ts;
2381 strncpy(tl, string, 6);
2382 if ((ts=strstr(P->Tetraloops, tl)))
2383 return (P->Tetraloop_E[(ts - P->Tetraloops)/7]);
2386 char tl[9]={0}, *ts;
2387 strncpy(tl, string, 8);
2388 if ((ts=strstr(P->Hexaloops, tl)))
2389 return (energy = P->Hexaloop_E[(ts - P->Hexaloops)/9]);
2392 char tl[6]={0,0,0,0,0,0}, *ts;
2393 strncpy(tl, string, 5);
2394 if ((ts=strstr(P->Triloops, tl))) {
2395 return (P->Triloop_E[(ts - P->Triloops)/6]);
2397 if (type>2) /* neither CG nor GC */
2398 energy += P->TerminalAU; /* penalty for closing AU GU pair IVOO??
2399 sind dass jetzt beaunuesse oder mahlnuesse (vorzeichen?)*/
2403 energy += P->mismatchH[type][si1][sj1];
2408 /*---------------------------------------------------------------------------*/
2410 PUBLIC int oldLoopEnergy(int i, int j, int p, int q, int type, int type_2) {
2411 /* compute energy of degree 2 loop (stack bulge or interior) */
2412 int n1, n2, m, energy;
2416 if (n1>n2) { m=n1; n1=n2; n2=m; } /* so that n2>=n1 */
2419 energy = P->stack[type][type_2]; /* stack */
2421 else if (n1==0) { /* bulge */
2422 energy = (n2<=MAXLOOP)?P->bulge[n2]:
2423 (P->bulge[30]+(int)(P->lxc*log(n2/30.)));
2426 if (n2==1) energy+=P->stack[type][type_2];
2428 } else { /* interior loop */
2430 if ((n1+n2==2)&&(james_rule))
2431 /* special case for loop size 2 */
2432 energy = P->int11[type][type_2][S1[i+1]][S1[j-1]];
2434 energy = (n1+n2<=MAXLOOP)?(P->internal_loop[n1+n2]):
2435 (P->internal_loop[30]+(int)(P->lxc*log((n1+n2)/30.)));
2438 energy += MIN2(MAX_NINIO, (n2-n1)*P->ninio[2]);
2441 energy += MIN2(MAX_NINIO,((n2-n1)*P->ninio[m]));
2443 energy += P->mismatchI[type][S1[i+1]][S1[j-1]]+
2444 P->mismatchI[type_2][S1[q+1]][S1[p-1]];
2450 /*--------------------------------------------------------------------------*/
2452 PUBLIC int LoopEnergy(int n1, int n2, int type, int type_2,
2453 int si1, int sj1, int sp1, int sq1) {
2454 /* compute energy of degree 2 loop (stack bulge or interior) */
2457 if (n1>n2) { nl=n1; ns=n2;}
2458 else {nl=n2; ns=n1;}
2461 return P->stack[type][type_2]; /* stack */
2463 if (ns==0) { /* bulge */
2464 energy = (nl<=MAXLOOP)?P->bulge[nl]:
2465 (P->bulge[30]+(int)(P->lxc*log(nl/30.)));
2466 if (nl==1) energy += P->stack[type][type_2];
2468 if (type>2) energy += P->TerminalAU;
2469 if (type_2>2) energy += P->TerminalAU;
2473 else { /* interior loop */
2475 if (nl==1) /* 1x1 loop */
2476 return P->int11[type][type_2][si1][sj1];
2477 if (nl==2) { /* 2x1 loop */
2479 energy = P->int21[type][type_2][si1][sq1][sj1];
2481 energy = P->int21[type_2][type][sq1][si1][sp1];
2484 else { /* 1xn loop */
2485 energy = (nl+1<=MAXLOOP)?(P->internal_loop[nl+1]):
2486 (P->internal_loop[30]+(int)(P->lxc*log((nl+1)/30.)));
2487 energy += MIN2(MAX_NINIO, (nl-ns)*P->ninio[2]);
2488 energy += P->mismatch1nI[type][si1][sj1]+
2489 P->mismatch1nI[type_2][sq1][sp1];
2494 if(nl==2) { /* 2x2 loop */
2495 return P->int22[type][type_2][si1][sp1][sq1][sj1];}
2496 else if (nl==3) { /* 2x3 loop */
2497 energy = P->internal_loop[5]+P->ninio[2];
2498 energy += P->mismatch23I[type][si1][sj1]+
2499 P->mismatch23I[type_2][sq1][sp1];
2504 { /* generic interior loop (no else here!)*/
2505 energy = (n1+n2<=MAXLOOP)?(P->internal_loop[n1+n2]):
2506 (P->internal_loop[30]+(int)(P->lxc*log((n1+n2)/30.)));
2508 energy += MIN2(MAX_NINIO, (nl-ns)*P->ninio[2]);
2510 energy += P->mismatchI[type][si1][sj1]+
2511 P->mismatchI[type_2][sq1][sp1];
2517 PRIVATE int ML_Energy(int i, int is_extloop) {
2518 /* i is the 5'-base of the closing pair (or 0 for exterior loop)
2519 loop is scored as ML if extloop==0 else as exterior loop
2521 since each helix can coaxially stack with at most one of its
2522 neighbors we need an auxiliarry variable cx_energy
2523 which contains the best energy given that the last two pairs stack.
2524 energy holds the best energy given the previous two pairs do not
2525 stack (i.e. the two current helices may stack)
2526 We don't allow the last helix to stack with the first, thus we have to
2527 walk around the Loop twice with two starting points and take the minimum
2530 int energy, cx_energy, best_energy=INF;
2531 int i1, j, p, q, u, x, type, count;
2532 int mlintern[NBPAIRS+1], mlclosing, mlbase;
2533 int dangle_model = P->model_details.dangles;
2536 for (x = 0; x <= NBPAIRS; x++)
2537 mlintern[x] = P->MLintern[x]-P->MLintern[1]; /* 0 or TerminalAU */
2538 mlclosing = mlbase = 0;
2540 for (x = 0; x <= NBPAIRS; x++) mlintern[x] = P->MLintern[x];
2541 mlclosing = P->MLclosing; mlbase = P->MLbase;
2544 /* as we do not only have dangling end but also mismatch contributions,
2545 ** we do this a bit different to previous implementations
2552 int E_mm5_available, E_mm5_occupied;
2553 /* find out if we may have 5' mismatch for the next stem */
2554 while (p <= (int)pair_table[0] && pair_table[p]==0) p++;
2555 /* get position of pairing partner */
2556 if(p < (int)pair_table[0]){
2557 E_mm5_occupied = (p - i - 1 > 0) ? INF : 0;
2558 E_mm5_available = (p - i - 1 > 0) ? 0 : INF;
2561 if(p < (int)pair_table[0])
2564 /* p must have a pairing partner */
2565 q = (int)pair_table[p];
2566 /* get type of base pair (p,q) */
2567 tt = pair[S[p]][S[q]];
2570 int mm5 = ((SAME_STRAND(p-1,p)) && (p>1)) ? S1[p-1]: -1;
2571 int mm3 = ((SAME_STRAND(q,q+1)) && (q<(unsigned int)pair_table[0])) ? S1[q+1]: -1;
2573 switch(dangle_model){
2574 /* dangle_model == 0 */
2575 case 0: energy += E_ExtLoop(tt, -1, -1, P);
2577 /* dangle_model == 1 */
2579 /* check for unpaired nucleotide 3' to the current stem */
2580 int u3 = ((q < pair_table[0]) && (pair_table[q+1] == 0)) ? 1 : 0;
2581 if(pair_table[p-1] != 0) mm5 = -1;
2585 E_mm5_occupied = MIN2(
2586 E_mm5_occupied + E_ExtLoop(tt, -1, -1, P),
2587 E_mm5_available + E_ExtLoop(tt, mm5, -1, P)
2589 E_mm5_available = E_mm5_occupied;
2592 E_mm5_occupied = MIN2(
2593 E_mm5_occupied + E_ExtLoop(tt, -1, mm3, P),
2594 E_mm5_available + E_ExtLoop(tt, mm5, mm3, P)
2596 E_mm5_available = MIN2(
2597 E_mm5_occupied + E_ExtLoop(tt, -1, -1, P),
2598 E_mm5_available + E_ExtLoop(tt, mm5, -1, P)
2604 /* the beloved case dangle_model == 2 */
2605 case 2: energy += E_ExtLoop(tt, mm5, mm3, P);
2608 /* dangle_model == 3 a.k.a. helix stacking */
2611 } /* end switch dangle_model */
2613 /* seek to the next stem */
2615 while (p <= (int)pair_table[0] && pair_table[p]==0) p++;
2616 if(p == (int)pair_table[0] + 1){
2617 if(dangle_model == 1)
2618 energy = (p > q + 1) ? E_mm5_occupied : E_mm5_available;
2624 /* not exterior loop */
2626 for (count=0; count<2; count++) { /* do it twice */
2627 int ld5 = 0; /* 5' dangle energy on prev pair (type) */
2629 j = (unsigned int)pair_table[0]+1;
2630 type = 0; /* no pair */
2633 j = (unsigned int)pair_table[i];
2634 type = pair[S[j]][S[i]]; if (type==0) type=7;
2636 if (dangle_model==3) { /* prime the ld5 variable */
2637 if (SAME_STRAND(j-1,j)) {
2638 ld5 = P->dangle5[type][S1[j-1]];
2639 if ((p=(unsigned int)pair_table[j-2]) && SAME_STRAND(j-2, j-1))
2640 if (P->dangle3[pair[S[p]][S[j-2]]][S1[j-1]]<ld5) ld5 = 0;
2645 energy = 0; cx_energy=INF;
2646 do { /* walk around the multi-loop */
2647 int tt, new_cx = INF;
2649 /* hop over unpaired positions */
2650 while (p <= (unsigned int)pair_table[0] && pair_table[p]==0) p++;
2652 /* memorize number of unpaired positions */
2654 /* get position of pairing partner */
2655 if ( p == (unsigned int)pair_table[0]+1 ){
2656 q = 0;tt = 0; /* virtual root pair */
2658 q = (unsigned int)pair_table[p];
2659 /* get type of base pair P->q */
2660 tt = pair[S[p]][S[q]]; if (tt==0) tt=7;
2663 energy += mlintern[tt];
2664 cx_energy += mlintern[tt];
2667 int dang5=0, dang3=0, dang;
2668 if ((SAME_STRAND(p-1,p))&&(p>1))
2669 dang5=P->dangle5[tt][S1[p-1]]; /* 5'dangle of pq pair */
2670 if ((SAME_STRAND(i1,i1+1))&&(i1<(unsigned int)S[0]))
2671 dang3 = P->dangle3[type][S1[i1+1]]; /* 3'dangle of previous pair */
2674 case 0: /* adjacent helices */
2675 if (dangle_model==2)
2676 energy += dang3+dang5;
2677 else if (dangle_model==3 && i1!=0) {
2678 if (SAME_STRAND(i1,p)) {
2679 new_cx = energy + P->stack[rtype[type]][rtype[tt]];
2680 /* subtract 5'dangle and TerminalAU penalty */
2681 new_cx += -ld5 - mlintern[tt]-mlintern[type]+2*mlintern[1];
2684 energy = MIN2(energy, cx_energy);
2687 case 1: /* 1 unpaired base between helices */
2688 dang = (dangle_model==2)?(dang3+dang5):MIN2(dang3, dang5);
2689 if (dangle_model==3) {
2690 energy = energy +dang; ld5 = dang - dang3;
2691 /* may be problem here: Suppose
2692 cx_energy>energy, cx_energy+dang5<energy
2693 and the following helices are also stacked (i.e.
2694 we'll subtract the dang5 again */
2695 if (cx_energy+dang5 < energy) {
2696 energy = cx_energy+dang5;
2699 new_cx = INF; /* no coax stacking with mismatch for now */
2703 default: /* many unpaired base between helices */
2704 energy += dang5 +dang3;
2705 if (dangle_model==3) {
2706 energy = MIN2(energy, cx_energy + dang5);
2707 new_cx = INF; /* no coax stacking possible */
2713 if (dangle_model==3) cx_energy = new_cx;
2716 best_energy = MIN2(energy, best_energy); /* don't use cx_energy here */
2717 /* fprintf(stderr, "%6.2d\t", energy); */
2718 if (dangle_model!=3 || is_extloop) break; /* may break cofold with co-ax */
2719 /* skip a helix and start again */
2720 while (pair_table[p]==0) p++;
2721 if (i == (unsigned int)pair_table[p]) break;
2722 i = (unsigned int)pair_table[p];
2724 energy = best_energy;
2725 energy += mlclosing;
2726 /* logarithmic ML loop energy if logML */
2727 if ( (!is_extloop) && logML && (u>6) )
2728 energy += 6*mlbase+(int)(P->lxc*log((double)u/6.));
2731 /* fprintf(stderr, "\n"); */
2736 PUBLIC void initialize_fold(int length){
2740 PUBLIC float energy_of_struct(const char *string, const char *structure){
2741 return energy_of_structure(string, structure, eos_debug);
2744 PUBLIC int energy_of_struct_pt(const char *string, short * ptable, short *s, short *s1){
2745 return energy_of_structure_pt(string, ptable, s, s1, eos_debug);
2748 PUBLIC float energy_of_circ_struct(const char *string, const char *structure){
2749 return energy_of_circ_structure(string, structure, eos_debug);