1 #ifndef __VIENNA_RNA_PACKAGE_LOOP_ENERGIES_H__
2 #define __VIENNA_RNA_PACKAGE_LOOP_ENERGIES_H__
10 #include "fold_vars.h"
11 #include "energy_par.h"
14 # define INLINE inline
20 * \file loop_energies.h
21 * \brief Energy evaluation for MFE and partition function calculations
24 * This file contains functions for the calculation of the free energy \f$\Delta G\f$
25 * of a hairpin- [ E_Hairpin() ] or interior-loop [ E_IntLoop()] .<BR>
26 * The unit of the free energy returned is \f$10^{-2} * \mathrm{kcal}/\mathrm{mol}\f$
29 * In case of computing the partition function, this file also supplies functions
30 * which return the Boltzmann weights \f$e^{-\Delta G/kT} \f$ for a hairpin- [ exp_E_Hairpin() ]
31 * or interior-loop [ exp_E_IntLoop() ].
36 * \def E_MLstem(A,B,C,D)
37 * <H2>Compute the Energy contribution of a Multiloop stem</H2>
38 * This definition is a wrapper for the E_Stem() funtion.
39 * It is substituted by an E_Stem() funtion call with argument
40 * extLoop=0, so the energy contribution returned reflects a
41 * stem introduced in a multiloop.<BR>
42 * As for the parameters B (si1) and C (sj1) of the substituted
43 * E_Stem() function, you can inhibit to take 5'-, 3'-dangles
44 * or mismatch contributions to be taken into account by passing
45 * -1 to these parameters.
48 * \param A The pair type of the stem-closing pair
49 * \param B The 5'-mismatching nucleotide
50 * \param C The 3'-mismatching nucleotide
51 * \param D The datastructure containing scaled energy parameters
52 * \return The energy contribution of the introduced multiloop stem
54 INLINE PRIVATE int E_MLstem( int type,
60 * \def exp_E_MLstem(A,B,C,D)
61 * This is the partition function variant of \ref E_MLstem()
63 * \return The Boltzmann weighted energy contribution of the introduced multiloop stem
65 INLINE PRIVATE double exp_E_MLstem(int type,
71 * \def E_ExtLoop(A,B,C,D)
72 * <H2>Compute the Energy contribution of an Exterior loop stem</H2>
73 * This definition is a wrapper for the E_Stem() funtion.
74 * It is substituted by an E_Stem() funtion call with argument
75 * extLoop=1, so the energy contribution returned reflects a
76 * stem introduced in an exterior-loop.<BR>
77 * As for the parameters B (si1) and C (sj1) of the substituted
78 * E_Stem() function, you can inhibit to take 5'-, 3'-dangles
79 * or mismatch contributions to be taken into account by passing
80 * -1 to these parameters.
83 * \param A The pair type of the stem-closing pair
84 * \param B The 5'-mismatching nucleotide
85 * \param C The 3'-mismatching nucleotide
86 * \param D The datastructure containing scaled energy parameters
87 * \return The energy contribution of the introduced exterior-loop stem
89 INLINE PRIVATE int E_ExtLoop(int type,
95 * \def exp_E_ExtLoop(A,B,C,D)
96 * This is the partition function variant of \ref E_ExtLoop()
98 * \return The Boltzmann weighted energy contribution of the introduced exterior-loop stem
100 INLINE PRIVATE double exp_E_ExtLoop( int type,
106 * <H2>Compute the Energy of an interior-loop</H2>
107 * This function computes the free energy \f$\Delta G\f$ of an interior-loop with the
108 * following structure: <BR>
122 * This general structure depicts an interior-loop that is closed by the base pair (X,Y).
123 * The enclosed base pair is (V,U) which leaves the unpaired bases a_1-a_n and b_1-b_n
124 * that constitute the loop. In this example, the length of the interior-loop is \f$(n+m)\f$
125 * where n or m may be 0 resulting in a bulge-loop or base pair stack.
126 * The mismatching nucleotides for the closing pair (X,Y) are:<BR>
127 * 5'-mismatch: a_1<BR>
128 * 3'-mismatch: b_m<BR>
129 * and for the enclosed base pair (V,U):<BR>
130 * 5'-mismatch: b_1<BR>
131 * 3'-mismatch: a_n<BR>
132 * \note Base pairs are always denoted in 5'->3' direction. Thus the enclosed base pair
133 * must be 'turned arround' when evaluating the free energy of the interior-loop
134 * \see scale_parameters()
136 * \note This function is threadsafe
138 * \param n1 The size of the 'left'-loop (number of unpaired nucleotides)
139 * \param n2 The size of the 'right'-loop (number of unpaired nucleotides)
140 * \param type The pair type of the base pair closing the interior loop
141 * \param type_2 The pair type of the enclosed base pair
142 * \param si1 The 5'-mismatching nucleotide of the closing pair
143 * \param sj1 The 3'-mismatching nucleotide of the closing pair
144 * \param sp1 The 3'-mismatching nucleotide of the enclosed pair
145 * \param sq1 The 5'-mismatching nucleotide of the enclosed pair
146 * \param P The datastructure containing scaled energy parameters
147 * \return The Free energy of the Interior-loop in dcal/mol
149 INLINE PRIVATE int E_IntLoop(int n1,
161 * <H2>Compute the Energy of a hairpin-loop</H2>
162 * To evaluate the free energy of a hairpin-loop, several parameters have to be known.
163 * A general hairpin-loop has this structure:<BR>
172 * where X-Y marks the closing pair [e.g. a <B>(G,C)</B> pair]. The length of this loop is 6 as there are
173 * six unpaired nucleotides (a1-a6) enclosed by (X,Y). The 5' mismatching nucleotide is
174 * a1 while the 3' mismatch is a6. The nucleotide sequence of this loop is "a1.a2.a3.a4.a5.a6" <BR>
175 * \note The parameter sequence should contain the sequence of the loop in capital letters of the nucleic acid
176 * alphabet if the loop size is below 7. This is useful for unusually stable tri-, tetra- and hexa-loops
177 * which are treated differently (based on experimental data) if they are tabulated.
178 * @see scale_parameters()
180 * \warning Not (really) thread safe! A threadsafe implementation will replace this function in a future release!\n
181 * Energy evaluation may change due to updates in global variable "tetra_loop"
183 * \param size The size of the loop (number of unpaired nucleotides)
184 * \param type The pair type of the base pair closing the hairpin
185 * \param si1 The 5'-mismatching nucleotide
186 * \param sj1 The 3'-mismatching nucleotide
187 * \param string The sequence of the loop
188 * \param P The datastructure containing scaled energy parameters
189 * \return The Free energy of the Hairpin-loop in dcal/mol
191 INLINE PRIVATE int E_Hairpin(int size,
199 * <H2>Compute the energy contribution of a stem branching off a loop-region</H2>
200 * This function computes the energy contribution of a stem that branches off
201 * a loop region. This can be the case in multiloops, when a stem branching off
202 * increases the degree of the loop but also <I>immediately interior base pairs</I>
203 * of an exterior loop contribute free energy.
204 * To switch the bahavior of the function according to the evaluation of a multiloop-
205 * or exterior-loop-stem, you pass the flag 'extLoop'.
206 * The returned energy contribution consists of a TerminalAU penalty if the pair type
207 * is greater than 2, dangling end contributions of mismatching nucleotides adjacent to
208 * the stem if only one of the si1, sj1 parameters is greater than 0 and mismatch energies
209 * if both mismatching nucleotides are positive values.
210 * Thus, to avoid incooperating dangling end or mismatch energies just pass a negative number,
211 * e.g. -1 to the mismatch argument.
213 * This is an illustration of how the energy contribution is assembled:
221 * Here, (X,Y) is the base pair that closes the stem that branches off a loop region.
222 * The nucleotides si1 and sj1 are the 5'- and 3'- mismatches, respectively. If the base pair
223 * type of (X,Y) is greater than 2 (i.e. an A-U or G-U pair, the TerminalAU penalty will be
224 * included in the energy contribution returned. If si1 and sj1 are both nonnegative numbers,
225 * mismatch energies will also be included. If one of sij or sj1 is a negtive value, only
226 * 5' or 3' dangling end contributions are taken into account. To prohibit any of these mismatch
227 * contributions to be incoorporated, just pass a negative number to both, si1 and sj1.
228 * In case the argument extLoop is 0, the returned energy contribution also includes
229 * the <I>internal-loop-penalty</I> of a multiloop stem with closing pair type.
233 * \note This function is threadsafe
235 * \param type The pair type of the first base pair un the stem
236 * \param si1 The 5'-mismatching nucleotide
237 * \param sj1 The 3'-mismatching nucleotide
238 * \param extLoop A flag that indicates whether the contribution reflects the one of an exterior loop or not
239 * \param P The datastructure containing scaled energy parameters
240 * \return The Free energy of the branch off the loop in dcal/mol
243 INLINE PRIVATE int E_Stem( int type,
250 * <H2>Compute the Boltzmann weighted energy contribution of a stem branching off a loop-region</H2>
251 * This is the partition function variant of \ref E_Stem()
253 * \note This function is threadsafe
255 * \return The Boltzmann weighted energy contribution of the branch off the loop
257 INLINE PRIVATE double exp_E_Stem(int type,
264 * <H2>Compute Boltzmann weight \f$e^{-\Delta G/kT} \f$ of a hairpin loop</H2>
265 * multiply by scale[u+2]
266 * @see get_scaled_pf_parameters()
269 * \warning Not (really) thread safe! A threadsafe implementation will replace this function in a future release!\n
270 * Energy evaluation may change due to updates in global variable "tetra_loop"
272 * \param u The size of the loop (number of unpaired nucleotides)
273 * \param type The pair type of the base pair closing the hairpin
274 * \param si1 The 5'-mismatching nucleotide
275 * \param sj1 The 3'-mismatching nucleotide
276 * \param string The sequence of the loop
277 * \param P The datastructure containing scaled Boltzmann weights of the energy parameters
278 * \return The Boltzmann weight of the Hairpin-loop
280 INLINE PRIVATE double exp_E_Hairpin( int u,
288 * <H2>Compute Boltzmann weight \f$e^{-\Delta G/kT} \f$ of interior loop</H2>
289 * multiply by scale[u1+u2+2] for scaling
290 * @see get_scaled_pf_parameters()
293 * \note This function is threadsafe
295 * \param u1 The size of the 'left'-loop (number of unpaired nucleotides)
296 * \param u2 The size of the 'right'-loop (number of unpaired nucleotides)
297 * \param type The pair type of the base pair closing the interior loop
298 * \param type2 The pair type of the enclosed base pair
299 * \param si1 The 5'-mismatching nucleotide of the closing pair
300 * \param sj1 The 3'-mismatching nucleotide of the closing pair
301 * \param sp1 The 3'-mismatching nucleotide of the enclosed pair
302 * \param sq1 The 5'-mismatching nucleotide of the enclosed pair
303 * \param P The datastructure containing scaled Boltzmann weights of the energy parameters
304 * \return The Boltzmann weight of the Interior-loop
306 INLINE PRIVATE double exp_E_IntLoop(int u1,
318 #################################
319 # BEGIN OF FUNCTION DEFINITIONS #
320 #################################
322 INLINE PRIVATE int E_Hairpin(int size, int type, int si1, int sj1, const char *string, paramT *P){
325 energy = (size <= 30) ? P->hairpin[size] : P->hairpin[30]+(int)(P->lxc*log((size)/30.));
326 if (P->model_details.special_hp){
327 if (size == 4) { /* check for tetraloop bonus */
329 strncpy(tl, string, 6);
330 if ((ts=strstr(P->Tetraloops, tl)))
331 return (P->Tetraloop_E[(ts - P->Tetraloops)/7]);
333 else if (size == 6) {
335 strncpy(tl, string, 8);
336 if ((ts=strstr(P->Hexaloops, tl)))
337 return (energy = P->Hexaloop_E[(ts - P->Hexaloops)/9]);
339 else if (size == 3) {
340 char tl[6]={0,0,0,0,0,0}, *ts;
341 strncpy(tl, string, 5);
342 if ((ts=strstr(P->Triloops, tl))) {
343 return (P->Triloop_E[(ts - P->Triloops)/6]);
345 return (energy + (type>2 ? P->TerminalAU : 0));
348 energy += P->mismatchH[type][si1][sj1];
353 INLINE PRIVATE int E_IntLoop(int n1, int n2, int type, int type_2, int si1, int sj1, int sp1, int sq1, paramT *P){
354 /* compute energy of degree 2 loop (stack bulge or interior) */
358 if (n1>n2) { nl=n1; ns=n2;}
362 return P->stack[type][type_2]; /* stack */
364 if (ns==0) { /* bulge */
365 energy = (nl<=MAXLOOP)?P->bulge[nl]:
366 (P->bulge[30]+(int)(P->lxc*log(nl/30.)));
367 if (nl==1) energy += P->stack[type][type_2];
369 if (type>2) energy += P->TerminalAU;
370 if (type_2>2) energy += P->TerminalAU;
374 else { /* interior loop */
376 if (nl==1) /* 1x1 loop */
377 return P->int11[type][type_2][si1][sj1];
378 if (nl==2) { /* 2x1 loop */
380 energy = P->int21[type][type_2][si1][sq1][sj1];
382 energy = P->int21[type_2][type][sq1][si1][sp1];
385 else { /* 1xn loop */
386 energy = (nl+1<=MAXLOOP)?(P->internal_loop[nl+1]) : (P->internal_loop[30]+(int)(P->lxc*log((nl+1)/30.)));
387 energy += MIN2(MAX_NINIO, (nl-ns)*P->ninio[2]);
388 energy += P->mismatch1nI[type][si1][sj1] + P->mismatch1nI[type_2][sq1][sp1];
393 if(nl==2) { /* 2x2 loop */
394 return P->int22[type][type_2][si1][sp1][sq1][sj1];}
395 else if (nl==3){ /* 2x3 loop */
396 energy = P->internal_loop[5]+P->ninio[2];
397 energy += P->mismatch23I[type][si1][sj1] + P->mismatch23I[type_2][sq1][sp1];
402 { /* generic interior loop (no else here!)*/
403 energy = (n1+n2<=MAXLOOP)?(P->internal_loop[n1+n2]) : (P->internal_loop[30]+(int)(P->lxc*log((n1+n2)/30.)));
405 energy += MIN2(MAX_NINIO, (nl-ns)*P->ninio[2]);
407 energy += P->mismatchI[type][si1][sj1] + P->mismatchI[type_2][sq1][sp1];
413 INLINE PRIVATE int E_Stem(int type, int si1, int sj1, int extLoop, paramT *P){
415 int d5 = (si1 >= 0) ? P->dangle5[type][si1] : 0;
416 int d3 = (sj1 >= 0) ? P->dangle3[type][sj1] : 0;
419 energy += P->TerminalAU;
421 if(si1 >= 0 && sj1 >= 0)
422 energy += (extLoop) ? P->mismatchExt[type][si1][sj1] : P->mismatchM[type][si1][sj1];
426 if(!extLoop) energy += P->MLintern[type];
430 INLINE PRIVATE int E_ExtLoop(int type, int si1, int sj1, paramT *P){
432 if(si1 >= 0 && sj1 >= 0){
433 energy += P->mismatchExt[type][si1][sj1];
436 energy += P->dangle5[type][si1];
439 energy += P->dangle3[type][sj1];
443 energy += P->TerminalAU;
448 INLINE PRIVATE int E_MLstem(int type, int si1, int sj1, paramT *P){
450 if(si1 >= 0 && sj1 >= 0){
451 energy += P->mismatchM[type][si1][sj1];
454 energy += P->dangle5[type][si1];
457 energy += P->dangle3[type][sj1];
461 energy += P->TerminalAU;
463 energy += P->MLintern[type];
468 INLINE PRIVATE double exp_E_Hairpin(int u, int type, short si1, short sj1, const char *string, pf_paramT *P){
470 kT = P->kT; /* kT in cal/mol */
473 q = P->exphairpin[u];
475 q = P->exphairpin[30] * exp( -(P->lxc*log( u/30.))*10./kT);
477 if(u < 3) return q; /* should only be the case when folding alignments */
479 if ((P->model_details.special_hp)&&(u==4)) {
480 char tl[7]={0,0,0,0,0,0,0}, *ts;
481 strncpy(tl, string, 6);
482 if ((ts=strstr(P->Tetraloops, tl))){
484 return (P->exptetra[(ts-P->Tetraloops)/7]);
486 q *= P->exptetra[(ts-P->Tetraloops)/7];
489 if ((tetra_loop)&&(u==6)) {
490 char tl[9]={0,0,0,0,0,0,0,0,0}, *ts;
491 strncpy(tl, string, 6);
492 if ((ts=strstr(P->Hexaloops, tl)))
493 return (P->exphex[(ts-P->Hexaloops)/9]);
496 char tl[6]={0,0,0,0,0,0}, *ts;
497 strncpy(tl, string, 5);
498 if ((ts=strstr(P->Triloops, tl)))
499 return (P->exptri[(ts-P->Triloops)/6]);
503 else /* no mismatches for tri-loops */
504 q *= P->expmismatchH[type][si1][sj1];
509 INLINE PRIVATE double exp_E_IntLoop(int u1, int u2, int type, int type2, short si1, short sj1, short sp1, short sq1, pf_paramT *P){
510 int ul, us, no_close = 0;
513 if ((no_closingGU) && ((type2==3)||(type2==4)||(type==3)||(type==4)))
516 if (u1>u2) { ul=u1; us=u2;}
519 if (ul==0) /* stack */
520 z = P->expstack[type][type2];
522 if (us==0) { /* bulge */
524 if (ul==1) z *= P->expstack[type][type2];
526 if (type>2) z *= P->expTermAU;
527 if (type2>2) z *= P->expTermAU;
532 if (ul==1){ /* 1x1 loop */
533 return P->expint11[type][type2][si1][sj1];
535 if (ul==2) { /* 2x1 loop */
537 return P->expint21[type][type2][si1][sq1][sj1];
539 return P->expint21[type2][type][sq1][si1][sp1];
541 else { /* 1xn loop */
542 z = P->expinternal[ul+us] * P->expmismatch1nI[type][si1][sj1] * P->expmismatch1nI[type2][sq1][sp1];
543 return z * P->expninio[2][ul-us];
547 if(ul==2) /* 2x2 loop */
548 return P->expint22[type][type2][si1][sp1][sq1][sj1];
549 else if(ul==3){ /* 2x3 loop */
550 z = P->expinternal[5]*P->expmismatch23I[type][si1][sj1]*P->expmismatch23I[type2][sq1][sp1];
551 return z * P->expninio[2][1];
554 /* generic interior loop (no else here!)*/
555 z = P->expinternal[ul+us] * P->expmismatchI[type][si1][sj1] * P->expmismatchI[type2][sq1][sp1];
556 return z * P->expninio[2][ul-us];
562 INLINE PRIVATE double exp_E_Stem(int type, int si1, int sj1, int extLoop, pf_paramT *P){
564 double d5 = (si1 >= 0) ? P->expdangle5[type][si1] : 1.;
565 double d3 = (sj1 >= 0) ? P->expdangle3[type][sj1] : 1.;
568 energy *= P->expTermAU;
570 if(si1 >= 0 && sj1 >= 0)
571 energy *= (extLoop) ? P->expmismatchExt[type][si1][sj1] : P->expmismatchM[type][si1][sj1];
575 if(!extLoop) energy *= P->expMLintern[type];
579 INLINE PRIVATE double exp_E_MLstem(int type, int si1, int sj1, pf_paramT *P){
581 if(si1 >= 0 && sj1 >= 0){
582 energy *= P->expmismatchM[type][si1][sj1];
585 energy *= P->expdangle5[type][si1];
588 energy *= P->expdangle3[type][sj1];
592 energy *= P->expTermAU;
594 energy *= P->expMLintern[type];
598 INLINE PRIVATE double exp_E_ExtLoop(int type, int si1, int sj1, pf_paramT *P){
600 if(si1 >= 0 && sj1 >= 0){
601 energy *= P->expmismatchExt[type][si1][sj1];
604 energy *= P->expdangle5[type][si1];
607 energy *= P->expdangle3[type][sj1];
611 energy *= P->expTermAU;
616 INLINE PRIVATE int E_IntLoop_Co(int type, int type_2, int i, int j, int p, int q, int cutpoint, short si1, short sj1, short sp1, short sq1, int dangles, paramT *P){
618 if(type > 2) energy += P->TerminalAU;
619 if(type_2 > 2) energy += P->TerminalAU;
621 if(!dangles) return energy;
623 int ci = (i>=cutpoint)||((i+1)<cutpoint);
624 int cj = ((j-1)>=cutpoint)||(j<cutpoint);
625 int cp = ((p-1)>=cutpoint)||(p<cutpoint);
626 int cq = (q>=cutpoint)||((q+1)<cutpoint);
628 int d3 = ci ? P->dangle3[type][si1] : 0;
629 int d5 = cj ? P->dangle5[type][sj1] : 0;
630 int d5_2 = cp ? P->dangle5[type_2][sp1] : 0;
631 int d3_2 = cq ? P->dangle3[type_2][sq1] : 0;
633 int tmm = (cj && ci) ? P->mismatchExt[type][sj1][si1] : d5 + d3;
634 int tmm_2 = (cp && cq) ? P->mismatchExt[type_2][sp1][sq1] : d5_2 + d3_2;
636 if(dangles == 2) return energy + tmm + tmm_2;
638 /* now we may have non-double dangles only */
640 if(q+2 < j){ energy += tmm + tmm_2;}
641 else if(q+2 == j){ energy += (cj && cq) ? MIN2(tmm + d5_2, tmm_2 + d3) : tmm + tmm_2;}
642 else energy += d3 + d5_2;
645 if(q+2 < j){ energy += (ci && cp) ? MIN2(tmm + d3_2, tmm_2 + d5) : tmm + tmm_2;}
647 energy += MIN2(tmm, MIN2(tmm_2, MIN2(d5 + d5_2, d3 + d3_2)));
649 else energy += MIN2(d3, d5_2);
652 if(q+2 < j){ energy += d5 + d3_2;}
653 else if(q+2 == j){ energy += MIN2(d5, d3_2);}