1 #ifndef __VIENNA_RNA_PACKAGE_FOLD_H__
2 #define __VIENNA_RNA_PACKAGE_FOLD_H__
4 #include "data_structures.h"
7 #define DEPRECATED(func) func __attribute__ ((deprecated))
9 #define DEPRECATED(func) func
13 * \addtogroup mfe_fold
14 * \ingroup folding_routines
15 * \brief This section covers all functions and variables related to the calculation
16 * of minimum free energy (MFE) structures.
18 * The library provides a fast dynamic programming minimum free energy
19 * folding algorithm as described in \cite zuker:1981.
20 * All relevant parts that directly implement the "Zuker & Stiegler" algorithm for single
21 * sequences are described in this section.
23 * Folding of circular RNA sequences is handled as a post-processing step of the forward
24 * recursions. See \cite hofacker:2006 for further details.
26 * Nevertheless, the RNAlib also
27 * provides interfaces for the prediction of consensus MFE structures of sequence alignments,
28 * MFE structure for two hybridized sequences, local optimal structures and many more. For
29 * those more specialized variants of MFE folding routines, please consult the appropriate
30 * subsections (Modules) as listed above.
33 * \brief MFE calculations and energy evaluations for single RNA sequences
35 * This file includes (almost) all function declarations within the RNAlib that are related to
40 * \defgroup eval Energy evaluation
42 * \brief This module contains all functions and variables related to energy evaluation
43 * of sequence/structure pairs.
50 * \defgroup mfe_fold Calculating Minimum Free Energy Structures
52 * \brief This module contains all functions and variables related to the calculation
53 * of global minimum free energy structures for single sequences.
55 * The library provides a fast dynamic programming minimum free energy
56 * folding algorithm as described by \ref zuker_81 "Zuker & Stiegler (1981)".
60 /** \brief if nonzero use logarithmic ML energy in energy_of_struct */
63 /** \brief do ML decomposition uniquely (for subopt) */
66 /** \brief set to first pos of second seq for cofolding */
70 * \brief verbose info from energy_of_struct
77 * \brief Compute minimum free energy and an appropriate secondary
78 * structure of an RNA sequence
80 * The first parameter given, the RNA sequence, must be \a uppercase and should only contain
81 * an alphabet \f$\Sigma\f$ that is understood by the RNAlib\n
82 * (e.g. \f$ \Sigma = \{A,U,C,G\} \f$)\n
84 * The second parameter, \a structure, must always point to an allocated
85 * block of memory with a size of at least \f$\mathrm{strlen}(\mathrm{sequence})+1\f$
87 * If the third parameter is NULL, global model detail settings are assumed for the folding
88 * recursions. Otherwise, the provided parameters are used.
90 * The fourth parameter indicates whether a secondary structure constraint in enhanced dot-bracket
91 * notation is passed through the structure parameter or not. If so, the characters " | x < > " are
92 * recognized to mark bases that are paired, unpaired, paired upstream, or downstream, respectively.
93 * Matching brackets " ( ) " denote base pairs, dots "." are used for unconstrained bases.
95 * To indicate that the RNA sequence is circular and thus has to be post-processed, set the last
96 * parameter to non-zero
98 * After a successful call of fold_par(), a backtracked secondary structure (in dot-bracket notation)
99 * that exhibits the minimum of free energy will be written to the memory \a structure is pointing to.
100 * The function returns the minimum of free energy for any fold of the sequence given.
102 * \note OpenMP: Passing NULL to the 'parameters' argument involves access to several global model
103 * detail variables and thus is not to be considered threadsafe
107 * \see fold(), circfold(), #model_detailsT, set_energy_model(), get_scaled_parameters()
109 * \param sequence RNA sequence
110 * \param structure A pointer to the character array where the
111 * secondary structure in dot-bracket notation will be written to
112 * \param parameters A data structure containing the prescaled energy contributions
113 * and the model details. (NULL may be passed, see OpenMP notes above)
114 * \param is_constrained Switch to indicate that a structure constraint is passed via the structure argument (0==off)
115 * \param is_circular Switch to (de-)activate postprocessing steps in case RNA sequence is circular (0==off)
117 * \return the minimum free energy (MFE) in kcal/mol
119 float fold_par( const char *sequence,
126 * \brief Compute minimum free energy and an appropriate secondary structure of an RNA sequence
128 * This function essentially does the same thing as fold_par(). However, it takes its model details,
129 * i.e. #temperature, #dangles, #tetra_loop, #noGU, #no_closingGU, #fold_constrained, #noLonelyPairs
130 * from the current global settings within the library
132 * Use fold_par() for a completely threadsafe variant
136 * \see fold_par(), circfold()
138 * \param sequence RNA sequence
139 * \param structure A pointer to the character array where the
140 * secondary structure in dot-bracket notation will be written to
141 * \return the minimum free energy (MFE) in kcal/mol
143 float fold( const char *sequence,
147 * \brief Compute minimum free energy and an appropriate secondary structure of a circular RNA sequence
149 * This function essentially does the same thing as fold_par(). However, it takes its model details,
150 * i.e. #temperature, #dangles, #tetra_loop, #noGU, #no_closingGU, #fold_constrained, #noLonelyPairs
151 * from the current global settings within the library
153 * Use fold_par() for a completely threadsafe variant
157 * \see fold_par(), fold()
159 * \param sequence RNA sequence
160 * \param structure A pointer to the character array where the
161 * secondary structure in dot-bracket notation will be written to
162 * \return the minimum free energy (MFE) in kcal/mol
164 float circfold( const char *sequence,
169 * \addtogroup eval Energy evaluation
170 * \ingroup folding_routines
172 * \brief This module contains all functions and variables related to energy evaluation
173 * of sequence/structure pairs.
178 * \brief Calculate the free energy of an already folded RNA using global model detail settings
180 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
182 * \note OpenMP: This function relies on several global model settings variables and thus is
183 * not to be considered threadsafe. See energy_of_struct_par() for a completely threadsafe
188 * \see energy_of_struct_par(), energy_of_circ_structure()
190 * \param string RNA sequence
191 * \param structure secondary structure in dot-bracket notation
192 * \param verbosity_level a flag to turn verbose output on/off
193 * \return the free energy of the input structure given the input sequence in kcal/mol
195 float energy_of_structure(const char *string,
196 const char *structure,
197 int verbosity_level);
200 * \brief Calculate the free energy of an already folded RNA
202 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
206 * \see energy_of_circ_structure(), energy_of_structure_pt(), get_scaled_parameters()
208 * \param string RNA sequence in uppercase letters
209 * \param structure Secondary structure in dot-bracket notation
210 * \param parameters A data structure containing the prescaled energy contributions and the model details.
211 * \param verbosity_level A flag to turn verbose output on/off
212 * \return The free energy of the input structure given the input sequence in kcal/mol
214 float energy_of_struct_par( const char *string,
215 const char *structure,
217 int verbosity_level);
220 * \brief Calculate the free energy of an already folded circular RNA
222 * \note OpenMP: This function relies on several global model settings variables and thus is
223 * not to be considered threadsafe. See energy_of_circ_struct_par() for a completely threadsafe
226 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
230 * \see energy_of_circ_struct_par(), energy_of_struct_par()
232 * \param string RNA sequence
233 * \param structure Secondary structure in dot-bracket notation
234 * \param verbosity_level A flag to turn verbose output on/off
235 * \return The free energy of the input structure given the input sequence in kcal/mol
237 float energy_of_circ_structure( const char *string,
238 const char *structure,
239 int verbosity_level);
242 * \brief Calculate the free energy of an already folded circular RNA
244 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
248 * \see energy_of_struct_par(), get_scaled_parameters()
250 * \param string RNA sequence
251 * \param structure Secondary structure in dot-bracket notation
252 * \param parameters A data structure containing the prescaled energy contributions and the model details.
253 * \param verbosity_level A flag to turn verbose output on/off
254 * \return The free energy of the input structure given the input sequence in kcal/mol
256 float energy_of_circ_struct_par(const char *string,
257 const char *structure,
259 int verbosity_level);
262 float energy_of_gquad_structure(const char *string,
263 const char *structure,
264 int verbosity_level);
267 * \brief Calculate the free energy of an already folded RNA
269 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
271 * \note OpenMP: This function relies on several global model settings variables and thus is
272 * not to be considered threadsafe. See energy_of_struct_pt_par() for a completely threadsafe
277 * \see make_pair_table(), energy_of_struct_pt_par()
279 * \param string RNA sequence
280 * \param ptable the pair table of the secondary structure
281 * \param s encoded RNA sequence
282 * \param s1 encoded RNA sequence
283 * \param verbosity_level a flag to turn verbose output on/off
284 * \return the free energy of the input structure given the input sequence in 10cal/mol
286 int energy_of_structure_pt( const char *string,
290 int verbosity_level);
293 * \brief Calculate the free energy of an already folded RNA
295 * If verbosity level is set to a value >0, energies of structure elements are printed to stdout
299 * \see make_pair_table(), energy_of_struct_par(), get_scaled_parameters()
301 * \param string RNA sequence in uppercase letters
302 * \param ptable The pair table of the secondary structure
303 * \param s Encoded RNA sequence
304 * \param s1 Encoded RNA sequence
305 * \param parameters A data structure containing the prescaled energy contributions and the model details.
306 * \param verbosity_level A flag to turn verbose output on/off
307 * \return The free energy of the input structure given the input sequence in 10cal/mol
309 int energy_of_struct_pt_par(const char *string,
314 int verbosity_level);
317 * \brief Free arrays for mfe folding
322 void free_arrays(void);
326 * \brief Create a dot-bracket/parenthesis structure from backtracking stack
328 * \note This function is threadsafe
330 void parenthesis_structure(char *structure,
335 * \brief Create a dot-bracket/parenthesis structure from backtracking stack
336 * obtained by zuker suboptimal calculation in cofold.c
338 * \note This function is threadsafe
340 void parenthesis_zuker( char *structure,
344 void letter_structure(char *structure,
350 * \brief Recalculate energy parameters
354 void update_fold_params(void);
361 void update_fold_params_par(paramT *parameters);
368 char *backtrack_fold_from_pair(char *sequence,
373 * \brief Calculate energy of a move (closing or opening of a base pair)
375 * If the parameters m1 and m2 are negative, it is deletion (opening)
376 * of a base pair, otherwise it is insertion (closing).
378 * \see make_pair_table(), energy_of_move_pt()
379 * \param string RNA sequence
380 * \param structure secondary structure in dot-bracket notation
381 * \param m1 first coordinate of base pair
382 * \param m2 second coordinate of base pair
383 * \returns energy change of the move in kcal/mol
385 float energy_of_move( const char *string,
386 const char *structure,
393 * \brief Calculate energy of a move (closing or opening of a base pair)
395 * If the parameters m1 and m2 are negative, it is deletion (opening)
396 * of a base pair, otherwise it is insertion (closing).
398 * \see make_pair_table(), energy_of_move()
399 * \param pt the pair table of the secondary structure
400 * \param s encoded RNA sequence
401 * \param s1 encoded RNA sequence
402 * \param m1 first coordinate of base pair
403 * \param m2 second coordinate of base pair
404 * \returns energy change of the move in 10cal/mol
406 int energy_of_move_pt(short *pt,
413 * \brief Calculate energy of a loop
415 * \param ptable the pair table of the secondary structure
416 * \param s encoded RNA sequence
417 * \param s1 encoded RNA sequence
418 * \param i position of covering base pair
419 * \returns free energy of the loop in 10cal/mol
421 int loop_energy(short *ptable,
431 void export_fold_arrays(int **f5_p,
443 void export_fold_arrays_par(int **f5_p,
456 void export_circfold_arrays(int *Fc_p,
473 void export_circfold_arrays_par(int *Fc_p,
488 * \brief Create a plist from a dot-bracket string
490 * The dot-bracket string is parsed and for each base pair an
491 * entry in the plist is created. The probability of each pair in
492 * the list is set by a function parameter.
494 * The end of the plist is marked by sequence positions i as well as j
495 * equal to 0. This condition should be used to stop looping over its
498 * This function is threadsafe
500 * \param pl A pointer to the plist that is to be created
501 * \param struc The secondary structure in dot-bracket notation
502 * \param pr The probability for each base pair
504 void assign_plist_from_db(plist **pl,
508 /* finally moved the loop energy function declarations to this header... */
509 /* BUT: The functions only exist for backward compatibility reasons! */
510 /* You better include "loop_energies.h" and call the functions: */
511 /* E_Hairpin() and E_IntLoop() which are (almost) threadsafe as they get */
512 /* a pointer to the energy parameter datastructure as additional argument */
515 * \deprecated {This function is deprecated and will be removed soon.
516 * Use \ref E_IntLoop() instead!}
518 DEPRECATED(int LoopEnergy(int n1,
528 * \deprecated {This function is deprecated and will be removed soon.
529 * Use \ref E_Hairpin() instead!}
531 DEPRECATED(int HairpinE(int size,
535 const char *string));
538 * Allocate arrays for folding\n
539 * \deprecated {This function is deprecated and will be removed soon!}
542 DEPRECATED(void initialize_fold(int length));
545 * Calculate the free energy of an already folded RNA
547 * \note This function is not entirely threadsafe! Depending on the state of the global
548 * variable \ref eos_debug it prints energy information to stdout or not...\n
550 * \deprecated This function is deprecated and should not be used in future programs!
551 * Use \ref energy_of_structure() instead!
553 * \see energy_of_structure(), energy_of_circ_struct(), energy_of_struct_pt()
554 * \param string RNA sequence
555 * \param structure secondary structure in dot-bracket notation
556 * \return the free energy of the input structure given the input sequence in kcal/mol
558 DEPRECATED(float energy_of_struct(const char *string,
559 const char *structure));
562 * Calculate the free energy of an already folded RNA
564 * \note This function is not entirely threadsafe! Depending on the state of the global
565 * variable \ref eos_debug it prints energy information to stdout or not...\n
567 * \deprecated This function is deprecated and should not be used in future programs!
568 * Use \ref energy_of_structure_pt() instead!
570 * \see make_pair_table(), energy_of_structure()
571 * \param string RNA sequence
572 * \param ptable the pair table of the secondary structure
573 * \param s encoded RNA sequence
574 * \param s1 encoded RNA sequence
575 * \return the free energy of the input structure given the input sequence in 10cal/mol
577 DEPRECATED(int energy_of_struct_pt( const char *string,
583 * Calculate the free energy of an already folded circular RNA
585 * \note This function is not entirely threadsafe! Depending on the state of the global
586 * variable \ref eos_debug it prints energy information to stdout or not...\n
588 * \deprecated This function is deprecated and should not be used in future programs!
589 * Use \ref energy_of_circ_structure() instead!
591 * \see energy_of_circ_structure(), energy_of_struct(), energy_of_struct_pt()
592 * \param string RNA sequence
593 * \param structure secondary structure in dot-bracket notation
594 * \return the free energy of the input structure given the input sequence in kcal/mol
596 DEPRECATED(float energy_of_circ_struct( const char *string,
597 const char *structure));