1 #ifndef __VIENNA_RNA_PACKAGE_ALIFOLD_H__
2 #define __VIENNA_RNA_PACKAGE_ALIFOLD_H__
4 #include "data_structures.h"
7 * \addtogroup consensus_fold
8 * \brief compute various properties (consensus MFE structures, partition function,
9 * Boltzmann distributed stochastic samples, ...) for RNA sequence alignments
11 * Consensus structures can be predicted by a modified version of the
12 * fold() algorithm that takes a set of aligned sequences instead
13 * of a single sequence. The energy function consists of the mean energy
14 * averaged over the sequences, plus a covariance term that favors pairs
15 * with consistent and compensatory mutations and penalizes pairs that
16 * cannot be formed by all structures. For details see \cite hofacker:2002 and
17 * \cite bernhart:2008.
20 * \brief compute various properties (consensus MFE structures, partition function, Boltzmann
21 * distributed stochastic samples, ...) for RNA sequence alignments
28 * \addtogroup consensus_mfe_fold
29 * \ingroup consensus_fold
36 * \brief This variable controls the weight of the covariance term in the
37 * energy function of alignment folding algorithms
39 * \ingroup consensus_fold
43 extern double cv_fact;
45 * \brief This variable controls the magnitude of the penalty for non-compatible sequences in
46 * the covariance term of alignment folding algorithms.
48 * \ingroup consensus_fold
52 extern double nc_fact;
55 ##############################################
56 # MFE VARIANTS OF THE ALIFOLD IMPLEMENTATION #
57 ##############################################
61 * \brief Update the energy parameters for alifold function
63 * Call this to recalculate the pair matrix and energy parameters after a
64 * change in folding parameters like #temperature
66 void update_alifold_params(void);
70 * \brief Compute MFE and according consensus structure of an alignment of sequences
72 * This function predicts the consensus structure for the aligned 'sequences'
73 * and returns the minimum free energy; the mfe structure in bracket
74 * notation is returned in 'structure'.
76 * Sufficient space must be allocated for 'structure' before calling
79 * \ingroup consensus_mfe_fold
81 * \param strings A pointer to a NULL terminated array of character arrays
82 * \param structure A pointer to a character array that may contain a constraining consensus structure
83 * (will be overwritten by a consensus structure that exhibits the MFE)
84 * \return The free energy score in kcal/mol
86 float alifold( const char **strings,
91 * \brief Compute MFE and according structure of an alignment of sequences assuming the sequences are circular instead of linear
93 * \ingroup consensus_mfe_fold
95 * \param strings A pointer to a NULL terminated array of character arrays
96 * \param structure A pointer to a character array that may contain a constraining consensus structure
97 * (will be overwritten by a consensus structure that exhibits the MFE)
98 * \return The free energy score in kcal/mol
100 float circalifold( const char **strings,
104 * \brief Free the memory occupied by MFE alifold functions
106 * \ingroup consensus_mfe_fold
109 void free_alifold_arrays(void);
112 * \brief Get the mean pairwise identity in steps from ?to?(ident)
114 * \ingroup consensus_fold
117 * \param n_seq The number of sequences in the alignment
118 * \param length The length of the alignment
120 * \return The mean pairwise identity
122 int get_mpi(char *Alseq[],
128 * \brief Read a ribosum or other user-defined scoring matrix
130 * \ingroup consensus_fold
133 float **readribosum(char *name);
136 * \brief Calculate the free energy of a consensus structure given a set of aligned sequences
138 * \ingroup consensus_fold
140 * \param sequences The NULL terminated array of sequences
141 * \param structure The consensus structure
142 * \param n_seq The number of sequences in the alignment
143 * \param energy A pointer to an array of at least two floats that will hold the free energies
144 * (energy[0] will contain the free energy, energy[1] will be filled with the covariance energy term)
145 * \returns free energy in kcal/mol
148 float energy_of_alistruct(const char **sequences,
149 const char *structure,
153 float energy_of_ali_gquad_structure(const char **sequences,
154 const char *structure,
159 #############################################################
160 # some helper functions that might be useful in the library #
161 #############################################################
165 * \brief Get arrays with encoded sequence of the alignment
167 * this function assumes that in S, S5, s3, ss and as enough
168 * space is already allocated (size must be at least sequence length+2)
170 * \ingroup consensus_fold
172 * \param sequence The gapped sequence from the alignment
173 * \param S pointer to an array that holds encoded sequence
174 * \param s5 pointer to an array that holds the next base 5' of alignment position i
175 * \param s3 pointer to an array that holds the next base 3' of alignment position i
178 * \param circ assume the molecules to be circular instead of linear (circ=0)
180 void encode_ali_sequence( const char *sequence,
189 * \brief Allocate memory for sequence array used to deal with aligned sequences
191 * Note that these arrays will also be initialized according to the sequence alignment given
193 * \ingroup consensus_fold
195 * \see free_sequence_arrays()
197 * \param sequences The aligned sequences
198 * \param S A pointer to the array of encoded sequences
199 * \param S5 A pointer to the array that contains the next 5' nucleotide of a sequence position
200 * \param S3 A pointer to the array that contains the next 3' nucleotide of a sequence position
201 * \param a2s A pointer to the array that contains the alignment to sequence position mapping
202 * \param Ss A pointer to the array that contains the ungapped sequence
203 * \param circ assume the molecules to be circular instead of linear (circ=0)
205 void alloc_sequence_arrays(const char **sequences,
209 unsigned short ***a2s,
214 * \brief Free the memory of the sequence arrays used to deal with aligned sequences
216 * This function frees the memory previously allocated with alloc_sequence_arrays()
218 * \ingroup consensus_fold
220 * \see alloc_sequence_arrays()
222 * \param n_seq The number of aligned sequences
223 * \param S A pointer to the array of encoded sequences
224 * \param S5 A pointer to the array that contains the next 5' nucleotide of a sequence position
225 * \param S3 A pointer to the array that contains the next 3' nucleotide of a sequence position
226 * \param a2s A pointer to the array that contains the alignment to sequence position mapping
227 * \param Ss A pointer to the array that contains the ungapped sequence
229 void free_sequence_arrays( unsigned int n_seq,
233 unsigned short ***a2s,
237 #############################################################
238 # PARTITION FUNCTION VARIANTS OF THE ALIFOLD IMPLEMENTATION #
239 #############################################################
244 * \addtogroup consensus_pf_fold
245 * \ingroup consensus_fold
254 * \ingroup consensus_pf_fold
260 * \param calculate_bppm
261 * \param is_constrained
265 float alipf_fold_par( const char **sequences,
268 pf_paramT *parameters,
276 * The partition function version of alifold() works in analogy to
277 * pf_fold(). Pair probabilities and information about sequence
278 * covariations are returned via the 'pi' variable as a list of
279 * #pair_info structs. The list is terminated by the first entry with
282 * \ingroup consensus_pf_fold
289 float alipf_fold( const char **sequences,
296 * \ingroup consensus_pf_fold
303 float alipf_circ_fold(const char **sequences,
309 * \brief Get a pointer to the base pair probability array
311 * Accessing the base pair probabilities for a pair (i,j) is achieved by
312 * \verbatim FLT_OR_DBL *pr = export_bppm(); pr_ij = pr[iindx[i]-j]; \endverbatim
314 * \ingroup consensus_pf_fold
317 * \return A pointer to the base pair probability array
319 FLT_OR_DBL *export_ali_bppm(void);
324 * \ingroup consensus_pf_fold
327 void free_alipf_arrays(void);
330 * \addtogroup consensus_stochbt
337 * \brief Sample a consensus secondary structure from the Boltzmann ensemble according its probability\n
339 * \ingroup consensus_stochbt
341 * \param prob to be described (berni)
342 * \return A sampled consensus secondary structure in dot-bracket notation
344 char *alipbacktrack(double *prob);
348 * \brief Get pointers to (almost) all relavant arrays used in alifold's partition function computation
350 * \ingroup consensus_fold
352 * \note To obtain meaningful pointers, call alipf_fold first!
354 * \see pf_alifold(), alipf_circ_fold()
356 * \param S_p A pointer to the 'S' array (integer representation of nucleotides)
357 * \param S5_p A pointer to the 'S5' array
358 * \param S3_p A pointer to the 'S3' array
359 * \param a2s_p A pointer to the pair type matrix
360 * \param Ss_p A pointer to the 'Ss' array
361 * \param qb_p A pointer to the Q<sup>B</sup> matrix
362 * \param qm_p A pointer to the Q<sup>M</sup> matrix
363 * \param q1k_p A pointer to the 5' slice of the Q matrix (\f$q1k(k) = Q(1, k)\f$)
364 * \param qln_p A pointer to the 3' slice of the Q matrix (\f$qln(l) = Q(l, n)\f$)
365 * \return Non Zero if everything went fine, 0 otherwise
367 int get_alipf_arrays(short ***S_p,
370 unsigned short ***a2s_p,