#ifndef __VIENNA_RNA_PACKAGE_ALIFOLD_H__
#define __VIENNA_RNA_PACKAGE_ALIFOLD_H__
#include "data_structures.h"
/**
* \addtogroup consensus_fold
* \brief compute various properties (consensus MFE structures, partition function,
* Boltzmann distributed stochastic samples, ...) for RNA sequence alignments
*
* Consensus structures can be predicted by a modified version of the
* fold() algorithm that takes a set of aligned sequences instead
* of a single sequence. The energy function consists of the mean energy
* averaged over the sequences, plus a covariance term that favors pairs
* with consistent and compensatory mutations and penalizes pairs that
* cannot be formed by all structures. For details see \cite hofacker:2002 and
* \cite bernhart:2008.
* @{
* \file alifold.h
* \brief compute various properties (consensus MFE structures, partition function, Boltzmann
* distributed stochastic samples, ...) for RNA sequence alignments
*
* @}
*/
/**
* \addtogroup consensus_mfe_fold
* \ingroup consensus_fold
* @{
*
* @}
*/
/**
* \brief This variable controls the weight of the covariance term in the
* energy function of alignment folding algorithms
*
* \ingroup consensus_fold
*
* Default is 1.
*/
extern double cv_fact;
/**
* \brief This variable controls the magnitude of the penalty for non-compatible sequences in
* the covariance term of alignment folding algorithms.
*
* \ingroup consensus_fold
*
* Default is 1.
*/
extern double nc_fact;
/*
##############################################
# MFE VARIANTS OF THE ALIFOLD IMPLEMENTATION #
##############################################
*/
/**
* \brief Update the energy parameters for alifold function
*
* Call this to recalculate the pair matrix and energy parameters after a
* change in folding parameters like #temperature
*/
void update_alifold_params(void);
/**
* \brief Compute MFE and according consensus structure of an alignment of sequences
*
* This function predicts the consensus structure for the aligned 'sequences'
* and returns the minimum free energy; the mfe structure in bracket
* notation is returned in 'structure'.
*
* Sufficient space must be allocated for 'structure' before calling
* alifold().
*
* \ingroup consensus_mfe_fold
*
* \param strings A pointer to a NULL terminated array of character arrays
* \param structure A pointer to a character array that may contain a constraining consensus structure
* (will be overwritten by a consensus structure that exhibits the MFE)
* \return The free energy score in kcal/mol
*/
float alifold( const char **strings,
char *structure);
/**
* \brief Compute MFE and according structure of an alignment of sequences assuming the sequences are circular instead of linear
*
* \ingroup consensus_mfe_fold
*
* \param strings A pointer to a NULL terminated array of character arrays
* \param structure A pointer to a character array that may contain a constraining consensus structure
* (will be overwritten by a consensus structure that exhibits the MFE)
* \return The free energy score in kcal/mol
*/
float circalifold( const char **strings,
char *structure);
/**
* \brief Free the memory occupied by MFE alifold functions
*
* \ingroup consensus_mfe_fold
*
*/
void free_alifold_arrays(void);
/**
* \brief Get the mean pairwise identity in steps from ?to?(ident)
*
* \ingroup consensus_fold
*
* \param Alseq
* \param n_seq The number of sequences in the alignment
* \param length The length of the alignment
* \param mini
* \return The mean pairwise identity
*/
int get_mpi(char *Alseq[],
int n_seq,
int length,
int *mini);
/**
* \brief Read a ribosum or other user-defined scoring matrix
*
* \ingroup consensus_fold
*
*/
float **readribosum(char *name);
/**
* \brief Calculate the free energy of a consensus structure given a set of aligned sequences
*
* \ingroup consensus_fold
*
* \param sequences The NULL terminated array of sequences
* \param structure The consensus structure
* \param n_seq The number of sequences in the alignment
* \param energy A pointer to an array of at least two floats that will hold the free energies
* (energy[0] will contain the free energy, energy[1] will be filled with the covariance energy term)
* \returns free energy in kcal/mol
*
*/
float energy_of_alistruct(const char **sequences,
const char *structure,
int n_seq,
float *energy);
float energy_of_ali_gquad_structure(const char **sequences,
const char *structure,
int n_seq,
float *energy);
/*
#############################################################
# some helper functions that might be useful in the library #
#############################################################
*/
/**
* \brief Get arrays with encoded sequence of the alignment
*
* this function assumes that in S, S5, s3, ss and as enough
* space is already allocated (size must be at least sequence length+2)
*
* \ingroup consensus_fold
*
* \param sequence The gapped sequence from the alignment
* \param S pointer to an array that holds encoded sequence
* \param s5 pointer to an array that holds the next base 5' of alignment position i
* \param s3 pointer to an array that holds the next base 3' of alignment position i
* \param ss
* \param as
* \param circ assume the molecules to be circular instead of linear (circ=0)
*/
void encode_ali_sequence( const char *sequence,
short *S,
short *s5,
short *s3,
char *ss,
unsigned short *as,
int circ);
/**
* \brief Allocate memory for sequence array used to deal with aligned sequences
*
* Note that these arrays will also be initialized according to the sequence alignment given
*
* \ingroup consensus_fold
*
* \see free_sequence_arrays()
*
* \param sequences The aligned sequences
* \param S A pointer to the array of encoded sequences
* \param S5 A pointer to the array that contains the next 5' nucleotide of a sequence position
* \param S3 A pointer to the array that contains the next 3' nucleotide of a sequence position
* \param a2s A pointer to the array that contains the alignment to sequence position mapping
* \param Ss A pointer to the array that contains the ungapped sequence
* \param circ assume the molecules to be circular instead of linear (circ=0)
*/
void alloc_sequence_arrays(const char **sequences,
short ***S,
short ***S5,
short ***S3,
unsigned short ***a2s,
char ***Ss,
int circ);
/**
* \brief Free the memory of the sequence arrays used to deal with aligned sequences
*
* This function frees the memory previously allocated with alloc_sequence_arrays()
*
* \ingroup consensus_fold
*
* \see alloc_sequence_arrays()
*
* \param n_seq The number of aligned sequences
* \param S A pointer to the array of encoded sequences
* \param S5 A pointer to the array that contains the next 5' nucleotide of a sequence position
* \param S3 A pointer to the array that contains the next 3' nucleotide of a sequence position
* \param a2s A pointer to the array that contains the alignment to sequence position mapping
* \param Ss A pointer to the array that contains the ungapped sequence
*/
void free_sequence_arrays( unsigned int n_seq,
short ***S,
short ***S5,
short ***S3,
unsigned short ***a2s,
char ***Ss);
/*
#############################################################
# PARTITION FUNCTION VARIANTS OF THE ALIFOLD IMPLEMENTATION #
#############################################################
*/
/**
* \addtogroup consensus_pf_fold
* \ingroup consensus_fold
* @{
*
* @}
*/
/**
* \brief
*
* \ingroup consensus_pf_fold
*
* \param sequences
* \param structure
* \param pl
* \param parameters
* \param calculate_bppm
* \param is_constrained
* \param is_circular
* \return
*/
float alipf_fold_par( const char **sequences,
char *structure,
plist **pl,
pf_paramT *parameters,
int calculate_bppm,
int is_constrained,
int is_circular);
/**
* \brief
*
* The partition function version of alifold() works in analogy to
* pf_fold(). Pair probabilities and information about sequence
* covariations are returned via the 'pi' variable as a list of
* #pair_info structs. The list is terminated by the first entry with
* pi.i = 0.
*
* \ingroup consensus_pf_fold
*
* \param sequences
* \param structure
* \param pl
* \return
*/
float alipf_fold( const char **sequences,
char *structure,
plist **pl);
/**
* \brief
*
* \ingroup consensus_pf_fold
*
* \param sequences
* \param structure
* \param pl
* \return
*/
float alipf_circ_fold(const char **sequences,
char *structure,
plist **pl);
/**
* \brief Get a pointer to the base pair probability array
*
* Accessing the base pair probabilities for a pair (i,j) is achieved by
* \verbatim FLT_OR_DBL *pr = export_bppm(); pr_ij = pr[iindx[i]-j]; \endverbatim
*
* \ingroup consensus_pf_fold
*
* \see get_iindx()
* \return A pointer to the base pair probability array
*/
FLT_OR_DBL *export_ali_bppm(void);
/**
* \brief
*
* \ingroup consensus_pf_fold
*
*/
void free_alipf_arrays(void);
/**
* \addtogroup consensus_stochbt
* @{
*
* @}
*/
/**
* \brief Sample a consensus secondary structure from the Boltzmann ensemble according its probability\n
*
* \ingroup consensus_stochbt
*
* \param prob to be described (berni)
* \return A sampled consensus secondary structure in dot-bracket notation
*/
char *alipbacktrack(double *prob);
/**
* \brief Get pointers to (almost) all relavant arrays used in alifold's partition function computation
*
* \ingroup consensus_fold
*
* \note To obtain meaningful pointers, call alipf_fold first!
*
* \see pf_alifold(), alipf_circ_fold()
*
* \param S_p A pointer to the 'S' array (integer representation of nucleotides)
* \param S5_p A pointer to the 'S5' array
* \param S3_p A pointer to the 'S3' array
* \param a2s_p A pointer to the pair type matrix
* \param Ss_p A pointer to the 'Ss' array
* \param qb_p A pointer to the QB matrix
* \param qm_p A pointer to the QM matrix
* \param q1k_p A pointer to the 5' slice of the Q matrix (\f$q1k(k) = Q(1, k)\f$)
* \param qln_p A pointer to the 3' slice of the Q matrix (\f$qln(l) = Q(l, n)\f$)
* \return Non Zero if everything went fine, 0 otherwise
*/
int get_alipf_arrays(short ***S_p,
short ***S5_p,
short ***S3_p,
unsigned short ***a2s_p,
char ***Ss_p,
FLT_OR_DBL **qb_p,
FLT_OR_DBL **qm_p,
FLT_OR_DBL **q1k_p,
FLT_OR_DBL **qln_p,
short **pscore);
#endif