1 #ifndef __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
2 #define __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
4 #include "energy_const.h"
6 * \file data_structures.h
7 * \brief All datastructures and typedefs shared among the Vienna RNA Package can be found here
10 /* to use floats instead of doubles in pf_fold(), comment out the next line */
13 #define FLT_OR_DBL double
15 #define FLT_OR_DBL float
24 * \brief Maximal length of alphabet
30 * \brief Maximum density of states discretization for subopt
34 #define VRNA_GQUAD_MAX_STACK_SIZE 7 /* largest G-quadruplex stack size; the gquad tables use this + 1 as their first dimension */
35 #define VRNA_GQUAD_MIN_STACK_SIZE 2 /* smallest G-quadruplex stack size */
36 #define VRNA_GQUAD_MAX_LINKER_LENGTH 15 /* largest linker length; the gquad tables use 3 * this + 1 as their second dimension */
37 #define VRNA_GQUAD_MIN_LINKER_LENGTH 1 /* smallest linker length */
38 #define VRNA_GQUAD_MIN_BOX_SIZE ((4*VRNA_GQUAD_MIN_STACK_SIZE)+(3*VRNA_GQUAD_MIN_LINKER_LENGTH)) /* 4 * min stack size + 3 * min linker length (= 11 with the values above) */
39 #define VRNA_GQUAD_MAX_BOX_SIZE ((4*VRNA_GQUAD_MAX_STACK_SIZE)+(3*VRNA_GQUAD_MAX_LINKER_LENGTH)) /* 4 * max stack size + 3 * max linker length (= 73 with the values above) */
43 * ############################################################
44 * Here are the type definitions of various datastructures
45 * shared among the Vienna RNA Package
46 * ############################################################
50 * \brief this datastructure is used as input parameter in functions of PS_dot.h and others
52 typedef struct plist {
60 * \brief this datastructure is used as input parameter in functions of PS_dot.c
62 typedef struct cpair {
68 * \brief this is a workaround for the SWIG Perl Wrapper RNA plot function
69 * that returns an array of type COORDINATE
72 float X; /* X coords */
73 float Y; /* Y coords */
77 * \brief Stack of partial structures for backtracking
88 typedef struct bondT {
94 * \brief Base pair with associated energy
96 typedef struct bondTEn {
103 * \brief The data structure that contains the complete model details used throughout the calculations
107 int dangles; /**< \brief Specifies the dangle model used in any energy evaluation (0,1,2 or 3)
108 \note Some functions do not implement all dangle models but only a subset of
109 (0,1,2,3). Read the documentation of the particular recurrences or
110 energy evaluation function for information about the provided dangle
113 int special_hp; /**< \brief Include special hairpin contributions for tri, tetra and hexaloops */
114 int noLP; /**< \brief Only consider canonical structures, i.e. no 'lonely' base pairs */
115 int noGU; /**< \brief Do not allow GU pairs */
116 int noGUclosure; /**< \brief Do not allow loops to be closed by GU pair */
117 int logML; /**< \brief Use logarithmic scaling for multi loops */
118 int circ; /**< \brief Assume molecule to be circular */
119 int gquad; /**< \brief Include G-quadruplexes in structure prediction */
123 * \brief The datastructure that contains temperature scaled energy parameters.
127 int stack[NBPAIRS+1][NBPAIRS+1];
129 int bulge[MAXLOOP+1];
130 int internal_loop[MAXLOOP+1];
131 int mismatchExt[NBPAIRS+1][5][5];
132 int mismatchI[NBPAIRS+1][5][5];
133 int mismatch1nI[NBPAIRS+1][5][5];
134 int mismatch23I[NBPAIRS+1][5][5];
135 int mismatchH[NBPAIRS+1][5][5];
136 int mismatchM[NBPAIRS+1][5][5];
137 int dangle5[NBPAIRS+1][5];
138 int dangle3[NBPAIRS+1][5];
139 int int11[NBPAIRS+1][NBPAIRS+1][5][5];
140 int int21[NBPAIRS+1][NBPAIRS+1][5][5][5];
141 int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
145 int MLintern[NBPAIRS+1];
149 int Tetraloop_E[200];
150 char Tetraloops[1401];
154 char Hexaloops[1801];
158 int gquad [VRNA_GQUAD_MAX_STACK_SIZE + 1]
159 [3*VRNA_GQUAD_MAX_LINKER_LENGTH + 1];
161 double temperature; /**< \brief Temperature used for loop contribution scaling */
163 model_detailsT model_details; /**< \brief Model details to be used in the recursions */
168 * \brief The datastructure that contains temperature scaled Boltzmann weights of the energy parameters.
172 double expstack[NBPAIRS+1][NBPAIRS+1];
173 double exphairpin[31];
174 double expbulge[MAXLOOP+1];
175 double expinternal[MAXLOOP+1];
176 double expmismatchExt[NBPAIRS+1][5][5];
177 double expmismatchI[NBPAIRS+1][5][5];
178 double expmismatch23I[NBPAIRS+1][5][5];
179 double expmismatch1nI[NBPAIRS+1][5][5];
180 double expmismatchH[NBPAIRS+1][5][5];
181 double expmismatchM[NBPAIRS+1][5][5];
182 double expdangle5[NBPAIRS+1][5];
183 double expdangle3[NBPAIRS+1][5];
184 double expint11[NBPAIRS+1][NBPAIRS+1][5][5];
185 double expint21[NBPAIRS+1][NBPAIRS+1][5][5][5];
186 double expint22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
187 double expninio[5][MAXLOOP+1];
190 double expMLintern[NBPAIRS+1];
193 double expDuplexInit;
197 char Tetraloops[1401];
198 double expTriloop[40];
200 char Hexaloops[1801];
202 double expMultipleCA;
203 double expMultipleCB;
204 double expgquad[VRNA_GQUAD_MAX_STACK_SIZE + 1]
205 [3*VRNA_GQUAD_MAX_LINKER_LENGTH + 1];
208 double pf_scale; /**< \brief Scaling factor to avoid over-/underflows */
210 double temperature; /**< \brief Temperature used for loop contribution scaling */
211 double alpha; /**< \brief Scaling factor for the thermodynamic temperature
212 \details This allows for temperature scaling in Boltzmann
213 factors independently from the energy contributions.
214 The resulting Boltzmann factors are then computed by
215 \f$ e^{-E/(\alpha \cdot K \cdot T)} \f$
218 model_detailsT model_details; /**< \brief Model details to be used in the recursions */
225 * ############################################################
226 * SUBOPT data structures
227 * ############################################################
232 * \brief Base pair data structure used in subopt.c
240 * \brief Sequence interval stack element used in subopt.c
249 * \brief Solution element from subopt.c
252 float energy; /**< \brief Free Energy of structure in kcal/mol */
253 char *structure; /**< \brief Structure in dot-bracket notation */
257 * ############################################################
258 * COFOLD data structures
259 * ############################################################
265 typedef struct cofoldF {
266 /* free energies for: */
267 double F0AB; /**< \brief Null model without DuplexInit */
268 double FAB; /**< \brief all states with DuplexInit correction */
269 double FcAB; /**< \brief true hybrid states only */
270 double FA; /**< \brief monomer A */
271 double FB; /**< \brief monomer B */
277 typedef struct ConcEnt {
278 double A0; /**< \brief start concentration A */
279 double B0; /**< \brief start concentration B */
280 double ABc; /**< \brief End concentration AB */
290 typedef struct pairpro{
299 * \brief A base pair info structure
301 * For each base pair (i,j) with i,j in [0, n-1] the structure lists:
302 * - its probability 'p'
303 * - an entropy-like measure for its well-definedness 'ent'
304 * - the frequency of each type of pair in 'bp[]'
305 * + 'bp[0]' contains the number of non-compatible sequences
306 * + 'bp[1]' the number of CG pairs, etc.
309 unsigned i; /**< \brief nucleotide position i */
310 unsigned j; /**< \brief nucleotide position j */
311 float p; /**< \brief Probability */
312 float ent; /**< \brief Pseudo entropy for \f$ p(i,j) = S_i + S_j - p_{ij} \cdot \ln(p_{ij}) \f$ */
313 short bp[8]; /**< \brief Frequencies of pair_types */
314 char comp; /**< \brief 1 iff pair is in mfe structure */
319 * ############################################################
320 * FINDPATH data structures
321 * ############################################################
327 typedef struct move {
328 int i; /* i,j>0 insert; i,j<0 delete */
330 int when; /* 0 if still available, else resulting distance from start */
337 typedef struct intermediate {
338 short *pt; /**< \brief pair table */
339 int Sen; /**< \brief saddle energy so far */
340 int curr_en; /**< \brief current energy */
341 move_t *moves; /**< \brief remaining moves to target */
347 typedef struct path {
353 * ############################################################
354 * RNAup data structures
355 * ############################################################
359 * \brief contributions to p_u
361 typedef struct pu_contrib {
362 double **H; /**< \brief hairpin loops */
363 double **I; /**< \brief interior loops */
364 double **M; /**< \brief multi loops */
365 double **E; /**< \brief exterior loop */
366 int length; /**< \brief length of the input sequence */
367 int w; /**< \brief longest unpaired region */
373 typedef struct interact {
374 double *Pi; /**< \brief probabilities of interaction */
375 double *Gi; /**< \brief free energies of interaction */
376 double Gikjl; /**< \brief full free energy for interaction between [k,i] k<i
377 in longer seq and [j,l] j<l in shorter seq */
378 double Gikjl_wo; /**< \brief Gikjl without contributions for prob_unpaired */
379 int i; /**< \brief k<i in longer seq */
380 int k; /**< \brief k<i in longer seq */
381 int j; /**< \brief j<l in shorter seq */
382 int l; /**< \brief j<l in shorter seq */
383 int length; /**< \brief length of longer sequence */
387 * \brief Collection of all free_energy of being unpaired values for output
389 typedef struct pu_out {
390 int len; /**< \brief sequence length */
391 int u_vals; /**< \brief number of different -u values */
392 int contribs; /**< \brief [-c "SHIME"] */
393 char **header; /**< \brief header line */
394 double **u_values; /**< \brief (the -u values * [-c "SHIME"]) * seq len */
398 * \brief constraints for cofolding
400 typedef struct constrain{
406 * ############################################################
407 * RNAduplex data structures
408 * ############################################################
420 double energy_backtrack;
421 double opening_backtrack_x;
422 double opening_backtrack_y;
434 * ############################################################
435 * RNAsnoop data structures
436 * ############################################################
442 typedef struct node {
467 float fullStemEnergy;
477 * ############################################################
478 * PKplex data structures
479 * ############################################################
485 typedef struct dupVar{
507 * ############################################################
508 * 2Dfold data structures
509 * ############################################################
513 * \brief Solution element returned from TwoDfoldList
515 * This element contains free energy and structure for the appropriate
516 * kappa (k), lambda (l) neighborhood.
517 * The datastructure contains two integer attributes 'k' and 'l'
518 * as well as an attribute 'en' of type float representing the free energy
519 * in kcal/mol and an attribute 's' of type char* containing the secondary
520 * structure representative.
522 * A value of #INF in k denotes the end of a list
524 * \see TwoDfoldList()
527 int k; /**< \brief Distance to first reference */
528 int l; /**< \brief Distance to second reference */
529 float en; /**< \brief Free energy in kcal/mol */
530 char *s; /**< \brief MFE representative structure in dot-bracket notation */
534 * \brief Variables compound for 2Dfold MFE folding
536 * \see get_TwoDfold_variables(), destroy_TwoDfold_variables(), TwoDfoldList()
539 paramT *P; /**< \brief Precomputed energy parameters and model details */
540 int do_backtrack; /**< \brief Flag whether to do backtracing of the structure(s) or not */
541 char *ptype; /**< \brief Precomputed array of pair types */
542 char *sequence; /**< \brief The input sequence */
543 short *S, *S1; /**< \brief The input sequences in numeric form */
544 unsigned int maxD1; /**< \brief Maximum allowed base pair distance to first reference */
545 unsigned int maxD2; /**< \brief Maximum allowed base pair distance to second reference */
548 unsigned int *mm1; /**< \brief Maximum matching matrix, reference struct 1 disallowed */
549 unsigned int *mm2; /**< \brief Maximum matching matrix, reference struct 2 disallowed */
551 int *my_iindx; /**< \brief Index for moving in quadratic distancy dimensions */
555 unsigned int *referenceBPs1; /**< \brief Matrix containing number of basepairs of reference structure1 in interval [i,j] */
556 unsigned int *referenceBPs2; /**< \brief Matrix containing number of basepairs of reference structure2 in interval [i,j] */
557 unsigned int *bpdist; /**< \brief Matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
559 short *reference_pt1;
560 short *reference_pt2;
563 unsigned int seq_length;
582 int **l_min_values_m;
583 int **l_max_values_m;
587 int **l_min_values_m1;
588 int **l_max_values_m1;
589 int *k_min_values_m1;
590 int *k_max_values_m1;
592 int **l_min_values_f;
593 int **l_max_values_f;
597 int **l_min_values_f3;
598 int **l_max_values_f3;
599 int *k_min_values_f3;
600 int *k_max_values_f3;
602 int **l_min_values_m2;
603 int **l_max_values_m2;
604 int *k_min_values_m2;
605 int *k_max_values_m2;
607 int *l_min_values_fc;
608 int *l_max_values_fc;
612 int *l_min_values_fcH;
613 int *l_max_values_fcH;
614 int k_min_values_fcH;
615 int k_max_values_fcH;
617 int *l_min_values_fcI;
618 int *l_max_values_fcI;
619 int k_min_values_fcI;
620 int k_max_values_fcI;
622 int *l_min_values_fcM;
623 int *l_max_values_fcM;
624 int k_min_values_fcM;
625 int k_max_values_fcM;
627 /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
641 unsigned long ***N_F5;
642 unsigned long ***N_C;
643 unsigned long ***N_M;
644 unsigned long ***N_M1;
649 * \brief Solution element returned from TwoDpfoldList
651 * This element contains the partition function for the appropriate
652 * kappa (k), lambda (l) neighborhood.
653 * The datastructure contains two integer attributes 'k' and 'l'
654 * as well as an attribute 'q' of type #FLT_OR_DBL.
656 * A value of #INF in k denotes the end of a list
658 * \see TwoDpfoldList()
661 int k; /**< \brief Distance to first reference */
662 int l; /**< \brief Distance to second reference */
663 FLT_OR_DBL q; /**< \brief partition function */
664 } TwoDpfold_solution;
667 * \brief Variables compound for 2Dfold partition function folding
669 * \see get_TwoDpfold_variables(), get_TwoDpfold_variables_from_MFE(),
670 * destroy_TwoDpfold_variables(), TwoDpfoldList()
675 char *ptype; /**< \brief Precomputed array of pair types */
676 char *sequence; /**< \brief The input sequence */
677 short *S, *S1; /**< \brief The input sequences in numeric form */
678 unsigned int maxD1; /**< \brief Maximum allowed base pair distance to first reference */
679 unsigned int maxD2; /**< \brief Maximum allowed base pair distance to second reference */
681 double temperature; /* temperature in last call to scale_pf_params */
682 double init_temp; /* temperature in last call to scale_pf_params -- NOTE(review): same text as on `temperature` above; presumably only one of the two fields is meant, confirm which */
685 pf_paramT *pf_params; /* holds all [unscaled] pf parameters */
687 int *my_iindx; /**< \brief Index for moving in quadratic distancy dimensions */
688 int *jindx; /**< \brief Index for moving in the triangular matrix qm1 */
690 short *reference_pt1;
691 short *reference_pt2;
693 unsigned int *referenceBPs1; /**< \brief Matrix containing number of basepairs of reference structure1 in interval [i,j] */
694 unsigned int *referenceBPs2; /**< \brief Matrix containing number of basepairs of reference structure2 in interval [i,j] */
695 unsigned int *bpdist; /**< \brief Matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
697 unsigned int *mm1; /**< \brief Maximum matching matrix, reference struct 1 disallowed */
698 unsigned int *mm2; /**< \brief Maximum matching matrix, reference struct 2 disallowed */
702 unsigned int seq_length;
720 int **l_min_values_b;
721 int **l_max_values_b;
725 int **l_min_values_m;
726 int **l_max_values_m;
730 int **l_min_values_m1;
731 int **l_max_values_m1;
732 int *k_min_values_m1;
733 int *k_max_values_m1;
735 int **l_min_values_m2;
736 int **l_max_values_m2;
737 int *k_min_values_m2;
738 int *k_max_values_m2;
740 int *l_min_values_qc;
741 int *l_max_values_qc;
745 int *l_min_values_qcH;
746 int *l_max_values_qcH;
747 int k_min_values_qcH;
748 int k_max_values_qcH;
750 int *l_min_values_qcI;
751 int *l_max_values_qcI;
752 int k_min_values_qcI;
753 int k_max_values_qcI;
755 int *l_min_values_qcM;
756 int *l_max_values_qcM;
757 int k_min_values_qcM;
758 int k_max_values_qcM;
760 /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
764 FLT_OR_DBL *Q_M1_rem;
765 FLT_OR_DBL *Q_M2_rem;