1 #ifndef __VIENNA_RNA_PACKAGE_GQUAD_H__
2 #define __VIENNA_RNA_PACKAGE_GQUAD_H__
4 #include "data_structures.h"
16 * \brief Various functions related to G-quadruplex computations
24 FLT_OR_DBL exp_E_gquad( int L,
28 int E_gquad_ali(int i,
36 void E_gquad_ali_en( int i,
45 * \brief Get a triangular matrix prefilled with minimum free energy
46 * contributions of G-quadruplexes.
48 * At each position ij in the matrix, the minimum free energy of any
49 * G-quadruplex delimited by i and j is stored. If no G-quadruplex formation
50 * is possible, the matrix element is set to INF.
51 * Access the elements in the matrix via matrix[indx[j]+i]. To get
52 * the integer array indx see get_jindx().
54 * \see get_jindx(), encode_sequence()
56 * \param S The encoded sequence
57 * \param P A pointer to the data structure containing the precomputed energy contributions
58 * \return A pointer to the G-quadruplex contribution matrix
60 int *get_gquad_matrix(short *S, paramT *P);
62 int *get_gquad_ali_matrix(short *S_cons,
67 FLT_OR_DBL *get_gquad_pf_matrix( short *S,
71 int **get_gquad_L_matrix( short *S,
77 void get_gquad_pattern_mfe(short *S,
84 void get_gquad_pattern_pf( short *S,
91 plist *get_plist_gquad_from_pr( short *S,
98 plist *get_plist_gquad_from_pr_max(short *S,
108 plist *get_plist_gquad_from_db( const char *structure,
112 * given a dot-bracket structure (possibly) containing gquads encoded
113 * by '+' signs, find first gquad, return end position or 0 if none found
114 * Upon return L and l[] contain the number of stacked layers, as well as
115 * the lengths of the linker regions.
116 * To parse a string with many gquads, call parse_gquad repeatedly e.g.
117 * end1 = parse_gquad(struc, &L, l); ... ;
118 * end2 = parse_gquad(struc+end1, &L, l); end2+=end1; ... ;
119 * end3 = parse_gquad(struc+end2, &L, l); end3+=end2; ... ;
121 int parse_gquad(const char *struc, int *L, int l[3]);
126 * backtrack an interior loop like enclosed g-quadruplex
127 * with closing pair (i,j)
129 * \param c The total contribution the loop should resemble
130 * \param i position i of enclosing pair
131 * \param j position j of enclosing pair
132 * \param type base pair type of enclosing pair (must be reverse type)
133 * \param S integer encoded sequence
134 * \param ggg triangular matrix containing g-quadruplex contributions
135 * \param index the index for accessing the triangular matrix
136 * \param p here the 5' position of the gquad is stored
137 * \param q here the 3' position of the gquad is stored
138 * \param P the datastructure containing the precalculated contibutions
140 * \return 1 on success, 0 if no gquad found
142 INLINE PRIVATE int backtrack_GQuad_IntLoop(int c,
153 int energy, dangles, k, l, maxl, minl, c0, l1;
156 dangles = P->model_details.dangles;
162 energy += P->mismatchI[type][si][sj];
165 energy += P->TerminalAU;
169 if(k < j - VRNA_GQUAD_MIN_BOX_SIZE){
170 minl = j - i + k - MAXLOOP - 2;
171 c0 = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
172 minl = MAX2(c0, minl);
174 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
175 maxl = MIN2(c0, maxl);
176 for(l = minl; l < maxl; l++){
177 if(S[l] != 3) continue;
178 if(c == energy + ggg[index[l] + k] + P->internal_loop[j - l - 1]){
187 k < j - VRNA_GQUAD_MIN_BOX_SIZE;
190 if(l1>MAXLOOP) break;
191 if(S[k] != 3) continue;
192 minl = j - i + k - MAXLOOP - 2;
193 c0 = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
194 minl = MAX2(c0, minl);
196 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
197 maxl = MIN2(c0, maxl);
198 for(l = minl; l < maxl; l++){
199 if(S[l] != 3) continue;
200 if(c == energy + ggg[index[l] + k] + P->internal_loop[l1 + j - l - 1]){
210 k < j - VRNA_GQUAD_MIN_BOX_SIZE;
213 if(l1>MAXLOOP) break;
214 if(S[k] != 3) continue;
215 if(c == energy + ggg[index[l] + k] + P->internal_loop[l1]){
225 * backtrack an interior loop like enclosed g-quadruplex
226 * with closing pair (i,j) with underlying Lfold matrix
228 * \param c The total contribution the loop should resemble
229 * \param i position i of enclosing pair
230 * \param j position j of enclosing pair
231 * \param type base pair type of enclosing pair (must be reverse type)
232 * \param S integer encoded sequence
233 * \param ggg triangular matrix containing g-quadruplex contributions
234 * \param p here the 5' position of the gquad is stored
235 * \param q here the 3' position of the gquad is stored
236 * \param P the datastructure containing the precalculated contibutions
238 * \return 1 on success, 0 if no gquad found
240 INLINE PRIVATE int backtrack_GQuad_IntLoop_L(int c,
251 int energy, dangles, k, l, maxl, minl, c0, l1;
254 dangles = P->model_details.dangles;
260 energy += P->mismatchI[type][si][sj];
263 energy += P->TerminalAU;
267 if(k < j - VRNA_GQUAD_MIN_BOX_SIZE){
268 minl = j - i + k - MAXLOOP - 2;
269 c0 = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
270 minl = MAX2(c0, minl);
272 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
273 maxl = MIN2(c0, maxl);
274 for(l = minl; l < maxl; l++){
275 if(S[l] != 3) continue;
276 if(c == energy + ggg[k][l - k] + P->internal_loop[j - l - 1]){
285 k < j - VRNA_GQUAD_MIN_BOX_SIZE;
288 if(l1>MAXLOOP) break;
289 if(S[k] != 3) continue;
290 minl = j - i + k - MAXLOOP - 2;
291 c0 = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
292 minl = MAX2(c0, minl);
294 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
295 maxl = MIN2(c0, maxl);
296 for(l = minl; l < maxl; l++){
297 if(S[l] != 3) continue;
298 if(c == energy + ggg[k][l - k] + P->internal_loop[l1 + j - l - 1]){
308 k < j - VRNA_GQUAD_MIN_BOX_SIZE;
311 if(l1>MAXLOOP) break;
312 if(S[k] != 3) continue;
313 if(c == energy + ggg[k][l - k] + P->internal_loop[l1]){
324 E_GQuad_IntLoop(int i,
332 int energy, ge, en1, en2, dangles, p, q, l1, minq, maxq;
333 int c0, c1, c2, c3, up, d53, d5, d3;
336 dangles = P->model_details.dangles;
342 energy += P->mismatchI[type][si][sj];
345 energy += P->TerminalAU;
351 if(p < j - VRNA_GQUAD_MIN_BOX_SIZE){
352 minq = j - i + p - MAXLOOP - 2;
353 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
354 minq = MAX2(c0, minq);
356 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
357 maxq = MIN2(c0, maxq);
358 for(q = minq; q < maxq; q++){
359 if(S[q] != 3) continue;
360 c0 = energy + ggg[index[q] + p] + P->internal_loop[j - q - 1];
367 p < j - VRNA_GQUAD_MIN_BOX_SIZE;
370 if(l1>MAXLOOP) break;
371 if(S[p] != 3) continue;
372 minq = j - i + p - MAXLOOP - 2;
373 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
374 minq = MAX2(c0, minq);
376 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
377 maxq = MIN2(c0, maxq);
378 for(q = minq; q < maxq; q++){
379 if(S[q] != 3) continue;
380 c0 = energy + ggg[index[q] + p] + P->internal_loop[l1 + j - q - 1];
388 p < j - VRNA_GQUAD_MIN_BOX_SIZE;
391 if(l1>MAXLOOP) break;
392 if(S[p] != 3) continue;
393 c0 = energy + ggg[index[q] + p] + P->internal_loop[l1];
398 /* here comes the additional stuff for the odd dangle models */
400 en1 = energy + P->dangle5[type][si];
401 en2 = energy + P->dangle5[type][sj];
402 en3 = energy + P->mismatchI[type][si][sj];
404 /* first case with 5' dangle (i.e. j-1) onto enclosing pair */
407 if(p < j - VRNA_GQUAD_MIN_BOX_SIZE){
408 minq = j - i + p - MAXLOOP - 2;
409 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
410 minq = MAX2(c0, minq);
412 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
413 maxq = MIN2(c0, maxq);
414 for(q = minq; q < maxq; q++){
415 if(S[q] != 3) continue;
416 c0 = en1 + ggg[index[q] + p] + P->internal_loop[j - q - 1];
422 for(p = i + 2; p < j - VRNA_GQUAD_MIN_BOX_SIZE; p++){
424 if(l1>MAXLOOP) break;
425 if(S[p] != 3) continue;
426 minq = j - i + p - MAXLOOP - 2;
427 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
428 minq = MAX2(c0, minq);
430 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
431 maxq = MIN2(c0, maxq);
432 for(q = minq; q < maxq; q++){
433 if(S[q] != 3) continue;
434 c0 = en1 + ggg[index[q] + p] + P->internal_loop[l1 + j - q - 1];
441 for(p = i + 4; p < j - VRNA_GQUAD_MIN_BOX_SIZE; p++){
443 if(l1>MAXLOOP) break;
444 if(S[p] != 3) continue;
445 c0 = en1 + ggg[index[q] + p] + P->internal_loop[l1 + 1];
449 /* second case with 3' dangle (i.e. i+1) onto enclosing pair */
458 E_GQuad_IntLoop_L(int i,
466 int energy, ge, en1, en2, dangles, p, q, l1, minq, maxq;
467 int c0, c1, c2, c3, up, d53, d5, d3;
470 dangles = P->model_details.dangles;
476 energy += P->mismatchI[type][si][sj];
479 energy += P->TerminalAU;
485 if(p < j - VRNA_GQUAD_MIN_BOX_SIZE){
486 minq = j - i + p - MAXLOOP - 2;
487 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
488 minq = MAX2(c0, minq);
490 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
491 maxq = MIN2(c0, maxq);
492 for(q = minq; q < maxq; q++){
493 if(S[q] != 3) continue;
494 c0 = energy + ggg[p][q-p] + P->internal_loop[j - q - 1];
501 p < j - VRNA_GQUAD_MIN_BOX_SIZE;
504 if(l1>MAXLOOP) break;
505 if(S[p] != 3) continue;
506 minq = j - i + p - MAXLOOP - 2;
507 c0 = p + VRNA_GQUAD_MIN_BOX_SIZE - 1;
508 minq = MAX2(c0, minq);
510 maxq = p + VRNA_GQUAD_MAX_BOX_SIZE + 1;
511 maxq = MIN2(c0, maxq);
512 for(q = minq; q < maxq; q++){
513 if(S[q] != 3) continue;
514 c0 = energy + ggg[p][q - p] + P->internal_loop[l1 + j - q - 1];
522 p < j - VRNA_GQUAD_MIN_BOX_SIZE;
525 if(l1>MAXLOOP) break;
526 if(S[p] != 3) continue;
527 c0 = energy + ggg[p][q - p] + P->internal_loop[l1];
536 exp_E_GQuad_IntLoop(int i,
544 int k, l, minl, maxl, u, r;
545 FLT_OR_DBL q, qe, *expintern;
551 qe = pf->expmismatchI[type][si][sj];
552 expintern = pf->expinternal;
559 if(k < j - VRNA_GQUAD_MIN_BOX_SIZE){
560 minl = j - i + k - MAXLOOP - 2;
561 u = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
562 minl = MAX2(u, minl);
564 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
565 maxl = MIN2(u, maxl);
566 for(l = minl; l < maxl; l++){
567 if(S[l] != 3) continue;
568 q += qe * G[index[k]-l] * expintern[j - l - 1];
575 k <= j - VRNA_GQUAD_MIN_BOX_SIZE;
578 if(u > MAXLOOP) break;
579 if(S[k] != 3) continue;
580 minl = j - i + k - MAXLOOP - 2;
581 r = k + VRNA_GQUAD_MIN_BOX_SIZE - 1;
582 minl = MAX2(r, minl);
583 maxl = k + VRNA_GQUAD_MAX_BOX_SIZE + 1;
585 maxl = MIN2(r, maxl);
586 for(l = minl; l < maxl; l++){
587 if(S[l] != 3) continue;
588 q += qe * G[index[k]-l] * expintern[u + j - l - 1];
594 for(k = i + 4; k < j - VRNA_GQUAD_MIN_BOX_SIZE; k++){
597 if(S[k] != 3) continue;
598 q += qe * G[index[k]-l] * expintern[u];