2 /* Last changed Time-stamp: <2007-12-05 14:05:51 ivo> */
5 RNA secondary structure prediction
7 c Ivo Hofacker, Chrisoph Flamm
8 original implementation by
21 #include "energy_par.h"
22 #include "fold_vars.h"
26 #include "loop_energies.h"
28 static char rcsid[] UNUSED = "$Id: fold.c,v 1.38 2007/12/19 10:27:42 ivo Exp $";
38 #define PRIVATE static
40 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
41 #define NEW_NINIO 1 /* new asymetry penalty */
44 PRIVATE void letter_structure(char *structure, int length) UNUSED;
45 PRIVATE void parenthesis_structure(char *structure, int length);
46 PRIVATE void get_arrays(unsigned int size);
47 /* PRIVATE void scale_parameters(void); */
48 /* PRIVATE int stack_energy(int i, const char *string); */
49 PRIVATE void make_ptypes(const short *S, const char *structure);
50 PRIVATE void encode_seq(const char *sequence);
51 PRIVATE void backtrack(const char *sequence, int s);
52 PRIVATE int fill_arrays(const char *sequence, const int max_asymm, const int threshloop,
53 const int min_s2, const int max_s2, const int half_stem, const int max_half_stem);
58 PRIVATE void alisnoinitialize_fold(const int length);
59 PRIVATE void make_pscores(const short *const* S, const char *const* AS,int n_seq, const char *structure);
60 PRIVATE int *pscore; /* precomputed array of pair types */
62 PRIVATE int alifill_arrays(const char **string, const int max_asymm, const int threshloop,
63 const int min_s2, const int max_s2, const int half_stem,
64 const int max_half_stem);
65 PRIVATE void aliget_arrays(unsigned int size);
66 PRIVATE short * aliencode_seq(const char *sequence);
67 PRIVATE int alibacktrack(const char **strings, int s);
70 #define MINPSCORE -2 * UNIT
73 #define MAXSECTORS 500 /* dimension for a backtrack array */
74 #define LOCALITY 0. /* locality parameter for base-pairs */
76 #define MIN2(A, B) ((A) < (B) ? (A) : (B))
77 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
78 #define SAME_STRAND(I,J) (((I)>=cut_point)||((J)<cut_point))
80 PRIVATE paramT *P = NULL;
82 PRIVATE int *indx = NULL; /* index for moving in the triangle matrices c[] and fMl[]*/
84 PRIVATE int *c = NULL; /* energy array, given that i-j pair */
85 PRIVATE int *cc = NULL; /* linear array for calculating canonical structures */
86 PRIVATE int *cc1 = NULL; /* " " */
87 PRIVATE int *Fmi = NULL; /* holds row i of fML (avoids jumps in memory) */
88 PRIVATE int *DMLi = NULL; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
89 PRIVATE int *DMLi1 = NULL; /* MIN(fML[i+1,k]+fML[k+1,j]) */
90 PRIVATE int *DMLi2 = NULL; /* MIN(fML[i+2,k]+fML[k+1,j]) */
91 PRIVATE char *ptype = NULL; /* precomputed array of pair types */
92 PRIVATE short *S = NULL, *S1 = NULL;
93 PRIVATE int init_length=-1;
94 PRIVATE int *mLoop = NULL; /*contains the minimum of c for a xy range*/
95 PRIVATE folden **foldlist = NULL;
96 PRIVATE folden **foldlist_XS = NULL;
98 PRIVATE int *BP = NULL; /* contains the structure constrainsts: BP[i]
99 -1: | = base must be paired
100 -2: < = base must be paired with j<i
101 -3: > = base must be paired with j>i
102 -4: x = base must not pair
103 positive int: base is paired with int */
106 static sect sector[MAXSECTORS]; /* stack of partial structures for backtracking */
108 PRIVATE char alpha[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
109 /* needed by cofold/eval */
110 /* PRIVATE int cut_in_loop(int i); */
111 /* PRIVATE int min_hairpin = TURN; */
113 /* some definitions to take circfold into account... */
114 /* PRIVATE int *fM2 = NULL;*/ /* fM2 = multiloop region with exactly two stems, extending to 3' end */
115 PUBLIC int Fc, FcH, FcI, FcM; /* parts of the exterior loop energies */
116 /*--------------------------------------------------------------------------*/
118 void snoinitialize_fold(const int length)
121 if (length<1) nrerror("snoinitialize_fold: argument must be greater 0");
122 if (init_length>0) snofree_arrays(length);
123 get_arrays((unsigned) length);
125 for (n = 1; n <= (unsigned) length; n++)
126 indx[n] = (n*(n-1)) >> 1; /* n(n-1)/2 */
128 snoupdate_fold_params();
131 PRIVATE void alisnoinitialize_fold(const int length)
134 if (length<1) nrerror("snoinitialize_fold: argument must be greater 0");
135 if (init_length>0) snofree_arrays(length);
136 aliget_arrays((unsigned) length);
139 for (n = 1; n <= (unsigned) length; n++)
140 indx[n] = (n*(n-1)) >> 1; /* n(n-1)/2 */
141 snoupdate_fold_params();
145 /*--------------------------------------------------------------------------*/
147 PRIVATE void get_arrays(unsigned int size)
149 indx = (int *) space(sizeof(int)*(size+1));
150 c = (int *) space(sizeof(int)*((size*(size+1))/2+2));
151 mLoop = (int *) space(sizeof(int)*((size*(size+1))/2+2));
153 ptype = (char *) space(sizeof(char)*((size*(size+1))/2+2));
154 cc = (int *) space(sizeof(int)*(size+2));
155 cc1 = (int *) space(sizeof(int)*(size+2));
156 Fmi = (int *) space(sizeof(int)*(size+1));
157 DMLi = (int *) space(sizeof(int)*(size+1));
158 DMLi1 = (int *) space(sizeof(int)*(size+1));
159 DMLi2 = (int *) space(sizeof(int)*(size+1));
160 if (base_pair) free(base_pair);
161 base_pair = (struct bondT *) space(sizeof(struct bondT)*(1+size/2));
162 /* extra array(s) for circfold() */
165 PRIVATE void aliget_arrays(unsigned int size)
167 indx = (int *) space(sizeof(int)*(size+1));
168 c = (int *) space(sizeof(int)*((size*(size+1))/2+2));
169 mLoop = (int *) space(sizeof(int)*((size*(size+1))/2+2));
170 pscore = (int *) space(sizeof(int)*((size*(size+1))/2+2));
171 ptype = (char *) space(sizeof(char)*((size*(size+1))/2+2));
172 cc = (int *) space(sizeof(int)*(size+2));
173 cc1 = (int *) space(sizeof(int)*(size+2));
174 Fmi = (int *) space(sizeof(int)*(size+1));
175 DMLi = (int *) space(sizeof(int)*(size+1));
176 DMLi1 = (int *) space(sizeof(int)*(size+1));
177 DMLi2 = (int *) space(sizeof(int)*(size+1));
178 if (base_pair) free(base_pair);
179 base_pair = (struct bondT *) space(sizeof(struct bondT)*(1+size/2));
180 /* extra array(s) for circfold() */
186 /*--------------------------------------------------------------------------*/
189 void snofree_arrays(const int length)
191 free(indx); free(c);free(cc); free(cc1);
192 free(ptype);free(mLoop);
194 for(i=length;i>-1;i--){
195 while(foldlist[i]!=NULL){
196 folden *n = foldlist[i];
197 foldlist[i] = foldlist[i]->next;
203 for(i=length;i>-1;i--){
204 while(foldlist_XS[i]!=NULL){
205 folden *n = foldlist_XS[i];
206 foldlist_XS[i] = foldlist_XS[i]->next;
209 free(foldlist_XS[i]);
212 free(base_pair); base_pair=NULL; free(Fmi);
213 free(DMLi); free(DMLi1);free(DMLi2);
218 void alisnofree_arrays(const int length)
220 free(indx); free(c);free(cc); free(cc1);
221 free(ptype);free(mLoop);free(pscore);
223 for(i=length-1;i>-1;i--){
224 while(foldlist[i]!=NULL){
225 folden *n = foldlist[i];
226 foldlist[i] = foldlist[i]->next;
232 free(base_pair); base_pair=NULL; free(Fmi);
233 free(DMLi); free(DMLi1);free(DMLi2);
238 /*--------------------------------------------------------------------------*/
240 void snoexport_fold_arrays(int **indx_p, int **mLoop_p, int **cLoop, folden ***fold_p, folden ***fold_p_XS) {
241 /* make the DP arrays available to routines such as subopt() */
242 *indx_p = indx; *mLoop_p = mLoop;
243 *cLoop = c; *fold_p = foldlist;*fold_p_XS=foldlist_XS;
246 /* void alisnoexport_fold_arrays(int **indx_p, int **mLoop_p, int **cLoop, folden ***fold_p, int **pscores) { */
247 /* /\* make the DP arrays available to routines such as subopt() *\/ */
248 /* *indx_p = indx; *mLoop_p = mLoop; */
249 /* *cLoop = c; *fold_p = foldlist; */
250 /* *pscores=pscore; */
253 /*--------------------------------------------------------------------------*/
264 int snofold(const char *string, char *structure, const int max_assym, const int threshloop,
265 const int min_s2, const int max_s2, const int half_stem, const int max_half_stem) {
266 int length, energy, bonus, bonus_cnt, s;
268 /* Variable initialization */
272 length = (int) strlen(string);
274 S = encode_sequence(string, 0);
275 S1 = encode_sequence(string, 1);
278 /* structure = (char *) space((unsigned) length+1); */
280 if (length>init_length) snoinitialize_fold(length);
281 else if (fabs(P->temperature - temperature)>1e-6) snoupdate_fold_params();
285 /* encode_seq(string); */
286 BP = (int *)space(sizeof(int)*(length+2));
287 make_ptypes(S, structure);
288 energy=fill_arrays(string, max_assym, threshloop, min_s2, max_s2, half_stem, max_half_stem);
289 backtrack(string, s);
292 /*no structure output, no backtrack*/
293 backtrack(string, 0);
294 parenthesis_structure(structure, length);
297 free(S); free(S1); /* free(BP); */
301 PRIVATE void make_pscores(const short *const* S, const char *const* AS,
302 int n_seq, const char *structure) {
303 /* calculate co-variance bonus for each pair depending on */
304 /* compensatory/consistent mutations and incompatible seqs */
305 /* should be 0 for conserved pairs, >0 for good pairs */
306 #define NONE -10000 /* score for forbidden pairs */
307 int n,i,j,k,l,s,score;
308 int dm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */
309 {0,0,2,2,1,2,2} /* CG */,
310 {0,2,0,1,2,2,2} /* GC */,
311 {0,2,1,0,2,1,2} /* GU */,
312 {0,1,2,2,0,2,1} /* UG */,
313 {0,2,2,1,2,0,2} /* AU */,
314 {0,2,2,2,1,2,0} /* UA */};
315 n=Sali[0][0]; /* length of seqs */
316 for (i=1; i<n; i++) {
317 for (j=i+1; (j<i+TURN+1) && (j<=n); j++)
318 pscore[indx[j]+i] = NONE;
319 for (j=i+TURN+1; j<=n; j++) {
320 int pfreq[8]={0,0,0,0,0,0,0,0};
321 for (s=0; s<n_seq; s++) {
323 if (Sali[s][i]==0 && Sali[s][j]==0) type = 7; /* gap-gap */
325 if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7;
326 else type = pair[Sali[s][i]][Sali[s][j]];
331 if (pfreq[0]*2>n_seq) { pscore[indx[j]+i] = NONE; continue;}
332 for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */
333 for (l=k+1; l<=6; l++)
334 /* scores for replacements between pairtypes */
335 /* consistent or compensatory mutations score 1 or 2 */
336 score += pfreq[k]*pfreq[l]*dm[k][l];
337 /* counter examples score -1, gap-gap scores -0.25 */
338 pscore[indx[j]+i] = cv_fact *
339 ((UNIT*score)/n_seq - nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25));
343 if (noLonelyPairs) /* remove unwanted pairs */
344 for (k=1; k<n-TURN-1; k++)
345 for (l=1; l<=2; l++) {
346 int type,ntype=0,otype=0;
348 type = pscore[indx[j]+i];
349 while ((i>=1)&&(j<=n)) {
350 if ((i>1)&&(j<n)) ntype = pscore[indx[j+1]+i-1];
351 if ((otype<-4*UNIT)&&(ntype<-4*UNIT)) /* worse than 2 counterex */
352 pscore[indx[j]+i] = NONE; /* i.j can only form isolated pairs */
360 if (fold_constrained&&(structure!=NULL)) {
361 int psij, hx, hx2, *stack, *stack2;
362 stack = (int *) space(sizeof(int)*(n+1));
363 stack2 = (int *) space(sizeof(int)*(n+1));
365 for(hx=hx2=0, j=1; j<=n; j++) {
366 switch (structure[j-1]) {
367 case 'x': /* can't pair */
368 for (l=1; l<j-TURN; l++) pscore[indx[j]+l] = NONE;
369 for (l=j+TURN+1; l<=n; l++) pscore[indx[l]+j] = NONE;
377 case '<': /* pairs upstream */
378 for (l=1; l<j-TURN; l++) pscore[indx[j]+l] = NONE;
382 fprintf(stderr, "%s\n", structure);
383 nrerror("unbalanced brackets in constraints");
386 pscore[indx[j]+i]=NONE;
390 fprintf(stderr, "%s\n", structure);
391 nrerror("unbalanced brackets in constraints");
394 psij = pscore[indx[j]+i]; /* store for later */
397 pscore[indx[k]+l] = NONE;
400 pscore[indx[l]+k] = NONE;
401 for (k=i+1; k<j; k++)
402 pscore[indx[k]+i] = pscore[indx[j]+k] = NONE;
403 pscore[indx[j]+i] = (psij>0) ? psij : 0;
405 case '>': /* pairs downstream */
406 for (l=j+TURN+1; l<=n; l++) pscore[indx[l]+j] = NONE;
411 fprintf(stderr, "%s\n", structure);
412 nrerror("unbalanced brackets in constraint string");
414 free(stack); free(stack2);
418 float alisnofold(const char **strings, const int max_assym, const int threshloop,
419 const int min_s2, const int max_s2, const int half_stem, const int max_half_stem) {
420 int s,n_seq, length, energy;
422 length = (int) strlen(strings[0]);
423 /* structure = (char *) space((unsigned) length+1); */
425 if (length>init_length) alisnoinitialize_fold(length);
426 if (fabs(P->temperature - temperature)>1e-6) snoupdate_fold_params();
427 for (s=0; strings[s]!=NULL; s++);
429 Sali = (short **) space(n_seq*sizeof(short *));
430 for (s=0; s<n_seq; s++) {
431 if (strlen(strings[s]) != length) nrerror("uneqal seqence lengths");
432 Sali[s] = aliencode_seq(strings[s]);
434 make_pscores((const short **) Sali, (const char *const *) strings, n_seq, structure);
435 energy=alifill_arrays(strings, max_assym, threshloop, min_s2, max_s2, half_stem, max_half_stem);
436 alibacktrack((const char **)strings, 0);
437 structure = (char *) space((length+1)*sizeof(char));
438 parenthesis_structure(structure, length);
441 for (s=0; s<n_seq; s++) free(Sali[s]);
443 /* free(structure); */
444 /* free(S)*/; free(S1); /* free(BP); */
445 return (float) energy/100.;
448 PRIVATE int alifill_arrays(const char **strings, const int max_asymm, const int threshloop,
449 const int min_s2, const int max_s2, const int half_stem,
450 const int max_half_stem) {
452 int i, j, length, energy;
453 /* int decomp, new_fML; */
458 for (n_seq=0; strings[n_seq]!=NULL; n_seq++);
459 type = (int *) space(n_seq*sizeof(int));
460 length = strlen(strings[0]);
462 /* max_separation = (int) ((1.-LOCALITY)*(double)(length-2));*/ /* not in use */
464 /* for (i=(j>TURN?(j-TURN):1); i<j; i++) { */
466 for (i = (length)-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
467 for (j = i+TURN+1; j <= length; j++) {
470 for (s=0; s<n_seq; s++) {
471 type[s] = pair[Sali[s][i]][Sali[s][j]];
472 if (type[s]==0) type[s]=7;
474 psc = pscore[indx[j]+i];
475 if (psc>=MINPSCORE) { /* we have a pair */
476 int new_c=0, stackEnergy=INF; /* seems that new_c immer den minimum von cij enthaelt */
477 /* hairpin ----------------------------------------------*/
479 for (new_c=s=0; s<n_seq; s++)
480 new_c += E_Hairpin(j-i-1,type[s],Sali[s][i+1],Sali[s][j-1],strings[s]+i-1,P);
481 /*--------------------------------------------------------
482 check for elementary structures involving more than one
483 closing pair (interior loop).
484 --------------------------------------------------------*/
486 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
487 int minq = j-i+p-MAXLOOP-2;
488 if (minq<p+1+TURN) minq = p+1+TURN;
489 for (q = minq; q < j; q++) {
490 if (pscore[indx[q]+p]<MINPSCORE) continue;
491 if(abs((p-i) - (j-q)) > max_asymm) continue;
492 for (energy = s=0; s<n_seq; s++) {
493 type_2 = pair[Sali[s][q]][Sali[s][p]]; /* q,p not p,q! */
494 if (type_2 == 0) type_2 = 7;
495 energy += E_IntLoop(p-i-1, j-q-1, type[s], type_2,
496 Sali[s][i+1], Sali[s][j-1],
497 Sali[s][p-1], Sali[s][q+1],P);
499 new_c = MIN2(energy+c[indx[q]+p], new_c);
500 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
505 /* coaxial stacking of (i.j) with (i+1.k) or (k+1.j-1) */
507 new_c = MIN2(new_c, cc1[j-1]+stackEnergy);
508 cc[j] = new_c - psc; /* add covariance bonnus/penalty */
510 } /* end >> if (pair) << */
512 /* done with c[i,j], now compute fML[i,j] */
513 /* free ends ? -----------------------------------------*/
518 int *FF; /* rotate the auxilliary arrays */
519 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
520 FF = cc1; cc1=cc; cc=FF;
521 for (j=1; j<=length; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
524 foldlist = (folden**) space((length)*sizeof(folden*));
526 for(i=0; i< length; i++){
527 foldlist[i]=(folden*) space(sizeof(folden));
528 foldlist[i]->next=NULL;
529 foldlist[i]->k=INF+1;
530 foldlist[i]->energy=INF;
533 folden* head; /* we save the stem loop information in a list like structure */
535 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
537 max_k = MIN2(length-min_s2,i+max_half_stem+1);
538 min_k = MAX2(i+half_stem+1, length-max_s2);
539 for (j = i+TURN+1; j <= length; j++) {
542 for(a=0; a< MISMATCH ;a++){
543 for(b=0; b< MISMATCH ; b++){
544 mLoop[ij]=MIN2(mLoop[ij], c[indx[j-a]+i+b]);
548 if(mLoop[ij]>=n_seq*threshloop){
552 if(j>=min_k-1 && j < max_k){ /* comment if out to recover the known behaviour */
553 head = (folden*) space(sizeof(folden));
555 head->energy=mLoop[ij];
556 head->next=foldlist[i];
565 return mLoop[indx[length]+1];/* mLoop; */
568 PRIVATE int alibacktrack(const char **strings, int s) {
570 /*------------------------------------------------------------------
571 trace back through the "c", "f5" and "fML" arrays to get the
572 base pairing list. No search for equivalent structures is done.
573 This is fast, since only few structure elements are recalculated.
574 ------------------------------------------------------------------*/
577 If s>0 then s items have been already pushed onto the sector stack */
578 int i, j, length, energy;/* , new; */
580 int bonus,n_seq,*type; int b=0,cov_en = 0;
582 length = strlen(strings[0]);
583 for (n_seq=0; strings[n_seq]!=NULL; n_seq++);
584 type = (int *) space(n_seq*sizeof(int));
587 sector[s].j = length;
591 int ml, ss, cij, traced, i1, j1, p, q;
592 int canonical = 1; /* (i,j) closes a canonical structure */
595 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
596 occur in the fML- (1) or in the f-array (0) */
598 base_pair[++b].i = i;
603 if (j < i+TURN+1) continue; /* no more pairs in this interval */
608 /*----- begin of "repeat:" -----*/
609 if (canonical) cij = c[indx[j]+i];
610 for (ss=0; ss<n_seq; ss++) {
611 type[ss] = pair[Sali[ss][i]][Sali[ss][j]];
612 if (type[ss]==0) type[ss] = 7;
617 if (cij == c[indx[j]+i]) {
618 /* (i.j) closes canonical structures, thus
619 (i+1.j-1) must be a pair */
620 for (ss=0; ss<n_seq; ss++) {
621 type_2 = pair[Sali[ss][j-1]][Sali[ss][i+1]]; /* j,i not i,j */
622 if (type_2==0) type_2 = 7;
623 cij -= P->stack[type[ss]][type_2];
625 cij += pscore[indx[j]+i];
626 base_pair[++b].i = i+1;
627 base_pair[b].j = j-1;
628 cov_en += pscore[indx[j-1]+i+1];
634 cij += pscore[indx[j]+i];
636 for (ss=0; ss<n_seq; ss++)
637 cc += E_Hairpin(j-i-1, type[ss], Sali[ss][i+1], Sali[ss][j-1], strings[ss]+i-1,P);
638 if (cij == cc) /* found hairpin */
641 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
643 minq = j-i+p-MAXLOOP-2;
644 if (minq<p+1+TURN) minq = p+1+TURN;
645 for (q = j-1; q >= minq; q--) {
646 for (ss=energy=0; ss<n_seq; ss++) {
647 type_2 = pair[Sali[ss][q]][Sali[ss][p]]; /* q,p not p,q */
648 if (type_2==0) type_2 = 7;
649 energy += E_IntLoop(p-i-1, j-q-1, type[ss], type_2,
650 Sali[ss][i+1], Sali[ss][j-1],
651 Sali[ss][p-1], Sali[ss][q+1],P);
653 traced = (cij == energy+c[indx[q]+p]);
655 base_pair[++b].i = p;
657 cov_en += pscore[indx[q]+p];
664 /* end of repeat: --------------------------------------------------*/
666 /* (i.j) must close a multi-loop */
667 /* tt = rtype[type]; */
668 /* mm = bonus+P->MLclosing+P->MLintern[tt]; */
669 /* d5 = P->dangle5[tt][S1[j-1]]; */
670 /* d3 = P->dangle3[tt][S1[i+1]]; */
672 sector[s+1].ml = sector[s+2].ml = 1;
674 /* if (k<=j-3-TURN) { /\\* found the decomposition *\\/ *\/ */
675 /* sector[++s].i = i1; */
676 /* sector[s].j = k; */
677 /* sector[++s].i = k+1; */
678 /* sector[s].j = j1; */
679 /* } /\* else { *\/ */
680 /* nrerror("backtracking failed in repeat"); */
684 base_pair[0].i = b; /* save the total number of base pairs */
689 PRIVATE int fill_arrays(const char *string, const int max_asymm, const int threshloop,
690 const int min_s2, const int max_s2, const int half_stem, const int max_half_stem) {
692 int i, j, length, energy;
693 /* int decomp;*/ /*, new_fML; */
694 int no_close, type, type_2;
699 length = (int) strlen(string);
701 /* max_separation = (int) ((1.-LOCALITY)*(double)(length-2)); */ /* not in use */
706 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
707 /* printf("i=%d\t",i); */
708 for (j = i+TURN+1; j <= length; j++) {
709 /* printf("j=%d,",j); */
716 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
717 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
718 if ((BP[i]==-4)||(BP[j]==-4)) type=0;
720 no_close = (((type==3)||(type==4))&&no_closingGU);
722 /* if (j-i-1 > max_separation) type = 0; */ /* forces locality degree */
724 if (type) { /* we have a pair */
725 int new_c=0, stackEnergy=INF; /* seems that new_c immer den minimum von cij enthaelt */
726 /* hairpin ----------------------------------------------*/
728 if (no_close) new_c = FORBIDDEN;
730 new_c = E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], string+i-1,P); /* computes hair pin structure for subsequence i...j */
732 /*--------------------------------------------------------
733 check for elementary structures involving more than one
734 closing pair (interior loop).
735 --------------------------------------------------------*/
737 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
738 int minq = j-i+p-MAXLOOP-2;
739 if (minq<p+1+TURN) minq = p+1+TURN;
740 for (q = minq; q < j; q++) {
742 if(abs((p-i) - (j-q)) > max_asymm) continue;
743 type_2 = ptype[indx[q]+p];
745 if (type_2==0) continue;
746 type_2 = rtype[type_2];
749 if (no_close||(type_2==3)||(type_2==4))
750 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
752 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
753 S1[i+1], S1[j-1], S1[p-1], S1[q+1],P);
754 new_c = MIN2(energy+c[indx[q]+p], new_c);
755 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
763 /* coaxial stacking of (i.j) with (i+1.k) or (k+1.j-1) */
766 new_c = MIN2(new_c, cc1[j-1]+stackEnergy);
769 /* min_c=MIN2(min_c, c[ij]); */
771 } /* end >> if (pair) << */
776 /* done with c[i,j], now compute fML[i,j] */
777 /* free ends ? -----------------------------------------*/
782 int *FF; /* rotate the auxilliary arrays */
783 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
784 FF = cc1; cc1=cc; cc=FF;
785 for (j=1; j<=length; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
788 foldlist = (folden**) space((length+1)*sizeof(folden*));
789 foldlist_XS = (folden**) space((length+1)*sizeof(folden*));
790 /* linked list initialization*/
791 for(i=0; i<=length; i++){
792 foldlist[i]=(folden*) space(sizeof(folden));
793 foldlist[i]->next=NULL;
794 foldlist[i]->k=INF+1;
795 foldlist[i]->energy=INF;
796 foldlist_XS[i]=(folden*) space(sizeof(folden));
797 foldlist_XS[i]->next=NULL;
798 foldlist_XS[i]->k=INF+1;
799 foldlist_XS[i]->energy=INF;
801 folden* head; /* we save the stem loop information in a list like structure */
803 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
805 max_k = MIN2(length-min_s2,i+max_half_stem+1);
806 min_k = MAX2(i+half_stem+1, length-max_s2);
809 for (j = i+TURN+1; j <= length; j++) {
812 for(a=0; a< MISMATCH ;a++){
813 for(b=0; b< MISMATCH ; b++){
814 mLoop[ij]=MIN2(mLoop[ij], c[indx[j-a]+i+b]);
815 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j-2]+i]); */
816 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j]+i+1]); */
817 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j-1]+i+1]); */
818 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j-2]+i+1]); */
819 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j]+i+2]); */
820 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j-1]+i+2]); */
821 /* #mLoop[ij]=MIN2(mLoop[ij], c[indx[j-2]+i+2]); */
824 min_c = MIN2(mLoop[ij] ,min_c);
826 if(mLoop[ij]>=threshloop){
830 if(j>=min_k-1 && j <= max_k){ /* comment if out to recover the known behaviour */
831 head = (folden*) space(sizeof(folden));
833 head->energy=mLoop[ij];
834 head->next=foldlist[i];
836 head_XS = (folden*) space(sizeof(folden));
838 head_XS->energy=mLoop[ij];
839 head_XS->next=foldlist_XS[j];
840 foldlist_XS[j] = head_XS;
847 /* for(i=0; i< length; i++){ */
849 /* temp = foldlist[i]; */
850 /* while(temp->next){ */
852 /* printf("count %d: i%d j%d energy %d \n", count, i, temp->k, temp->energy); */
853 /* temp=temp->next; */
856 /* printf("Count %d \n", count); */
858 /* for(i=length-1; i>=0; i--){ */
860 /* temp = foldlist_XS[i]; */
861 /* while(temp->next){ */
863 /* printf("count %d: i%d j%d energy %d \n", count, temp->k,i, temp->energy); */
864 /* temp=temp->next; */
867 /* printf("Count %d \n", count); */
868 /* return mLoop[indx[length]+1]; */ /* mLoop; */
870 /* printf("\nmin_array = %d\n", min_c); */
871 /* return f5[length]; */
877 PRIVATE void backtrack(const char *string, int s) {
879 /*------------------------------------------------------------------
880 trace back through the "c", "f5" and "fML" arrays to get the
881 base pairing list. No search for equivalent structures is done.
882 This is fast, since only few structure elements are recalculated.
883 ------------------------------------------------------------------*/
886 If s>0 then s items have been already pushed onto the sector stack */
887 int i, j, /*k,*/ length, energy, new;
888 int no_close, type, type_2;/* , tt; */
892 length = strlen(string);
895 sector[s].j = length;
899 int ml, cij, traced, i1, j1, /*d3, d5, mm,*/ p, q;
900 int canonical = 1; /* (i,j) closes a canonical structure */
903 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
904 occur in the fML- (1) or in the f-array (0) */
906 base_pair[++b].i = i;
911 if (j < i+TURN+1) continue; /* no more pairs in this interval */
916 /*----- begin of "repeat:" -----*/
917 if (canonical) cij = c[indx[j]+i];
918 type = ptype[indx[j]+i];
920 if (fold_constrained) {
921 if ((BP[i]==j)||(BP[i]==-1)||(BP[i]==-2)) bonus -= BONUS;
922 if ((BP[j]==-1)||(BP[j]==-3)) bonus -= BONUS;
925 if (cij == c[indx[j]+i]) {
926 /* (i.j) closes canonical structures, thus
927 (i+1.j-1) must be a pair */
928 type_2 = ptype[indx[j-1]+i+1]; type_2 = rtype[type_2];
929 cij -= P->stack[type][type_2] + bonus;
930 base_pair[++b].i = i+1;
931 base_pair[b].j = j-1;
937 no_close = (((type==3)||(type==4))&&no_closingGU&&(bonus==0));
939 if (cij == FORBIDDEN) continue;
941 if (cij == E_Hairpin(j-i-1, type, S1[i+1], S1[j-1],string+i-1,P)+bonus)
943 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
945 minq = j-i+p-MAXLOOP-2;
946 if (minq<p+1+TURN) minq = p+1+TURN;
947 for (q = j-1; q >= minq; q--) {
948 type_2 = ptype[indx[q]+p];
949 if (type_2==0) continue;
950 type_2 = rtype[type_2];
952 if (no_close||(type_2==3)||(type_2==4))
953 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
954 energy = E_IntLoop(p-i-1, j-q-1, type, type_2,
955 S1[i+1], S1[j-1], S1[p-1], S1[q+1],P);
956 new = energy+c[indx[q]+p]+bonus;
957 traced = (cij == new);
959 base_pair[++b].i = p;
967 /* end of repeat: --------------------------------------------------*/
969 /* (i.j) must close a multi-loop */
970 /* tt = rtype[type]; */
971 /* mm = bonus+P->MLclosing+P->MLintern[tt]; */
972 /* d5 = P->dangle5[tt][S1[j-1]]; */
973 /* d3 = P->dangle3[tt][S1[i+1]]; */
975 sector[s+1].ml = sector[s+2].ml = 1;
977 /* if (k<=j-3-TURN) { */ /* found the decomposition */
978 /* sector[++s].i = i1; */
979 /* sector[s].j = k; */
980 /* sector[++s].i = k+1; */
981 /* sector[s].j = j1; */
983 /* nrerror("backtracking failed in repeat"); */
988 base_pair[0].i = b; /* save the total number of base pairs */
991 char *snobacktrack_fold_from_pair(const char *sequence, int i, int j) {
997 encode_seq(sequence);
998 backtrack(sequence, 1);
999 structure = (char *) space((strlen(sequence)+1)*sizeof(char));
1000 parenthesis_structure(structure, strlen(sequence));
1005 char *alisnobacktrack_fold_from_pair(const char **strings, int i, int j, int *cov) {
1007 int n_seq, s, length;
1008 length = (int) strlen(strings[0]);
1009 for (s=0; strings[s]!=NULL; s++);
1015 /* encode_seq(sequence); */
1016 Sali = (short **) space(n_seq*sizeof(short *));
1017 for (s=0; s<n_seq; s++) {
1018 if (strlen(strings[s]) != length) nrerror("uneqal seqence lengths");
1019 Sali[s] = aliencode_seq(strings[s]);
1021 *cov=alibacktrack(strings, 1);
1022 structure = (char *) space((length+1)*sizeof(char));
1023 parenthesis_structure(structure, length);
1025 for (s=0; s<n_seq; s++) {
1034 /*---------------------------------------------------------------------------*/
1037 /*---------------------------------------------------------------------------*/
1040 /*--------------------------------------------------------------------------*/
1043 /*---------------------------------------------------------------------------*/
1046 PRIVATE void encode_seq(const char *sequence) {
1049 l = strlen(sequence);
1050 S = (short *) space(sizeof(short)*(l+2));
1051 S1= (short *) space(sizeof(short)*(l+2));
1052 /* S1 exists only for the special X K and I bases and energy_set!=0 */
1055 for (i=1; i<=l; i++) { /* make numerical encoding of sequence */
1056 S[i]= (short) encode_char(toupper(sequence[i-1]));
1057 S1[i] = alias[S[i]]; /* for mismatches of nostandard bases */
1059 /* for circular folding add first base at position n+1 and last base at
1061 S[l+1] = S[1]; S1[l+1]=S1[1]; S1[0] = S1[l];
1064 PRIVATE short * aliencode_seq(const char *sequence) {
1067 l = strlen(sequence);
1068 Stemp = (short *) space(sizeof(short)*(l+2));
1069 Stemp[0] = (short) l;
1071 /* make numerical encoding of sequence */
1072 for (i=1; i<=l; i++)
1073 Stemp[i]= (short) encode_char(toupper(sequence[i-1]));
1075 /* for circular folding add first base at position n+1 */
1076 /* Stemp[l+1] = Stemp[1]; */
1081 /*---------------------------------------------------------------------------*/
1083 PRIVATE void letter_structure(char *structure, int length)
1087 for (n = 0; n <= length-1; structure[n++] = ' ') ;
1088 structure[length] = '\0';
1090 for (n = 0, k = 1; k <= base_pair[0].i; k++) {
1093 if (x-1 > 0 && y+1 <= length) {
1094 if (structure[x-2] != ' ' && structure[y] == structure[x-2]) {
1095 structure[x-1] = structure[x-2];
1096 structure[y-1] = structure[x-1];
1100 if (structure[x] != ' ' && structure[y-2] == structure[x]) {
1101 structure[x-1] = structure[x];
1102 structure[y-1] = structure[x-1];
1106 structure[x-1] = alpha[n-1];
1107 structure[y-1] = alpha[n-1];
1111 /*---------------------------------------------------------------------------*/
1113 PRIVATE void parenthesis_structure(char *structure, int length)
1117 for (n = 0; n <= length-1; structure[n++] = '.') ;
1118 structure[length] = '\0';
1120 for (k = 1; k <= base_pair[0].i; k++) {
1121 structure[base_pair[k].i-1] = '(';
1122 structure[base_pair[k].j-1] = ')';
1125 /*---------------------------------------------------------------------------*/
1127 PUBLIC void snoupdate_fold_params(void)
1130 P = scale_parameters();
1132 if (init_length < 0) init_length=0;
1135 /*---------------------------------------------------------------------------*/
1136 /* PRIVATE short *pair_table; */
1139 /*---------------------------------------------------------------------------*/
1141 PRIVATE int stack_energy(int i, const char *string)
1143 /* calculate energy of substructure enclosed by (i,j) */
1148 type = pair[S[i]][S[j]];
1154 while (p<q) { /* process all stacks and interior loops */
1156 while (pair_table[++p]==0);
1157 while (pair_table[--q]==0);
1158 if ((pair_table[q]!=(short)p)||(p>q)) break;
1159 type_2 = pair[S[q]][S[p]];
1163 /* energy += LoopEnergy(i, j, p, q, type, type_2); */
1164 if ( SAME_STRAND(i,p) && SAME_STRAND(q,j) )
1165 ee = LoopEnergy(p-i-1, j-q-1, type, type_2,
1166 S1[i+1], S1[j-1], S1[p-1], S1[q+1]);
1168 i=p; j=q; type = rtype[type_2];
1171 /* p,q don't pair must have found hairpin or multiloop */
1173 if (p>q) { /* hair pin */
1174 if (SAME_STRAND(i,j))
1175 ee = snoHairpinE(j-i-1, type, S1[i+1], S1[j-1], string+i-1);
1181 /* (i,j) is exterior pair of multiloop */
1183 /* add up the contributions of the substructures of the ML */
1184 energy += stack_energy(p, string);
1186 /* search for next base pair in multiloop */
1187 while (pair_table[++p]==0);
1191 ii = cut_in_loop(i);
1198 /*---------------------------------------------------------------------------*/
1201 /*---------------------------------------------------------------------------*/
1203 PRIVATE int cut_in_loop(int i) {
1204 /* walk around the loop; return j pos of pair after cut if
1205 cut_point in loop else 0 */
1207 p = j = pair_table[i];
1209 i = pair_table[p]; p = i+1;
1210 while ( pair_table[p]==0 ) p++;
1211 } while (p!=j && SAME_STRAND(i,p));
1212 return SAME_STRAND(i,p) ? 0 : pair_table[p];
1216 /*---------------------------------------------------------------------------*/
1218 PRIVATE void make_ptypes(const short *S, const char *structure) {
1222 for (k=1; k<n-TURN; k++)
1223 for (l=1; l<=2; l++) {
1224 int type,ntype=0,otype=0;
1225 i=k; j = i+TURN+l; if (j>n) continue;
1226 type = pair[S[i]][S[j]];
1227 while ((i>=1)&&(j<=n)) {
1228 if ((i>1)&&(j<n)) ntype = pair[S[i-1]][S[j+1]];
1229 if (noLonelyPairs && (!otype) && (!ntype))
1230 type = 0; /* i.j can only form isolated pairs */
1231 ptype[indx[j]+i] = (char) type;
1238 if (fold_constrained&&(structure!=NULL)) {
1239 constrain_ptypes(structure, (unsigned int)n, ptype, BP, TURN, 0);
1243 stack = (int *) space(sizeof(int)*(n+1));
1245 for(hx=0, j=1; j<=n; j++) {
1246 switch (structure[j-1]) {
1247 case '|': BP[j] = -1; break;
1248 case 'x': /* can't pair */
1249 for (l=1; l<j-TURN; l++) ptype[indx[j]+l] = 0;
1250 for (l=j+TURN+1; l<=n; l++) ptype[indx[l]+j] = 0;
1255 case '<': /* pairs upstream */
1256 for (l=1; l<j-TURN; l++) ptype[indx[j]+l] = 0;
1260 fprintf(stderr, "%s\n", structure);
1261 nrerror("unbalanced brackets in constraints");
1264 type = ptype[indx[j]+i];
1265 for (k=i+1; k<=n; k++) ptype[indx[k]+i] = 0;
1266 /* don't allow pairs i<k<j<l */
1267 for (l=j; l<=n; l++)
1268 for (k=i+1; k<=j; k++) ptype[indx[l]+k] = 0;
1269 /* don't allow pairs k<i<l<j */
1270 for (l=i; l<=j; l++)
1271 for (k=1; k<=i; k++) ptype[indx[l]+k] = 0;
1272 for (k=1; k<j; k++) ptype[indx[j]+k] = 0;
1273 ptype[indx[j]+i] = (type==0)?7:type;
1275 case '>': /* pairs downstream */
1276 for (l=j+TURN+1; l<=n; l++) ptype[indx[l]+j] = 0;
1281 fprintf(stderr, "%s\n", structure);
1282 nrerror("unbalanced brackets in constraint string");