3 Revision 2.0 2010/12/06 20:04:20 ronny
4 repaired subopt for cofolding
6 Revision 1.24 2008/11/01 21:10:20 ivo
7 avoid rounding errors when computing DoS
9 Revision 1.23 2008/03/31 15:06:49 ivo
10 Add cofolding support in subopt
12 Revision 1.22 2008/02/23 09:42:35 ivo
13 fix circular folding bugs with dangles that cross the origin
15 Revision 1.21 2008/01/08 15:08:51 ivo
16 circular fold would fail for open chain
18 Revision 1.20 2008/01/08 14:08:20 ivo
19 add an option to compute the density of state
21 Revision 1.19 2007/12/05 13:04:04 ivo
22 add various circfold variants from Ronny
24 Revision 1.18 2003/10/06 08:56:45 ivo
27 Revision 1.17 2003/08/26 09:26:08 ivo
28 don't modify print_energy in subopt(); use doubles instead of floats
30 Revision 1.16 2001/10/01 13:50:00 ivo
31 sorted -> subopt_sorted
33 Revision 1.15 2001/09/17 10:30:42 ivo
34 move scale_parameters() into params.c
35 returns pointer to paramT structure
37 Revision 1.14 2001/08/31 15:02:19 ivo
38 Let subopt either write to file pointer or return a list of structures,
39 so we can nicely integrate it into the library
41 Revision 1.13 2001/04/05 07:35:08 ivo
42 remove uneeded declaration of TETRA_ENERGY
44 Revision 1.12 2000/10/10 08:53:20 ivo
45 adapted for new Turner energy parameters
46 supports all constraints that forbid pairs
48 Revision 1.11 2000/04/08 15:56:18 ivo
49 with noLonelyPairs=1 will produce no structures with isolated base pairs
50 (Giegerich's canonical structures)
52 Revision 1.10 1999/05/06 10:13:35 ivo
53 recalculte energies before printing if logML is set
56 Revision 1.9 1998/05/19 16:31:52 ivo
57 added support for constrained folding
59 Revision 1.8 1998/03/30 14:44:54 ivo
60 cleanup of make_printout etc.
62 Revision 1.7 1998/03/30 14:39:31 ivo
63 replaced BasePairs list with structure string in STATE
64 save memory by not storing (and sorting) structures
65 modified for use with ViennaRNA-1.2.1
67 Revision 1.6 1997/10/21 11:34:09 walter
70 Revision 1.1 1997/08/04 21:05:32 walter
75 suboptimal folding - Stefan Wuchty, Walter Fontana & Ivo Hofacker
87 #include "energy_par.h"
88 #include "fold_vars.h"
92 #include "loop_energies.h"
103 #define SAME_STRAND(I,J) (((I)>=cut_point)||((J)<cut_point))
104 #define NEW_NINIO 1 /* use new asymetry penalty */
105 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
106 #define MAXALPHA 20 /* maximal length of alphabet */
109 #################################
111 #################################
113 PUBLIC int subopt_sorted=0; /* output sorted by energy */
114 PUBLIC int density_of_states[MAXDOS+1];
115 PUBLIC double print_energy = 9999; /* printing threshold for use with logML */
122 /* int best_energy; */ /* best attainable energy */
126 #################################
127 # PRIVATE VARIABLES #
128 #################################
131 PRIVATE LIST *Stack = NULL;
133 PRIVATE int best_energy; /* best_energy = remaining energy */
134 PRIVATE int *f5 = NULL; /* energy of 5 end */
135 PRIVATE int *c = NULL; /* energy array, given that i-j pair */
136 PRIVATE int *fML = NULL; /* multi-loop auxiliary energy array */
137 PRIVATE int *fM1 = NULL; /* another multi-loop auxiliary energy array */
138 PRIVATE int *fc = NULL; /*energy array, from i (j) to cut*/
139 PRIVATE int *indx = NULL; /* index for moving in the triangle matrices c[] and f[] */
140 PRIVATE short *S=NULL, *S1=NULL;
141 PRIVATE char *ptype=NULL;
142 PRIVATE paramT *P = NULL;
144 PRIVATE int minimal_energy; /* minimum free energy */
145 PRIVATE int element_energy; /* internal energy of a structural element */
146 PRIVATE int threshold; /* minimal_energy + delta */
147 PRIVATE char *sequence = NULL;
148 PRIVATE int circular = 0;
149 PRIVATE int struct_constrained = 0;
150 PRIVATE int *fM2 = NULL; /* energies of M2 */
151 PRIVATE int Fc, FcH, FcI, FcM; /* parts of the exterior loop energies */
152 PRIVATE int with_gquad = 0;
155 PRIVATE int *ggg = NULL;
159 #pragma omp threadprivate(turn, Stack, nopush, best_energy, f5, c, fML, fM1, fc, indx, S, S1,\
160 ptype, P, length, minimal_energy, element_energy, threshold, sequence,\
161 fM2, Fc, FcH, FcI, FcM, circular, struct_constrained,\
167 #################################
168 # PRIVATE FUNCTION DECLARATIONS #
169 #################################
171 PRIVATE void make_pair(int i, int j, STATE *state);
173 /* mark a gquadruplex in the resulting dot-bracket structure */
174 PRIVATE void make_gquad(int i, int L, int l[3], STATE *state);
176 PRIVATE INTERVAL *make_interval (int i, int j, int ml);
177 /*@out@*/ PRIVATE STATE *make_state(/*@only@*/LIST *Intervals,
178 /*@only@*/ /*@null@*/ char *structure,
179 int partial_energy, int is_duplex);
180 PRIVATE STATE *copy_state(STATE * state);
181 PRIVATE void print_state(STATE * state);
182 PRIVATE void UNUSED print_stack(LIST * list);
183 /*@only@*/ PRIVATE LIST *make_list(void);
184 PRIVATE void push(LIST * list, /*@only@*/ void *data);
185 PRIVATE void *pop(LIST * list);
186 PRIVATE int best_attainable_energy(STATE * state);
187 PRIVATE void scan_interval(int i, int j, int array_flag, STATE * state);
188 PRIVATE void free_interval_node(/*@only@*/ INTERVAL * node);
189 PRIVATE void free_state_node(/*@only@*/ STATE * node);
190 PRIVATE void push_back(STATE * state);
191 PRIVATE char* get_structure(STATE * state);
192 PRIVATE int compare(const void *solution1, const void *solution2);
193 PRIVATE void make_output(SOLUTION *SL, FILE *fp);
194 PRIVATE char *costring(char *string);
195 PRIVATE void repeat(int i, int j, STATE * state,
196 int part_energy, int temp_energy);
199 #################################
200 # BEGIN OF FUNCTION DEFINITIONS #
201 #################################
206 /*---------------------------------------------------------------------------*/
207 /*List routines--------------------------------------------------------------*/
208 /*---------------------------------------------------------------------------*/
211 make_pair(int i, int j, STATE *state)
213 state->structure[i-1] = '(';
214 state->structure[j-1] = ')';
218 make_gquad(int i, int L, int l[3], STATE *state)
221 for(x = 0; x < L; x++){
222 state->structure[i - 1 + x] = '+';
223 state->structure[i - 1 + x + L + l[0]] = '+';
224 state->structure[i - 1 + x + 2*L + l[0] + l[1]] = '+';
225 state->structure[i - 1 + x + 3*L + l[0] + l[1] + l[2]] = '+';
229 /*---------------------------------------------------------------------------*/
232 make_interval(int i, int j, int array_flag)
236 interval = lst_newnode(sizeof(INTERVAL));
239 interval->array_flag = array_flag;
243 /*---------------------------------------------------------------------------*/
246 free_interval_node(INTERVAL * node)
251 /*---------------------------------------------------------------------------*/
254 free_state_node(STATE * node)
256 free(node->structure);
258 lst_kill(node->Intervals, lst_freenode);
262 /*---------------------------------------------------------------------------*/
265 make_state(LIST * Intervals,
272 state = lst_newnode(sizeof(STATE));
275 state->Intervals = Intervals;
277 state->Intervals = lst_init();
279 state->structure = structure;
282 state->structure = (char *) space(length+1);
283 for (i=0; i<length; i++)
284 state->structure[i] = '.';
287 state->partial_energy = partial_energy;
292 /*---------------------------------------------------------------------------*/
295 copy_state(STATE * state)
299 INTERVAL *new_interval, *next;
301 new_state = lst_newnode(sizeof(STATE));
302 new_state->Intervals = lst_init();
303 new_state->partial_energy = state->partial_energy;
304 /* new_state->best_energy = state->best_energy; */
306 if (state->Intervals->count) {
307 after = LST_HEAD(new_state->Intervals);
308 for ( next = lst_first(state->Intervals); next; next = lst_next(next))
310 new_interval = lst_newnode(sizeof(INTERVAL));
311 *new_interval = *next;
312 lst_insertafter(new_state->Intervals, new_interval, after);
313 after = new_interval;
316 new_state->structure = strdup(state->structure);
317 if (!new_state->structure) nrerror("out of memory");
321 /*---------------------------------------------------------------------------*/
323 /*@unused @*/ PRIVATE void
324 print_state(STATE * state)
328 if (state->Intervals->count)
330 printf("%d intervals:\n", state->Intervals->count);
331 for (next = lst_first(state->Intervals); next; next = lst_next(next))
333 printf("[%d,%d],%d ", next->i, next->j, next->array_flag);
337 printf("partial structure: %s\n", state->structure);
339 printf(" partial_energy: %d\n", state->partial_energy);
340 /* printf(" best_energy: %d\n", state->best_energy); */
341 (void) fflush(stdout);
344 /*---------------------------------------------------------------------------*/
346 /*@unused @*/ PRIVATE void
347 print_stack(LIST * list)
351 printf("================\n");
352 printf("%d states\n", list->count);
353 for (rec = lst_first(list); rec; rec = lst_next(rec))
355 printf("state-----------\n");
358 printf("================\n");
361 /*---------------------------------------------------------------------------*/
369 /*---------------------------------------------------------------------------*/
372 push(LIST * list, void *data)
375 lst_insertafter(list, data, LST_HEAD(list));
379 /* push_stack(STATE *state) { */ /* keep the stack sorted by energy */
380 /* STATE *after, *next; */
381 /* nopush = false; */
382 /* next = after = LST_HEAD(Stack); */
383 /* while ( next = lst_next(next)) { */
384 /* if ( next->best_energy >= state->best_energy ) break; */
387 /* lst_insertafter(Stack, state, after); */
390 /*---------------------------------------------------------------------------*/
397 data = lst_deletenext(list, LST_HEAD(list));
401 /*---------------------------------------------------------------------------*/
402 /*auxiliary routines---------------------------------------------------------*/
403 /*---------------------------------------------------------------------------*/
406 best_attainable_energy(STATE * state)
408 /* evaluation of best possible energy attainable within remaining intervals */
413 sum = state->partial_energy; /* energy of already found elements */
415 for (next = lst_first(state->Intervals); next; next = lst_next(next))
417 if (next->array_flag == 0)
418 sum += (circular) ? Fc : f5[next->j];
419 else if (next->array_flag == 1)
420 sum += fML[indx[next->j] + next->i];
421 else if (next->array_flag == 2)
422 sum += c[indx[next->j] + next->i];
423 else if (next->array_flag == 3)
424 sum += fM1[indx[next->j] + next->i];
425 else if (next->array_flag == 4)
427 else if (next->array_flag == 5)
434 /*---------------------------------------------------------------------------*/
437 push_back(STATE * state)
439 push(Stack, copy_state(state));
443 /*---------------------------------------------------------------------------*/
446 get_structure(STATE * state)
450 structure = strdup(state->structure);
454 /*---------------------------------------------------------------------------*/
456 compare(const void *solution1, const void *solution2)
458 if (((SOLUTION *) solution1)->energy > ((SOLUTION *) solution2)->energy)
460 if (((SOLUTION *) solution1)->energy < ((SOLUTION *) solution2)->energy)
462 return strcmp(((SOLUTION *) solution1)->structure,
463 ((SOLUTION *) solution2)->structure);
466 /*---------------------------------------------------------------------------*/
468 PRIVATE void make_output(SOLUTION *SL, FILE *fp) /* prints stuff */
472 for (sol = SL; sol->structure!=NULL; sol++)
473 if (cut_point<0) fprintf(fp, "%s %6.2f\n", sol->structure, sol->energy);
476 tStruc=costring(sol->structure);
477 fprintf(fp, "%s %6.2f\n", tStruc, sol->energy);
482 /*---------------------------------------------------------------------------*/
483 /* start of subopt backtracking ---------------------------------------------*/
484 /*---------------------------------------------------------------------------*/
486 PUBLIC SOLUTION *subopt(char *seq, char *structure, int delta, FILE *fp){
487 return subopt_par(seq, structure, NULL, delta, fold_constrained, 0, fp);
490 PUBLIC SOLUTION *subopt_circ(char *seq, char *structure, int delta, FILE *fp){
491 return subopt_par(seq, structure, NULL, delta, fold_constrained, 1, fp);
494 PUBLIC SOLUTION *subopt_par(char *seq,
505 SOLUTION *SolutionList;
506 unsigned long max_sol, n_sol;
507 int maxlevel, count, partial_energy, old_dangles, logML, dangle_model;
508 double structure_energy, min_en, eprint;
514 length = strlen(sequence);
515 circular = is_circular;
516 struct_constrained = is_constrained;
518 struc = (char *) space(sizeof(char)*(length+1));
519 if (struct_constrained) strncpy(struc, structure, length);
521 /* do mfe folding to get fill arrays and get ground state energy */
522 /* in case dangles is neither 0 or 2, set dangles=2 while folding */
526 P = get_parameter_copy(parameters);
529 set_model_details(&md);
530 P = get_scaled_parameters(temperature, md);
533 logML = P->model_details.logML;
534 old_dangles = dangle_model = P->model_details.dangles;
535 with_gquad = P->model_details.gquad;
537 /* temporarily set dangles to 2 if necessary */
538 if((P->model_details.dangles != 0) && (P->model_details.dangles != 2))
539 P->model_details.dangles = 2;
542 turn = (cut_point<0) ? 3 : 0;
545 min_en = fold_par(sequence, struc, P, struct_constrained, circular);
546 export_circfold_arrays(&Fc, &FcH, &FcI, &FcM, &fM2, &f5, &c, &fML, &fM1, &indx, &ptype);
547 /* restore dangle model */
548 P->model_details.dangles = old_dangles;
549 /* re-evaluate in case we're using logML etc */
550 min_en = energy_of_circ_struct_par(sequence, struc, P, 0);
552 min_en = cofold_par(sequence, struc, P, struct_constrained);
555 export_cofold_arrays_gq(&f5, &c, &fML, &fM1, &fc, &ggg, &indx, &ptype);
557 export_cofold_arrays(&f5, &c, &fML, &fM1, &fc, &indx, &ptype);
560 /* restore dangle model */
561 P->model_details.dangles = old_dangles;
562 /* re-evaluate in case we're using logML etc */
563 min_en = energy_of_struct_par(sequence, struc, P, 0);
567 eprint = print_energy + min_en;
570 SeQ=costring(sequence);
571 fprintf(fp, "%s %6d %6d\n", SeQ, (int) (-0.1+100*min_en), delta);
575 S = encode_sequence(sequence, 0);
576 S1 = encode_sequence(sequence, 1);
578 /* Initialize ------------------------------------------------------------ */
584 /* Initialize the stack ------------------------------------------------- */
586 minimal_energy = (circular) ? Fc : f5[length];
587 threshold = minimal_energy + delta;
589 warn_user("energy range too high, limiting to reasonable value");
590 threshold = INF-EMAX;
592 Stack = make_list(); /* anchor */
593 Intervals = make_list(); /* initial state: */
594 interval = make_interval(1, length, 0); /* interval [1,length,0] */
595 push(Intervals, interval);
596 state = make_state(Intervals, NULL, partial_energy,0);
597 /* state->best_energy = minimal_energy; */
600 /* SolutionList stores the suboptimal structures found */
602 SolutionList = (SOLUTION *) space(max_sol*sizeof(SOLUTION));
604 /* end initialize ------------------------------------------------------- */
607 while (1) { /* forever, til nothing remains on stack */
609 maxlevel = (Stack->count > maxlevel ? Stack->count : maxlevel);
611 if (LST_EMPTY (Stack)) /* we are done! clean up and quit */
613 /* fprintf(stderr, "maxlevel: %d\n", maxlevel); */
615 lst_kill(Stack, free_state_node);
617 SolutionList[n_sol].structure = NULL; /* NULL terminate list */
620 /* sort structures by energy */
621 qsort(SolutionList, n_sol, sizeof(SOLUTION), compare);
623 if (fp) make_output(SolutionList, fp);
629 /* pop the last element ---------------------------------------------- */
631 state = pop(Stack); /* current state to work with */
633 if (LST_EMPTY(state->Intervals))
636 /* state has no intervals left: we got a solution */
639 structure = get_structure(state);
640 structure_energy = state->partial_energy / 100.;
643 structure_energy = (circular) ? energy_of_circ_struct_par(sequence, structure, P, 0) : energy_of_struct_par(sequence, structure, P, 0);
646 if ((double) (state->partial_energy / 100.) != structure_energy) {
647 fprintf(stderr, "%s %6.2f %6.2f\n", structure,
648 state->partial_energy / 100., structure_energy );
652 if (logML || (dangle_model==1) || (dangle_model==3)) { /* recalc energy */
653 structure_energy = (circular) ? energy_of_circ_struct_par(sequence, structure, P, 0) : energy_of_struct_par(sequence, structure, P, 0);
656 e = (int) ((structure_energy-min_en)*10. + 0.1); /* avoid rounding errors */
657 if (e>MAXDOS) e=MAXDOS;
658 density_of_states[e]++;
659 if (structure_energy>eprint) {
662 if (!subopt_sorted && fp) {
663 /* print and forget */
665 fprintf(fp, "%s %6.2f\n", structure, structure_energy);
668 /*make ampersand seperated output if 2 sequences*/
669 outstruc=costring(structure);
670 fprintf(fp, "%s %6.2f\n", outstruc, structure_energy);
677 if (n_sol+1 == max_sol) {
679 SolutionList = (SOLUTION *)
680 xrealloc(SolutionList, max_sol*sizeof(SOLUTION));
682 SolutionList[n_sol].energy = structure_energy;
683 SolutionList[n_sol++].structure = structure;
688 /* get (and remove) next interval of state to analyze */
690 interval = pop(state->Intervals);
692 scan_interval(interval->i, interval->j, interval->array_flag, state);
694 free_interval_node(interval); /* free the current interval */
697 free_state_node(state); /* free the current state */
698 } /* end of while (1) */
700 /* free arrays left over from cofold() */
702 (circular) ? free_arrays():free_co_arrays();
703 if (fp) { /* we've printed everything -- free solutions */
705 for (sol=SolutionList; sol->structure != NULL; sol++)
706 free(sol->structure);
716 scan_interval(int i, int j, int array_flag, STATE * state)
718 /* real backtrack routine */
720 /* array_flag = 0: trace back in f5-array */
721 /* array_flag = 1: trace back in fML-array */
722 /* array_flag = 2: trace back in repeat() */
723 /* array_flag = 3: trace back in fM1-array */
725 STATE *new_state, *temp_state;
726 INTERVAL *new_interval;
727 register int k, fi, cij;
729 register int dangle_model = P->model_details.dangles;
730 register int noGUclosure = P->model_details.noGUclosure;
731 register int noLP = P->model_details.noLP;
733 best_energy = best_attainable_energy(state); /* .. on remaining intervals */
736 if ((i > 1) && (!array_flag))
737 nrerror ("Error while backtracking!");
739 if (j < i + turn + 1 && SAME_STRAND(i,j)) { /* minimal structure element */
745 /* 13131313131313131313131313131313131313131313131313131313131313131313131 */
746 if (array_flag == 3 || array_flag == 1) {
747 /* array_flag = 3: interval i,j was generated during */
748 /* a multiloop decomposition using array fM1 in repeat() */
749 /* or in this block */
751 /* array_flag = 1: interval i,j was generated from a */
752 /* stack, bulge, or internal loop in repeat() */
753 /* or in this block */
756 fi = fM1[indx[j-1] + i] + P->MLbase;
758 fi = fML[indx[j-1] + i] + P->MLbase;
760 if ((fi + best_energy <= threshold)&&(SAME_STRAND(j-1,j))) {
761 /* no basepair, nibbling of 3'-end */
763 new_state = copy_state(state);
764 new_interval = make_interval(i, j-1, array_flag);
765 push(new_state->Intervals, new_interval);
766 new_state->partial_energy += P->MLbase;
767 /* new_state->best_energy = fi + best_energy; */
768 push(Stack, new_state);
771 type = ptype[indx[j]+i];
773 if (type) { /* i,j may pair */
776 element_energy = E_MLstem(type,
777 (((i > 1)&&(SAME_STRAND(i-1,i))) || circular) ? S1[i-1] : -1,
778 (((j < length)&&(SAME_STRAND(j,j+1))) || circular) ? S1[j+1] : -1,
781 element_energy = E_MLstem(type, -1, -1, P);
783 cij = c[indx[j] + i] + element_energy;
784 if (cij + best_energy <= threshold)
785 repeat(i, j, state, element_energy, 0);
787 } /* array_flag == 3 || array_flag == 1 */
789 /* 11111111111111111111111111111111111111111111111111111111111111111111111 */
791 if (array_flag == 1) {
792 /* array_flag = 1: interval i,j was generated from a */
793 /* stack, bulge, or internal loop in repeat() */
794 /* or in this block */
797 if ((SAME_STRAND(i-1,i))&&(SAME_STRAND(j,j+1))) { /*backtrack in FML only if multiloop is possible*/
798 for ( k = i+turn+1 ; k <= j-1-turn ; k++) {
799 /* Multiloop decomposition if i,j contains more than 1 stack */
801 type = ptype[indx[j]+k+1];
802 if (type==0) continue;
805 element_energy = E_MLstem(type,
806 (SAME_STRAND(i-1,i)) ? S1[k] : -1,
807 (SAME_STRAND(j,j+1)) ? S1[j+1] : -1,
810 element_energy = E_MLstem(type, -1, -1, P);
812 if (SAME_STRAND(k,k+1)) {
813 if (fML[indx[k]+i] + c[indx[j] + k+1] +
814 element_energy + best_energy <= threshold) {
815 temp_state = copy_state (state);
816 new_interval = make_interval (i, k, 1);
817 push (temp_state->Intervals, new_interval);
818 repeat(k+1, j, temp_state, element_energy, fML[indx[k]+i]);
819 free_state_node(temp_state);
825 stopp=(cut_point>0)? (cut_point-2):(length); /*if cut_point -1: k on cut, => no ml*/
826 stopp=MIN2(stopp, j-1-turn);
827 if (i>cut_point) stopp=j-1-turn;
828 else if (i==cut_point) stopp=0; /*not a multi loop*/
829 for (k = i ; k <= stopp; k++) {
830 /* Multiloop decomposition if i,j contains only 1 stack */
831 type = ptype[indx[j]+k+1];
832 if (type==0) continue;
835 element_energy = E_MLstem(type,
836 (SAME_STRAND(k-1,k)) ? S1[k] : -1,
837 (SAME_STRAND(j,j+1)) ? S1[j+1] : -1,
840 element_energy = E_MLstem(type, -1, -1, P);
842 element_energy += P->MLbase*(k-i+1);
844 if (c[indx[j]+k+1] + element_energy + best_energy <= threshold)
845 repeat(k+1, j, state, element_energy, 0);
847 } /* array_flag == 1 */
849 /* 2222222222222222222222222222222222222222222222222222222222222222222222 */
853 /* array_flag = 2: interval i,j was generated from a */
854 /* stack, bulge, or internal loop in repeat() */
856 repeat(i, j, state, 0, 0);
860 fprintf(stderr, "%d,%d", i, j);
861 fprintf(stderr, "Oops, no solution in repeat!\n");
867 /* 0000000000000000000000000000000000000000000000000000000000000000000000 */
869 if ((array_flag == 0) && !circular)
871 /* array_flag = 0: interval i,j was found while */
872 /* tracing back through f5-array and c-array */
873 /* or within this block */
875 if (f5[j-1] + best_energy <= threshold) {
876 /* no basepair, nibbling of 3'-end */
878 new_state = copy_state(state);
879 new_interval = make_interval(i, j-1 , 0);
880 push(new_state->Intervals, new_interval);
881 /* new_state->best_energy = f5[j-1] + best_energy; */
882 push(Stack, new_state);
885 for (k = j-turn-1; k > 1; k--) {
887 type = ptype[indx[j]+k];
888 if (type==0) continue;
892 element_energy = E_ExtLoop(type,
893 (SAME_STRAND(k-1,k)) ? S1[k-1] : -1,
894 ((j < length)&&(SAME_STRAND(j,j+1))) ? S1[j+1] : -1,
897 element_energy = E_ExtLoop(type, -1, -1, P);
899 if (!(SAME_STRAND(k,j)))/*&&(state->is_duplex==0))*/ {
900 element_energy+=P->DuplexInit;
901 /*state->is_duplex=1;*/
904 if (f5[k-1] + c[indx[j]+k] + element_energy + best_energy <= threshold)
906 temp_state = copy_state(state);
907 new_interval = make_interval(1, k-1, 0);
908 push(temp_state->Intervals, new_interval);
909 repeat(k, j, temp_state, element_energy, f5[k-1]);
910 free_state_node(temp_state);
913 type = ptype[indx[j]+1];
915 if (dangle_model && (j < length)&&(SAME_STRAND(j,j+1)))
916 element_energy = E_ExtLoop(type, -1, S1[j+1], P);
918 element_energy = E_ExtLoop(type, -1, -1, P);
920 if (!(SAME_STRAND(1,j))) element_energy+=P->DuplexInit;
922 if (c[indx[j]+1] + element_energy + best_energy <= threshold)
923 repeat(1, j, state, element_energy, 0);
925 }/* end array_flag == 0 && !circular*/
926 /* or do we subopt circular? */
927 else if(array_flag == 0){
929 /* if we've done everything right, we will never reach this case more than once */
930 /* right after the initilization of the stack with ([1,n], empty, 0) */
931 /* lets check, if we can have an open chain without breaking the threshold */
932 /* this is an ugly work-arround cause in case of an open chain we do not have to */
933 /* backtrack anything further... */
935 new_state = copy_state(state);
936 new_interval = make_interval(1,2,0);
937 push(new_state->Intervals, new_interval);
938 new_state->partial_energy = 0;
939 push(Stack, new_state);
941 /* ok, lets check if we can do an exterior hairpin without breaking the threshold */
942 /* best energy should be 0 if we are here */
943 if(FcH + best_energy <= threshold){
944 /* lets search for all exterior hairpin cases, that fit into our threshold barrier */
945 /* we use index k,l to avoid confusion with i,j index of our state... */
946 /* if we reach here, i should be 1 and j should be n respectively */
948 for (l=k+turn+1; l <= j; l++){
949 int kl, type, u, tmpE, no_close;
950 u = j-l + k-1; /* get the hairpin loop length */
953 kl = indx[l]+k; /* just confusing these indices ;-) */
955 no_close = ((type==3)||(type==4))&&noGUclosure;
959 /* now lets have a look at the hairpin energy */
962 strcpy(loopseq , sequence+l-1);
963 strncat(loopseq, sequence, k);
965 tmpE = E_Hairpin(u, type, S1[l+1], S1[k-1], loopseq, P);
967 if(c[kl] + tmpE + best_energy <= threshold){
968 /* what we really have to do is something like this, isn't it? */
969 /* we have to create a new state, with interval [k,l], then we */
970 /* add our loop energy as initial energy of this state and put */
971 /* the state onto the stack R... for further refinement... */
972 /* we also denote this new interval to be scanned in C */
973 new_state = copy_state(state);
974 new_interval = make_interval(k,l,2);
975 push(new_state->Intervals, new_interval);
976 /* hopefully we add this energy in the right way... */
977 new_state->partial_energy += tmpE;
978 push(Stack, new_state);
983 /* now lets see, if we can do an exterior interior loop without breaking the threshold */
984 if(FcI + best_energy <= threshold){
985 /* now we search for our exterior interior loop possibilities */
987 for (l=k+turn+1; l <= j; l++){
990 kl = indx[l]+k; /* just confusing these indices ;-) */
995 for (p = l+1; p < j ; p++){
998 if (u1+k-1>MAXLOOP) break;
999 qmin = u1+k-1+j-MAXLOOP;
1000 if(qmin<p+turn+1) qmin = p+turn+1;
1001 for(q = qmin; q <=j; q++){
1003 type_2 = rtype[ptype[indx[q]+p]];
1004 if(!type_2) continue;
1006 if(u1+u2>MAXLOOP) continue;
1007 tmpE = E_IntLoop(u1, u2, type, type_2, S1[l+1], S1[k-1], S1[p-1], S1[q+1], P);
1008 if(c[kl] + c[indx[q]+p] + tmpE + best_energy <= threshold){
1009 /* ok, similar to the hairpin stuff, we add new states onto the stack R */
1010 /* but in contrast to the hairpin decomposition, we have to add two new */
1011 /* intervals, enclosed by k,l and p,q respectively and we also have to */
1012 /* add the partial energy, that comes from the exterior interior loop */
1013 new_state = copy_state(state);
1014 new_interval = make_interval(k, l, 2);
1015 push(new_state->Intervals, new_interval);
1016 new_interval = make_interval(p,q,2);
1017 push(new_state->Intervals, new_interval);
1018 new_state->partial_energy += tmpE;
1019 push(Stack, new_state);
1026 /* and last but not least, we have a look, if we can do an exterior multiloop within the energy threshold */
1027 if(FcM <= threshold){
1028 /* this decomposition will be somehow more complicated...so lets see what we do here... */
1029 /* first we want to find out which split inidices we can use without exceeding the threshold */
1031 for (k=turn+1; k<j-2*turn; k++){
1032 tmpE2 = fML[indx[k]+1]+fM2[k+1]+P->MLclosing;
1033 if(tmpE2 + best_energy <= threshold){
1034 /* grmpfh, we have found a possible split index k so we have to split fM2 and fML now */
1035 /* lets do it first in fM2 anyway */
1036 for(l=k+turn+2; l<j-turn-1; l++){
1037 tmpE2 = fM1[indx[l]+k+1] + fM1[indx[j]+l+1];
1038 if(tmpE2 + fML[indx[k]+1] + P->MLclosing <= threshold){
1039 /* we've (hopefully) found a valid decomposition of fM2 and therefor we have all */
1040 /* three intervals for our new state to be pushed on stack R */
1041 new_state = copy_state(state);
1043 /* first interval leads for search in fML array */
1044 new_interval = make_interval(1, k, 1);
1045 push(new_state->Intervals, new_interval);
1047 /* next, we have the first interval that has to be traced in fM1 */
1048 new_interval = make_interval(k+1, l, 3);
1049 push(new_state->Intervals, new_interval);
1051 /* and the last of our three intervals is also one to be traced within fM1 array... */
1052 new_interval = make_interval(l+1, j, 3);
1053 push(new_state->Intervals, new_interval);
1055 /* mmh, we add the energy for closing the multiloop now... */
1056 new_state->partial_energy += P->MLclosing;
1057 /* next we push our state onto the R stack */
1058 push(Stack, new_state);
1061 /* else we search further... */
1064 /* ok, we have to decompose fML now... */
1068 } /* thats all folks for the circular case... */
1070 /* 44444444444444444444444444444444444444444444444444444444444444 */
1071 if (array_flag == 4) {
1072 /* array_flag = 4: interval i,j was found while */
1073 /* tracing back through fc-array smaller than than cut_point*/
1074 /* or within this block */
1076 if (fc[i+1] + best_energy <= threshold) {
1077 /* no basepair, nibbling of 5'-end */
1078 new_state = copy_state(state);
1079 new_interval = make_interval(i+1, j , 4);
1080 push(new_state->Intervals, new_interval);
1081 push(Stack, new_state);
1084 for (k = i+TURN+1; k < j; k++) {
1086 type = ptype[indx[k]+i];
1087 if (type==0) continue;
1091 element_energy = E_ExtLoop(type, (i > 1) ? S1[i-1]: -1, S1[k+1], P);
1092 else /* no dangles */
1093 element_energy = E_ExtLoop(type, -1, -1, P);
1095 if (fc[k+1] + c[indx[k]+i] + element_energy + best_energy <= threshold) {
1096 temp_state = copy_state(state);
1097 new_interval = make_interval(k+1,j, 4);
1098 push(temp_state->Intervals, new_interval);
1099 repeat(i, k, temp_state, element_energy, fc[k+1]);
1100 free_state_node(temp_state);
1103 type = ptype[indx[j]+i];
1106 element_energy = E_ExtLoop(type, (i>1) ? S1[i-1] : -1, -1, P);
1108 element_energy = E_ExtLoop(type, -1, -1, P);
1110 if (c[indx[cut_point-1]+i] + element_energy + best_energy <= threshold)
1111 repeat(i, cut_point-1, state, element_energy, 0);
1113 } /* array_flag == 4 */
1115 /*55555555555555555555555555555555555555555555555555555555555555555555555*/
1116 if (array_flag == 5) {
1117 /* array_flag = 5: interval cut_point=i,j was found while */
1118 /* tracing back through fc-array greater than cut_point */
1119 /* or within this block */
1121 if (fc[j-1] + best_energy <= threshold) {
1122 /* no basepair, nibbling of 3'-end */
1123 new_state = copy_state(state);
1124 new_interval = make_interval(i, j-1 , 5);
1125 push(new_state->Intervals, new_interval);
1126 push(Stack, new_state);
1129 for (k = j-TURN-1; k > i; k--) {
1131 type = ptype[indx[j]+k];
1132 if (type==0) continue;
1137 element_energy = E_ExtLoop(type, S1[k-1], (j < length) ? S1[j+1] : -1, P);
1139 element_energy = E_ExtLoop(type, -1, -1, P);
1141 if (fc[k-1] + c[indx[j]+k] + element_energy + best_energy <= threshold) {
1142 temp_state = copy_state(state);
1143 new_interval = make_interval(i, k-1, 5);
1144 push(temp_state->Intervals, new_interval);
1145 repeat(k, j, temp_state, element_energy, fc[k-1]);
1146 free_state_node(temp_state);
1149 type = ptype[indx[j]+i];
1152 element_energy = E_ExtLoop(type, -1, (j<length) ? S1[j+1] : -1, P);
1154 if (c[indx[j]+cut_point] + element_energy + best_energy <= threshold)
1155 repeat(cut_point, j, state, element_energy, 0);
1157 } /* array_flag == 5 */
1163 /*---------------------------------------------------------------------------*/
1166 repeat(int i, int j, STATE * state, int part_energy, int temp_energy)
1168 /* routine to find stacks, bulges, internal loops and multiloops */
1169 /* within interval closed by basepair i,j */
1172 INTERVAL *new_interval;
1174 register int k, p, q, energy, new;
1176 register int no_close, type, type_2;
1178 int dangle_model = P->model_details.dangles;
1179 int noLP = P->model_details.noLP;
1180 int noGUclosure = P->model_details.noGUclosure;
1182 type = ptype[indx[j]+i];
1183 if (type==0) fprintf(stderr, "repeat: Warning: %d %d can't pair\n", i,j);
1185 no_close = (((type == 3) || (type == 4)) && noGUclosure);
1187 if (noLP) /* always consider the structure with additional stack */
1188 if ((i+turn+2<j) && ((type_2 = ptype[indx[j-1]+i+1]))) {
1189 new_state = copy_state(state);
1190 make_pair(i, j, new_state);
1191 make_pair(i+1, j-1, new_state);
1192 new_interval = make_interval(i+1, j-1, 2);
1193 push(new_state->Intervals, new_interval);
1194 if(SAME_STRAND(i,i+1) && SAME_STRAND(j-1,j))
1195 energy = E_IntLoop(0, 0, type, rtype[type_2],S1[i+1],S1[j-1],S1[i+1],S1[j-1], P);
1197 new_state->partial_energy += part_energy;
1198 new_state->partial_energy += energy;
1199 /* new_state->best_energy = new + best_energy; */
1200 push(Stack, new_state);
1201 if (i==1 || state->structure[i-2]!='(' || state->structure[j]!=')')
1202 /* adding a stack is the only possible structure */
1206 best_energy += part_energy; /* energy of current structural element */
1207 best_energy += temp_energy; /* energy from unpushed interval */
1209 for (p = i + 1; p <= MIN2 (j-2-turn, i+MAXLOOP+1); p++) {
1210 int minq = j-i+p-MAXLOOP-2;
1211 if (minq<p+1+turn) minq = p+1+turn;
1212 for (q = j - 1; q >= minq; q--) {
1213 if ((noLP) && (p==i+1) && (q==j-1)) continue;
1215 type_2 = ptype[indx[q]+p];
1216 if (type_2==0) continue;
1219 if (no_close||(type_2==3)||(type_2==4))
1220 if ((p>i+1)||(q<j-1)) continue; /* continue unless stack */
1222 if (SAME_STRAND(i,p) && SAME_STRAND(q,j)) {
1223 energy = E_IntLoop(p-i-1, j-q-1, type, rtype[type_2],
1224 S1[i+1],S1[j-1],S1[p-1],S1[q+1], P);
1226 new = energy + c[indx[q]+p];
1228 if (new + best_energy <= threshold) {
1229 /* stack, bulge, or interior loop */
1231 new_state = copy_state(state);
1232 make_pair(i, j, new_state);
1233 make_pair(p, q, new_state);
1235 new_interval = make_interval(p, q, 2);
1236 push(new_state->Intervals, new_interval);
1237 new_state->partial_energy += part_energy;
1238 new_state->partial_energy += energy;
1239 /* new_state->best_energy = new + best_energy; */
1240 push(Stack, new_state);
1242 }/*end of if block */
1243 } /* end of q-loop */
1244 } /* end of p-loop */
1246 if (!SAME_STRAND(i,j)) { /*look in fc*/
1250 element_energy = E_ExtLoop(rt, (SAME_STRAND(j-1,j)) ? S1[j-1] : -1, (SAME_STRAND(i,i+1)) ? S1[i+1] : -1, P);
1252 element_energy = E_ExtLoop(rt, -1, -1, P);
1254 if (fc[i+1] + fc[j-1] +element_energy + best_energy <= threshold)
1256 INTERVAL *interval1, *interval2;
1258 new_state = copy_state(state);
1259 interval1 = make_interval(i+1, cut_point-1, 4);
1260 interval2 = make_interval(cut_point, j-1, 5);
1261 if (cut_point-i < j-cut_point) { /* push larger interval first */
1262 push(new_state->Intervals, interval1);
1263 push(new_state->Intervals, interval2);
1265 push(new_state->Intervals, interval2);
1266 push(new_state->Intervals, interval1);
1268 make_pair(i, j, new_state);
1269 new_state->partial_energy += part_energy;
1270 new_state->partial_energy += element_energy;
1271 push(Stack, new_state);
1278 for (k = i + 1 + turn; k <= j - 2 - turn; k++) {
1279 /* multiloop decomposition */
1281 element_energy = mm;
1283 element_energy = E_MLstem(rt, S1[j-1], S1[i+1], P) + mm;
1285 element_energy = E_MLstem(rt, -1, -1, P) + mm;
1287 if ((fML[indx[k] + i+1] + fM1[indx[j-1] + k+1] +
1288 element_energy + best_energy) <= threshold)
1290 INTERVAL *interval1, *interval2;
1292 new_state = copy_state(state);
1293 interval1 = make_interval(i+1, k, 1);
1294 interval2 = make_interval(k+1, j-1, 3);
1295 if (k-i+1 < j-k-2) { /* push larger interval first */
1296 push(new_state->Intervals, interval1);
1297 push(new_state->Intervals, interval2);
1299 push(new_state->Intervals, interval2);
1300 push(new_state->Intervals, interval1);
1302 make_pair(i, j, new_state);
1303 new_state->partial_energy += part_energy;
1304 new_state->partial_energy += element_energy;
1305 /* new_state->best_energy = fML[indx[k] + i+1] + fM1[indx[j-1] + k+1]
1306 + element_energy + best_energy; */
1307 push(Stack, new_state);
1309 } /* end of k-loop */
1312 if (SAME_STRAND(i,j)) {
1313 if (no_close) element_energy = FORBIDDEN;
1315 element_energy = E_Hairpin(j-i-1, type, S1[i+1], S1[j-1], sequence+i-1, P);
1316 if (element_energy + best_energy <= threshold) {
1317 /* hairpin structure */
1319 new_state = copy_state(state);
1320 make_pair(i, j, new_state);
1321 new_state->partial_energy += part_energy;
1322 new_state->partial_energy += element_energy;
1323 /* new_state->best_energy =
1324 hairpin[unpaired] + element_energy + best_energy; */
1325 push(Stack, new_state);
1329 best_energy -= part_energy;
1330 best_energy -= temp_energy;
1334 PRIVATE char *costring(char *string)
1338 len = strlen(string);
1339 ctmp = (char *)space((len+2) * sizeof(char));
1340 /* first sequence */
1342 (void) strncpy(ctmp, string, len);
1345 (void) strncpy(ctmp, string, cut_point-1);
1347 ctmp[cut_point-1] = '&';
1348 /* second sequence */
1349 (void) strcat(ctmp, string+cut_point-1);
1354 /*---------------------------------------------------------------------------*/
1355 /* Well, that is the end!----------------------------------------------------*/
1356 /*---------------------------------------------------------------------------*/