2 search for sequences that
3 fold into a given target structure
8 /* Last changed Time-stamp: <2009-03-19 10:43:32 ivo> */
10 #define TDIST 0 /* use tree distance */
11 #define PF 1 /* include support for partiton function */
20 #include "part_func.h"
24 #include "dist_vars.h"
26 #include "RNAstruct.h"
29 #include "fold_vars.h"
32 static char rcsid[] = "$Id: inverse.c,v 1.13 2009/03/19 09:45:28 ivo Exp $";
34 #define PRIVATE static
35 PRIVATE double adaptive_walk(char *start, const char *target);
36 PRIVATE void shuffle(int *list, int len);
37 PRIVATE void make_start(char* start, const char *structure);
38 PRIVATE void make_ptable(const char *structure, int *table);
39 PRIVATE void make_pairset(void);
40 PRIVATE double mfe_cost(const char *, char*, const char *);
41 PRIVATE double pf_cost(const char *, char *, const char *);
42 PRIVATE char *aux_struct(const char* structure );
44 /* for backward compatibility, make sure symbolset can hold 20 characters */
45 PRIVATE char default_alpha[21] = "AUGC";
46 PUBLIC char *symbolset = default_alpha;
47 PUBLIC int give_up = 0;
48 PUBLIC float final_cost = 0; /* when to stop inverse_pf_fold */
49 PUBLIC int inv_verbose=0; /* print out substructure on which inverse_fold() fails */
51 PRIVATE char pairset[2*MAXALPHA+1];
52 PRIVATE int base, npairs;
55 /*-------------------------------------------------------------------------*/
56 PRIVATE int fold_type;
62 PRIVATE double adaptive_walk(char *start, const char *target)
65 printf("%s\n%s %c\n", start, target, backtrack_type );
68 int i,j,p,tt,w1,w2, n_pos, len, flag;
70 char *string, *string2, *cstring, *structure, *struct2;
71 int *mut_pos_list, mut_sym_list[MAXALPHA+1], mut_pair_list[2*MAXALPHA+1];
72 int *w1_list, *w2_list, mut_position, symbol, bp;
73 int *target_table, *test_table;
75 double cost, current_cost, ccost2;
76 double (*cost_function)(const char *, char *, const char *);
79 if (strlen(target)!=len) {
80 fprintf(stderr, "%s\n%s\n", start, target);
81 nrerror("adaptive_walk: start and target have unequal length");
83 string = (char *) space(sizeof(char)*(len+1));
84 cstring = (char *) space(sizeof(char)*(len+1));
85 string2 = (char *) space(sizeof(char)*(len+1));
86 structure = (char *) space(sizeof(char)*(len+1));
87 struct2 = (char *) space(sizeof(char)*(len+1));
88 mut_pos_list = (int *) space(sizeof(int)*len);
89 w1_list = (int *) space(sizeof(int)*len);
90 w2_list = (int *) space(sizeof(int)*len);
91 target_table = (int *) space(sizeof(int)*len);
92 test_table = (int *) space(sizeof(int)*len);
94 make_ptable(target, target_table);
96 for (i=0; i<base; i++) mut_sym_list[i] = i;
97 for (i=0; i<npairs; i++) mut_pair_list[i] = i;
100 string[i] = (islower(start[i]))?toupper(start[i]):start[i];
103 if (fold_type==0) cost_function = mfe_cost;
104 else cost_function = pf_cost;
106 cost = cost_function(string, structure, target);
108 if (fold_type==0) ccost2=cost2;
109 else { ccost2 = -1.; cost2=0; }
111 strcpy(cstring, string);
117 if (fold_type==0) { /* min free energy fold */
118 make_ptable(structure, test_table);
119 for (j=w1=w2=flag=0; j<len; j++)
120 if ((tt=target_table[j])!=test_table[j]) {
121 if ((tt<j)&&(isupper(start[j]))) w1_list[w1++] = j; /* incorrectly paired */
122 if ((flag==0)&&(j>0))
123 if ((target_table[j-1]<j-1)&&isupper(start[j-1]))
124 w2_list[w2++] = j-1; /* adjacent to incorrect position */
125 if (w2>1) if (w2_list[w2-2]==w2_list[w2-1]) w2--;
129 if (flag==1) if ((tt<j)&&isupper(start[j]))
130 w2_list[w2++] = j; /* adjacent to incorrect position */
133 shuffle(w1_list, w1);
134 shuffle(w2_list, w2);
135 for (j=n_pos=0; j<w1; j++) mut_pos_list[n_pos++] = w1_list[j];
136 for (j=0; j<w2; j++) mut_pos_list[n_pos++] = w2_list[j];
137 } else { /* partition_function */
138 for (j=n_pos=0; j<len; j++) if (isupper(start[j]))
139 if (target_table[j]<=j) mut_pos_list[n_pos++] = j;
140 shuffle(mut_pos_list, n_pos);
144 for (mut_position=0; mut_position<n_pos; mut_position++){
146 strcpy(string, cstring);
147 shuffle(mut_sym_list, base);
148 shuffle(mut_pair_list, npairs);
150 i = mut_pos_list[mut_position];
152 if (target_table[i]<0) /* unpaired base */
153 for (symbol=0;symbol<base;symbol++) {
156 symbolset[mut_sym_list[symbol]]) continue;
158 string[i] = symbolset[mut_sym_list[symbol]];
160 cost = cost_function(string, structure, target);
162 if ( cost + DBL_EPSILON < current_cost ) break;
163 if (( cost == current_cost)&&(cost2<ccost2)){
164 strcpy(string2, string);
165 strcpy(struct2, structure);
169 else /* paired base */
170 for (bp=0; bp<npairs; bp++) {
172 p = mut_pair_list[bp]*2;
173 if ((cstring[i] == pairset[p]) &&
174 (cstring[j] == pairset[p+1]))
176 string[i] = pairset[p];
177 string[j] = pairset[p+1];
179 cost = cost_function(string, structure, target);
181 if ( cost < current_cost ) break;
182 if (( cost == current_cost)&&(cost2<ccost2)){
183 strcpy(string2, string);
184 strcpy(struct2, structure);
189 if ( cost < current_cost ) {
190 strcpy(cstring, string);
198 if ((current_cost>0)&&(cont==0)&&(string2[0])) {
199 /* no mutation that decreased cost was found,
200 but the the sequence in string2 decreases cost2 while keeping
202 strcpy(cstring, string2);
203 strcpy(structure, struct2);
208 for (i=0; i<len; i++) if (isupper(start[i])) start[i]=cstring[i];
211 if (fold_type==0) { free_tree(T0); T0=NULL; }
227 /*-------------------------------------------------------------------------*/
229 /* shuffle produces a ronaom list by doing len exchanges */
230 PRIVATE void shuffle(int *list, int len)
234 for (i=0;i<len;i++) {
236 rn = i + (int) (urn()*(len-i)); /* [i..len-1] */
237 /* swap element i and rn */
244 /*-------------------------------------------------------------------------*/
246 PRIVATE void make_ptable(const char *structure, int *table)
252 stack = (int *) space(sizeof(int)*(strlen(structure)+1));
254 for (i=0; i<strlen(structure); i++) {
255 switch (structure[i]) {
265 fprintf(stderr, "%s\n", structure);
266 nrerror("unbalanced brackets in make_ptable");
274 fprintf(stderr, "%s\n", structure);
275 nrerror("unbalanced brackets in make_ptable");
280 /*-------------------------------------------------------------------------*/
283 strncpy(wstruct, structure+i, j-i+1); \
284 wstruct[j-i+1]='\0'; \
285 strncpy(wstring, string+i, j-i+1); \
286 wstring[j-i+1]='\0'; \
287 dist=adaptive_walk(wstring, wstruct); \
288 strncpy(string+i, wstring, j-i+1); \
289 if ((dist>0)&&(give_up)) goto adios
291 PUBLIC float inverse_fold(char *start, char *structure)
293 int i, j, jj, len, o;
295 char *string, *wstring, *wstruct, *aux;
298 nc2 = j = o = fold_type = 0;
300 len = strlen(structure);
301 if (strlen(start)!=len) {
302 fprintf(stderr, "%s\n%s\n", start, structure);
303 nrerror("inverse_fold: start and structure have unequal length");
305 string = (char *) space(len+1);
306 wstring = (char *) space(len+1);
307 wstruct = (char *) space(len+1);
308 pt = (int *) space(sizeof(int)*(len+2));
311 aux = aux_struct(structure);
312 strcpy(string, start);
314 make_start(string, structure);
316 make_ptable(structure, pt);
319 while ((j<len)&&(structure[j]!=')')) {
320 if (aux[j]=='[') o++;
321 if (aux[j]==']') o--;
325 while ((i>0) && structure[--i]!='(');
326 if (structure[i]=='.') { /* no pair found -> open chain */
330 if (aux[i]!='[') { i--; j++;}
334 while (aux[--i]!='[');
335 while (aux[++j]!=']');
341 while (aux[++j]=='.');
342 while ((i>=0)&&(aux[i]=='.')) i--;
344 backtrack_type = (o==0)? 'F' : 'M';
345 if (j-jj>8) { WALK((i+1),(jj)); }
347 while ((i>=0) &&(aux[i]==']')) {
349 while ((i>=0)&&(aux[i]=='.')) i--;
357 if ((dist>0)&&(inv_verbose)) printf("%s\n%s\n", wstring, wstruct);
358 /*if ((dist==0)||(give_up==0))*/ strcpy(start, string);
359 free(wstring); free(wstruct);
360 free(string); free(aux);
362 /* if (dist>0) printf("%3d \n", nc2); */
366 /*-------------------------------------------------------------------------*/
368 PUBLIC float inverse_pf_fold(char *start, char *target)
374 if (dangles!=0) dangles=2;
376 update_fold_params(); /* make sure there is a valid pair matrix */
378 make_start(start, target);
381 dist = adaptive_walk(start, target);
383 return (dist+final_cost);
386 /*-------------------------------------------------------------------------*/
388 PRIVATE void make_start(char* start, const char *structure)
390 int i,j,k,l,r,length;
391 int *table, *S, sym[MAXALPHA], ss;
393 length=strlen(start);
394 table = (int *) space(sizeof(int)*length);
395 S = (int *) space(sizeof(int)*length);
397 make_ptable(structure, table);
398 for (i=0; i<strlen(start); i++) S[i] = encode_char(toupper(start[i]));
399 for (i=0; i<strlen(symbolset); i++) sym[i] = i;
401 for (k=0; k<length; k++) {
402 if (table[k]<k) continue;
403 if (((urn()<0.5) && isupper(start[k])) ||
404 islower(start[table[k]])) {
410 if (!pair[S[i]][S[j]]) { /* make a valid pair by mutating j */
411 shuffle(sym, (int) base);
412 for (l=0; l<base; l++) {
413 ss = encode_char(symbolset[sym[l]]);
414 if (pair[S[i]][ss]) break;
416 if (l==base) { /* nothing pairs start[i] */
417 r = 2*int_urn(0, npairs-1);
418 start[i] = pairset[r];
419 start[j] = pairset[r+1];
420 } else start[j] = symbolset[sym[l]];
427 /*---------------------------------------------------------------------------*/
429 PRIVATE void make_pairset(void)
435 base = strlen(symbolset);
437 for (i=0; i< base; i++) sym[i] = encode_char(symbolset[i]);
439 for (i=npairs=0; i< base; i++)
440 for (j=0; j<base; j++)
441 if (pair[sym[i]][sym[j]]) {
442 pairset[npairs++] = symbolset[i];
443 pairset[npairs++] = symbolset[j];
446 if (npairs==0) nrerror("No pairs in this alphabet!");
448 /*---------------------------------------------------------------------------*/
450 PRIVATE double mfe_cost(const char *string, char *structure, const char *target)
456 double energy, distance;
458 if (strlen(string)!=strlen(target)) {
459 fprintf(stderr, "%s\n%s\n", string, target);
460 nrerror("unequal length in mfe_cost");
462 energy = fold(string, structure);
465 xstruc = expand_Full(target);
466 T0=make_tree(xstruc);
470 xstruc = expand_Full(structure);
471 T1=make_tree(xstruc);
472 distance = tree_edit_distance(T0,T1);
476 distance = (double) bp_distance(target, structure);
478 cost2 = energy_of_structure(string, target, 0) - energy;
479 return (double) distance;
481 /*---------------------------------------------------------------------------*/
483 PRIVATE double pf_cost(const char *string, char *structure, const char *target)
488 f = pf_fold(string, structure);
489 e = energy_of_structure(string, target, 0);
490 return (double) (e-f-final_cost);
492 nrerror("this version not linked with pf_fold");
497 /*---------------------------------------------------------------------------*/
499 PRIVATE char *aux_struct(const char* structure )
505 string = (char *) space(sizeof(char)*(strlen(structure)+1));
506 match_paren = (int *) space(sizeof(int)*(strlen(structure)/2+1));
507 strcpy(string, structure);
518 while ((string[p+1]==')')&&(match_paren[o-1]==match_paren[o]-1)) {
523 string[match_paren[o]]='[';
527 nrerror("Junk in structure at aux_structure\n");