1 /* Last changed Time-stamp: <2010-06-30 17:24:43 wolfgang> */
3 compute potentially pseudoknotted structures of an RNA sequence
9 library containing the function used in PKplex
10 it generates pseudoknotted structures by letting the sequence form a duplex structure with itself
20 #include "energy_par.h"
21 #include "fold_vars.h"
24 #include "loop_energies.h"
34 PRIVATE void update_dfold_params(void);
35 PRIVATE void duplexfold_XS(const char *s1, int **access_s1, const int threshold, const int max_interaction_length);
36 PRIVATE char *backtrack_XS(int kk, int ll, const int ii, const int jj, const int max_interaction_length);
37 PRIVATE void make_ptypes(const char *structure);
39 PRIVATE paramT *P = NULL;
40 PRIVATE int ***c3 = NULL; /* energy array used in duplexfold */
41 PRIVATE short *S1 = NULL, *SS1 = NULL;
43 PRIVATE char *ptype = NULL; /* precomputed array of pair types */
44 PRIVATE int *indx = NULL; /* index for moving in the triangle matrices ptype[] */
46 PUBLIC dupVar *PlexHits = NULL;
47 PUBLIC int PlexHitsArrayLength = 100;
48 PUBLIC int NumberOfHits = 0;
49 PUBLIC int verbose = 0;
51 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
53 PRIVATE void duplexfold_XS(const char *s1,
56 const int max_interaction_length){
58 int i, j, k, l, p, q, Emin=INF, l_min=0, k_min=0, j_min=0;
59 int type, type2, type3, E, tempK;
61 int length = (int) strlen(s1);
64 c3 = (int ***) space(sizeof(int **) * (length));
65 for (i=0; i<length; i++){
66 c3[i] = (int **) space(sizeof(int *) * max_interaction_length);
67 for (j=0; j<max_interaction_length; j++) {
68 c3[i][j]=(int *) space(sizeof(int) * max_interaction_length);
80 /* init all matrix elements to INF */
81 for (j=0; j < length; j++){
82 for(k=0;k<max_interaction_length;k++){
83 for(l=0;l<max_interaction_length;l++){
88 char string[10] = {'\0'};
89 /* matrix starting values for (i,j)-basepairs */
90 for(j=i+4; j<n1-10; j++) {
91 type=ptype[indx[j]+i];
93 c3[j-11][max_interaction_length-1][0] = P->DuplexInit;
94 c3[j-11][max_interaction_length-1][0] += E_Hairpin(j-i-1, type, SS1[i+1], SS1[j-1], string, P);
95 /* c3[j-11][max_interaction_length-1][0] += E_ExtLoop(type, SS1[i+1], SS1[j-1], P); */
96 /* c3[j-11][max_interaction_length-1][0] += E_ExtLoop(rtype[type], SS1[j-1], SS1[i+1], P); */
100 int i_pos_begin=MAX2(9, i-max_interaction_length); /* why 9 ??? */
103 for(k=i-1; k>i_pos_begin; k--) {
104 tempK=max_interaction_length-i+k-1;
105 for(l = i + 5; l < n1 - 9; l++) { /* again, why 9 less then the sequence length ? */
106 type2 = ptype[indx[l] + k];
108 for(p = k + 1; (p <= i) && (p <= k + MAXLOOP + 1); p++){
109 for(q = l - 1; (q>=i+4) && (q >= l - MAXLOOP - 1); q--){
110 if (p - k + l - q - 2 > MAXLOOP) break;
111 type3 = ptype[indx[q] + p];
113 E = E_IntLoop(p - k - 1, l - q - 1, type2, rtype[type3], SS1[k + 1], SS1[l - 1], SS1[p - 1], SS1[q + 1], P);
114 for(j = MAX2(i + 4, l - max_interaction_length + 1); j <= q; j++){
115 type = ptype[indx[j]+i];
117 c3[j-11][tempK][l-j] = MIN2(c3[j-11][tempK][l-j], c3[j-11][max_interaction_length-i+p-1][q-j]+E);
125 /* read out matrix minimum */
126 for(j=i+4; j<n1-10; j++) {
127 type=ptype[indx[j]+i];
129 int j_pos_end=MIN2(n1-9,j+max_interaction_length);
130 for (k=i-1; k>i_pos_begin; k--) {
131 for (l=j+1; l<j_pos_end; l++) {
132 type2=ptype[indx[l]+k];
133 if (!type2) continue;
134 E = c3[j-11][max_interaction_length-i+k-1][l-j];
135 /* printf("[%d,%d][%d,%d]\t%6.2f\t%6.2f\t%6.2f\n", i, k, l, j, E/100., access_s1[i-k+1][i]/100., access_s1[l-j+1][l]/100.); */
136 E+=access_s1[i-k+1][i]+access_s1[l-j+1][l];
137 E+=E_ExtLoop(type2,((k>i_pos_begin+1)? SS1[k-1]:-1),((l<j_pos_end-1)? SS1[l+1]:-1),P);
138 E+=E_ExtLoop(rtype[type], SS1[j-1], SS1[i+1], P);
140 Emin=E; k_min=k; l_min=l;
147 if(Emin < threshold){
148 struc = backtrack_XS(k_min, l_min, i, j_min, max_interaction_length);
150 /* lets take care of the dangles */
151 /* find best combination */
152 int dx_5, dx_3, dy_5, dy_3,dGx,dGy,bonus_x, bonus_y;
153 dx_5 = dx_3 = dy_5 = dy_3 = dGx = dGy = bonus_x = bonus_y = 0;
154 dGx = access_s1[i-k_min+1][i];
155 dGy = access_s1[l_min-j_min+1][l_min];
156 PlexHits[NumberOfHits].tb=k_min -10 -dx_5;
157 PlexHits[NumberOfHits].te=i -10 + dx_3;
158 PlexHits[NumberOfHits].qb=j_min -10 - dy_5;
159 PlexHits[NumberOfHits].qe=l_min -10 + dy_3;
160 PlexHits[NumberOfHits].ddG=(double) Emin * 0.01;
161 PlexHits[NumberOfHits].dG1=(double) dGx*0.01 ;
162 PlexHits[NumberOfHits].dG2=(double) dGy*0.01 ;
163 PlexHits[NumberOfHits].energy = PlexHits[NumberOfHits].ddG - PlexHits[NumberOfHits].dG1 - PlexHits[NumberOfHits].dG2;
164 PlexHits[NumberOfHits].structure = struc;
167 if(PlexHits[NumberOfHits].energy * 100 < threshold){
168 if (verbose) printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", PlexHits[NumberOfHits].structure, PlexHits[NumberOfHits].tb, PlexHits[NumberOfHits].te, PlexHits[NumberOfHits].qb, PlexHits[NumberOfHits].qe, PlexHits[NumberOfHits].ddG, PlexHits[NumberOfHits].energy, PlexHits[NumberOfHits].dG1, PlexHits[NumberOfHits].dG2);
170 if(NumberOfHits==PlexHitsArrayLength-1){
171 PlexHitsArrayLength*=2;
172 PlexHits = (dupVar *) xrealloc(PlexHits,sizeof(dupVar)*PlexHitsArrayLength);
178 for (i=0; i<(n1-20); i++) {
179 for (j=0; j<max_interaction_length; j++) {
188 PRIVATE char *backtrack_XS(int k, int l, const int i, const int j, const int max_interaction_length) {
189 /* backtrack structure going backwards from i, and forwards from j
190 return structure in bracket notation with & as separator */
191 int p, q, type, type2, E, traced, i0, j0;
192 char *st1, *st2, *struc;
193 st1 = (char *) space(sizeof(char)*(i-k+2));
195 st2 = (char *) space(sizeof(char)*(l-j+2));
199 while (k<=i && l>=j) {
200 E = c3[j-11][max_interaction_length-i+k-1][l-j]; traced=0;
204 type=ptype[indx[l]+k];
205 if (!type) nrerror("backtrack failed in fold duplex bli");
206 for (p=k+1; p<=i; p++) {
207 for (q=l-1; q>=j; q--) {
209 if (p-k+l-q-2>MAXLOOP) break;
210 type2=ptype[indx[q]+p];
211 if (!type2) continue;
212 LE = E_IntLoop(p-k-1, l-q-1, type, rtype[type2], SS1[k+1], SS1[l-1], SS1[p-1], SS1[q+1], P);
213 if (E == c3[j-11][max_interaction_length-i+p-1][q-j]+LE) {
222 E-=E_ExtLoop(type2, ((k<i)?SS1[k+1]:-1), ((l>j-1)? SS1[l-1]:-1), P);
224 if (E != P->DuplexInit) {
225 nrerror("backtrack failed in fold duplex bal");
229 struc = (char *) space(k-i0+1+j0-l+1+2);
231 for (p=0; p<=i-i0; p++){
232 if (!st1[p]) st1[p] = '.';
235 for (p=0; p<=j0-j; p++) {
244 free(st1); free(st2);
248 PUBLIC dupVar **PKLduplexfold_XS( const char *s1,
251 const int max_interaction_length,
254 if ((!P) || (fabs(P->temperature - temperature)>1e-6))
255 update_dfold_params();
257 n1 = (int) strlen(s1);
258 S1 = encode_sequence(s1, 0);
259 SS1 = encode_sequence(s1, 1);
262 ptype = (char *) space(sizeof(char)*((n1*(n1+1))/2+2));
266 duplexfold_XS(s1,access_s1,threshold, max_interaction_length);
268 free(indx); free(ptype);
272 /*---------------------------------UTILS------------------------------------------*/
274 PRIVATE void update_dfold_params(void){
276 P = scale_parameters();
280 /*---------------------------------------------------------------------------*/
282 PRIVATE void make_ptypes(const char *structure) {
286 for (k=1; k<n-TURN; k++)
287 for (l=1; l<=2; l++) {
288 int type,ntype=0,otype=0;
289 i=k; j = i+TURN+l; if (j>n) continue;
290 type = pair[S1[i]][S1[j]];
291 while ((i>=1)&&(j<=n)) {
292 if ((i>1)&&(j<n)) ntype = pair[S1[i-1]][S1[j+1]];
293 if (noLonelyPairs && (!otype) && (!ntype))
294 type = 0; /* i.j can only form isolated pairs */
295 ptype[indx[j]+i] = (char) type;