1 /* Last changed Time-stamp: <2007-10-30 14:06:22 htafer> */
3 compute the duplex structure of two RNA strands,
4 allowing only inter-strand base pairs.
5 see cofold() for computing hybrid structures without
14 library containing the function used in rnaplex
15 the program rnaplex uses the following function
16 Lduplexfold: finds high scoring segments
17 it stores the end-position of these segments in an array
18 and call then for each of these positions the duplexfold function
19 which allows to make backtracking for each of the high scoring position
20 It allows to find suboptimal partially overlapping (depends on a a parameter)
21 duplexes between a long RNA and a shorter one.
22 Contrarly to RNAduplex, the energy model is not in E~log(N),
23 where N is the length of an interial loop but used an affine model,
24 where the extension and begin parameter are fitted to the energy
25 parameter used by RNAduplex. This allows to check for duplex between a short RNA(20nt)
26 and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
28 The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
29 given threshold this value is stored in an array. When the alignment score goes
30 then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
31 to find all non-overlapping high-scoring segments.
32 For more information check "durbin, biological sequence analysis"
42 #include "energy_par.h"
43 #include "fold_vars.h"
49 #include "loop_energies.h"
51 static char rcsid[] UNUSED = "$Id: plex.c,v 1.14 2007/06/12 12:50:16 htafer Exp $";
52 /* int subopt_sorted=0; */
55 #define PRIVATE static
57 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
58 #define NEW_NINIO 1 /* new asymetry penalty */
59 #define ARRAY 32 /*array size*/
61 #define MINPSCORE -2 * UNIT
62 PRIVATE void encode_seqs(const char *s1, const char *s2);
63 PRIVATE short *encode_seq(const char *seq);
64 /* PRIVATE void my_encode_seq(const char *s1, const char *s2); */
65 PRIVATE void update_dfold_params(void);
66 /* PRIVATE int compare(const void *sub1, const void *sub2); */
67 /* PRIVATE int compare_XS(const void *sub1, const void *sub2); */
68 /* PRIVATE duplexT* backtrack(int threshold, const int extension_cost); */
69 /* static void print_struct(duplexT const *dup); */
71 /* PRIVATE int print_struct(duplexT const *dup); */
72 /* PRIVATE int get_rescaled_energy(duplexT const *dup); */
74 PRIVATE char * backtrack_C(int i, int j, const int extension_cost, const char * structure, int *E);
75 PRIVATE void find_max_C(const int *position, const int *position_j, const int delta, const int threshold, const int constthreshold, const int length, const char *s1, const char *s2, const int extension_cost, const int fast, const char* structure);
76 PRIVATE void plot_max_C(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast, const char* structure);
79 PRIVATE char * backtrack_CXS(int i, int j, const int** access_s1, const int** access_s2, const char* structure, int *E);
80 PRIVATE void find_max_CXS(const int *position, const int *position_j,const int delta, const int threshold, const int constthreshold, const int alignment_length, const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast,const char* structure);
81 PRIVATE void plot_max_CXS(const int max, const int max_pos, const int max_pos_j, const int alignment_length,const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast, const char* structure);
82 PRIVATE duplexT duplexfold_C(const char *s1, const char *s2, const int extension_cost, const char* structure);
83 PRIVATE duplexT duplexfold_CXS(const char *s1, const char *s2,const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold ,const char* structure);
88 #define MAXSECTORS 500 /* dimension for a backtrack array */
89 #define LOCALITY 0. /* locality parameter for base-pairs */
91 #define MIN2(A, B) ((A) < (B) ? (A) : (B))
92 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
94 PRIVATE paramT *P = NULL;
95 PRIVATE int **c = NULL;/*, **in, **bx, **by;*/ /* energy array used in duplexfold */
96 /* PRIVATE int ****c_XS; */
97 PRIVATE int **lc = NULL, **lin = NULL, **lbx = NULL, **lby = NULL, **linx = NULL, **liny = NULL; /* energy array used in Lduplexfold
98 this arrays contains only 3 columns
99 In this way I reduce my memory use and
100 I can make most of my computation and
101 accession in the computer cash
102 which is the main performance boost*/
106 /*PRIVATE int last_cell; this variable is the last_cell containing
107 the information about the alignment
108 useful only if there is an alignment
109 which extends till the last nucleotide of
112 PRIVATE short *S1 = NULL, *SS1 = NULL, *S2 = NULL, *SS2 = NULL;/*contains the sequences*/
113 PRIVATE int n1,n2; /* sequence lengths */
114 PRIVATE int n3, n4; /*sequence length for the duplex*/;
115 PRIVATE int delay_free=0;
118 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
120 PRIVATE duplexT duplexfold_CXS(const char *s1, const char *s2, const int **access_s1, const int **access_s2,
121 const int i_pos, const int j_pos, const int threshold, const char* structure) {
122 int i, j,p,q,Emin=INF, l_min=0, k_min=0;
127 n3 = (int) strlen(s1);
128 n4 = (int) strlen(s2);
131 previous_const=(int *) space(sizeof(int) * (n4+1));
136 if(structure[j-1]=='|'){
137 previous_const[j]=prev_temp;
141 previous_const[j]=prev_temp;
144 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
145 update_fold_params(); if(P) free(P); P = scale_parameters();
149 c = (int **) space(sizeof(int *) * (n3+1));
150 for (i=0; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
151 for (i=0; i<=n3; i++){
157 int type, type2, type3, E, k,l;
159 type = pair[S1[i]][S2[j]];
161 printf("Error during initialization of the duplex in duplexfold_XS\n");
166 c[i][j] = P->DuplexInit + (structure[j-1]=='|' ? bonus : 0 ); /* check if first pair is constrained */
167 if(!(structure[j-2] == '|')){
168 c[i][j]+=P->mismatchExt[rtype[type]][SS2[j-1]][SS1[i+1]];
171 c[i][j]+=P->dangle3[rtype[type]][SS1[i+1]];
173 if (type>2) c[i][j] += P->TerminalAU;
174 for (k=i-1; k>0 ; k--) {
176 for (l=j+1; l<=n4; l++) {
178 int bonus_2 = (structure[l-1]=='|'? bonus : 0 ); /* check if position is constrained and prepare bonus accordingly */
179 type2 = pair[S1[k]][S2[l]];
180 if (!type2) continue;
181 for (p=k+1; p< n3 && p<k+MAXLOOP-1; p++){
182 for (q = l-1; q >= previous_const[l] && q > 1; q--) {
183 if (p-k+l-q-2>MAXLOOP) break;
184 type3=pair[S1[p]][S2[q]];
186 E = E_IntLoop(p-k-1, l-q-1, type2, rtype[type3],SS1[k+1], SS2[l-1], SS1[p-1], SS2[q+1],P) + bonus_2;
187 c[k][l] = MIN2(c[k][l], c[p][q]+E);
191 if (type2>2) E += P->TerminalAU;
192 E+=access_s1[i-k+1][i_pos]+access_s2[l-1][j_pos+(l-1)-1];
193 if (k>1 && l<n4 && !(structure[l]=='|') ){
194 E+=P->mismatchExt[type2][SS1[k-1]][SS2[l+1]];
197 E += P->dangle5[type2][SS1[k-1]];
199 else if(l<n4 && !(structure[l]=='|')){
200 E += P->dangle3[type2][SS2[l+1]];
203 Emin=E; k_min=k; l_min=l;
207 free(previous_const);
208 if(Emin > threshold){
212 for (i=0; i<=n3; i++) free(c[i]);
214 free(S1); free(S2); free(SS1); free(SS2);
217 struc = backtrack_CXS(k_min, l_min, access_s1, access_s2,structure,&Emin);
221 /* lets take care of the dangles */
222 /* find best combination */
223 int dx_5, dx_3, dy_5, dy_3,dGx,dGy,bonus_x;
224 dx_5=0; dx_3=0; dy_5=0; dy_3=0;dGx=0;dGy=0;bonus_x=0;
225 dGx = access_s1[i-k_min+1][i_pos];dx_3=0; dx_5=0;bonus_x=0;
226 dGy = access_s2[l_min-j+1][j_pos + (l_min-1)-1];
227 mfe.tb=i_pos -9 - i + k_min -1 -dx_5;
228 mfe.te=i_pos -9 -1 + dx_3;
229 mfe.qb=j_pos -9 -1 - dy_5;
230 mfe.qe=j_pos + l_min -3 -9 + dy_3;
231 mfe.ddG=(double) Emin * 0.01;
232 mfe.dG1=(double) dGx*0.01 ;
233 mfe.dG2=(double) dGy*0.01 ;
234 /* mfe.energy += bonus_y + bonus_x; */
235 mfe.energy= mfe.ddG - mfe.dG1 - mfe.dG2;
237 mfe.structure = struc;
238 for (i=0; i<=n3; i++) free(c[i]);
240 free(S1); free(S2); free(SS1); free(SS2);
245 PRIVATE char *backtrack_CXS (int i, int j, const int **access_s1,const int **access_s2,const char* structure, int *Emin ) {
246 /* backtrack structure going backwards from i, and forwards from j
247 return structure in bracket notation with & as separator */
248 int k, l, type, type2, E, traced, i0, j0;
249 char *st1, *st2, *struc;
252 previous_const=(int *) space(sizeof(int) * (n4+1));
254 previous_const[j_temp]=1;
257 if(structure[j_temp-1]=='|'){
258 previous_const[j_temp]=prev_temp;
262 previous_const[j_temp]=prev_temp;
265 st1 = (char *) space(sizeof(char)*(n3+1));
266 st2 = (char *) space(sizeof(char)*(n4+1));
267 i0=i;/*MAX2(i-1,1);*/j0=j;/*MIN2(j+1,n4);*/
268 while (i<=n3-1 && j>=2) {
269 int bonus_2 = (structure[j-1]== '|'? bonus: 0);
270 E = c[i][j]; traced=0;
273 type = pair[S1[i]][S2[j]];
274 if (!type) nrerror("backtrack failed in fold duplex bli");
275 for (k=i+1; k<=n3 && k>i-MAXLOOP-2; k++) {
276 for (l=j-1; l >= previous_const[j] && l>=1; l--) {
278 if (i-k+l-j-2>MAXLOOP) break;
279 type2 = pair[S1[k]][S2[l]];
280 if (!type2) continue;
281 LE = E_IntLoop(k-i-1, j-l-1, type, rtype[type2], SS1[i+1], SS2[j-1], SS1[k-1], SS2[l+1],P) + bonus_2;
282 if (E == c[k][l]+LE) {
292 if(i<n3 && j>1 && !(structure[j-2]=='|')){
293 E -= P->mismatchExt[rtype[type]][SS2[j-1]][SS1[i+1]];
296 E -= P->dangle3[rtype[type]][SS1[i+1]];/* +access_s1[1][i+1]; */
299 E -= (!(structure[j-2]=='|') ? P->dangle5[rtype[type]][SS2[j-1]] : 0);/* +access_s2[1][j+1]; */
301 if (type>2) E -= P->TerminalAU;
304 if (E != P->DuplexInit + bonus_2) {
305 nrerror("backtrack failed in fold duplex bal");
314 struc = (char *) space(i-i0+1+j0-j+1+2);
315 for (k=MAX2(i0,1); k<=i; k++) if (!st1[k-1]) st1[k-1] = '.';
316 for (k=j; k<=j0; k++) if (!st2[k-1]) st2[k-1] = '.';
317 strcpy(struc, st1+MAX2(i0-1,0)); strcat(struc, "&");
318 strcat(struc, st2+j-1);
319 free(st1); free(st2);free(previous_const);
324 duplexT** Lduplexfold_CXS(const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int threshold, const int alignment_length, const int delta, const int fast, const char* structure,const int il_a, const int il_b, const int b_a, const int b_b)/* , const int target_dead, const int query_dead) */
331 int iext_s=2*il_a;/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
332 int iext_ass=50+il_a;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
333 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
335 int max_pos;/* get position of the best hit */
341 int constthreshold=0; /* minimal threshold corresponding to a structure complying to all constraints */
344 while(structure[i]!='\0'){
345 if(structure[i]=='|') constthreshold+=bonus;
348 int *position; /* contains the position of the hits with energy > E */
350 n1 = (int) strlen(s1);
351 n2 = (int) strlen(s2);
352 position = (int *) space ((delta+n1+3+delta) * sizeof(int));
353 position_j= (int *) space ((delta+n1+3+delta) * sizeof(int));
356 if ((!P) || (fabs(P->temperature - temperature)>1e-6)){
357 update_dfold_params(); if(P) free(P); P = scale_parameters();
363 maxPenalty[0]=(int) -1*P->stack[2][2]/2;
364 maxPenalty[1]=(int) -1*P->stack[2][2];
365 maxPenalty[2]=(int) -3*P->stack[2][2]/2;
366 maxPenalty[3]=(int) -2*P->stack[2][2];
368 lc = (int**) space (sizeof(int *) * 5);
369 lin = (int**) space (sizeof(int *) * 5);
370 lbx = (int**) space (sizeof(int *) * 5);
371 lby = (int**) space (sizeof(int *) * 5);
372 linx = (int**) space (sizeof(int *) * 5);
373 liny = (int**) space (sizeof(int *) * 5);
375 for (i=0; i<=4; i++){
376 lc[i] = (int *) space(sizeof(int) * (n2+5));
377 lin[i] = (int *) space(sizeof(int) * (n2+5));
378 lbx[i] = (int *) space(sizeof(int) * (n2+5));
379 lby[i] = (int *) space(sizeof(int) * (n2+5));
380 linx[i]= (int *) space(sizeof(int) * (n2+5));
381 liny[i]= (int *) space(sizeof(int) * (n2+5));
384 lbx[0][j]=lbx[1][j]=lbx[2][j]=lbx[3][j] = lbx[4][j] =INF;
385 lin[0][j]=lin[1][j]=lin[2][j]=lin[3][j] = lin[4][j] =INF;
386 lc[0][j] =lc[1][j] =lc[2][j] = lc[3][j] = lc[4][j] =INF;
387 lby[0][j]=lby[1][j]=lby[2][j]=lby[3][j] = lby[4][j] =INF;
388 liny[0][j]=liny[1][j]=liny[2][j]=liny[3][j]=liny[4][j]=INF;
389 linx[0][j]=linx[1][j]=linx[2][j]=linx[3][j]=linx[4][j]=INF;
392 i=10 /*target_dead*/; /* start from 2 ( i=4) because no structure allowed to begin with a single base pair */
393 i_length= n1 - 9 /*- target_dead*/ ;
394 while(i < i_length) {
401 di1 = access_s1[5][i] - access_s1[4][i-1];
402 di2 = access_s1[5][i-1] - access_s1[4][i-2] + di1;
403 di3 = access_s1[5][i-2] - access_s1[4][i-3] + di2;
404 di4 = access_s1[5][i-3] - access_s1[4][i-4] + di3;
405 di1=MIN2(di1,maxPenalty[0]);
406 di2=MIN2(di2,maxPenalty[1]);
407 di3=MIN2(di3,maxPenalty[2]);
408 di4=MIN2(di4,maxPenalty[3]);
409 j=n2 - 9 /*- query_dead*/; /* start from n2-1 because no structure allow to begin with a single base pair */
410 while (--j > 9/*query_dead - 1*/) {
411 /* ----------------------------------------------------------update lin lbx lby matrix */
412 int bonus_2 = (structure[j-1]=='|' ? bonus :0 );
414 dj1 = access_s2[5][j+4] - access_s2[4][j+4];
415 dj2 = access_s2[5][j+5] - access_s2[4][j+5] + dj1;
416 dj3 = access_s2[5][j+6] - access_s2[4][j+6] + dj2;
417 dj4 = access_s2[5][j+7] - access_s2[4][j+7] + dj3;
418 dj1=MIN2(dj1,maxPenalty[0]);
419 dj2=MIN2(dj2,maxPenalty[1]);
420 dj3=MIN2(dj3,maxPenalty[2]);
421 dj4=MIN2(dj4,maxPenalty[3]);
422 int type2, type,temp;
423 type = pair[S1[i]][S2[j]];
424 lc[idx][j]= type ? P->DuplexInit + access_s1[1][i] + access_s2[1][j] + bonus_2 : INF;
426 type2=pair[S2[j+1]][S1[i-1]];
427 lin[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,lin[idx_1][j]+iext_ass + di1);
428 lin[idx][j]=MIN2(lin[idx][j],lin[idx][j+1]+iext_ass + dj1);
429 lin[idx][j]=MIN2(lin[idx][j],lin[idx_1][j+1]+iext_s + di1 + dj1);
430 linx[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,linx[idx_1][j]+iext_ass + di1);
431 liny[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,liny[idx][j+1]+iext_ass + dj1);
432 type2=pair[S2[j+1]][S1[i]];
433 lby[idx][j]=MIN2(lby[idx][j+1]+bext + dj1 ,
434 lc[idx][j+1]+bopen+bext+(type2>2?P->TerminalAU:0)+dj1);
437 lin[idx][j] = lby[idx][j] = linx[idx][j]= liny[idx][j]=INF; /* all loop containing "|" are rejected */
439 type2=pair[S2[j]][S1[i-1]];
440 lbx[idx][j]=MIN2(lbx[idx_1][j]+bext + di1, lc[idx_1][j]+bopen+bext+(type2>2?P->TerminalAU:0) + di1);
441 /* --------------------------------------------------------------- end update recursion */
443 if(!(structure[j]=='|')){
444 lc[idx][j]+=P->mismatchExt[type][SS1[i-1]][SS2[j+1]];
447 lc[idx][j]+=P->dangle5[type][SS1[i-1]];
449 lc[idx][j]+=(type>2?P->TerminalAU:0);
450 /* type > 2 -> no GC or CG pair */
451 /* ------------------------------------------------------------------update c matrix */
452 /* Be careful, no lc may come from a region where a "|" is in a loop, avoided in lin = lby = INF ... jedoch fuer klein loops muss man aufpassen .. */
453 if((type2=pair[S1[i-1]][S2[j+1]]))
454 lc[idx][j]=MIN2(lc[idx_1][j+1]+E_IntLoop(0,0,type2, rtype[type],SS1[i], SS2[j], SS1[i-1], SS2[j+1], P)+di1+dj1, lc[idx][j]); /* 0x0+1x1 */
455 if((type2=pair[S1[i-2]][S2[j+1]]))
456 lc[idx][j]=MIN2(lc[idx_2][j+1]+E_IntLoop(1,0,type2, rtype[type],SS1[i-1], SS2[j], SS1[i-1], SS2[j+1], P)+di2+dj1,lc[idx][j]);/* 0x1 +1x1 */
457 /* kleine loops checks wird in den folgenden if test gemacht. */
458 if(!(structure[j]=='|')){
459 if((type2=pair[S1[i-1]][S2[j+2]]))
460 lc[idx][j]=MIN2(lc[idx_1][j+2]+E_IntLoop(0,1,type2, rtype[type],SS1[i], SS2[j+1], SS1[i-1], SS2[j+1], P)+di1+dj2,lc[idx][j]);/* 1x0 + 1x1 */
461 if((type2=pair[S1[i-2]][S2[j+2]]))
462 lc[idx][j]=MIN2(lc[idx_2][j+2]+E_IntLoop(1,1,type2, rtype[type],SS1[i-1], SS2[j+1], SS1[i-1], SS2[j+1], P)+di2+dj2, lc[idx][j]); /* 1x1 +1x1 */
463 if((type2 = pair[S1[i-3]][S2[j+2]]))
464 lc[idx][j]=MIN2(lc[idx_3][j+2]+E_IntLoop(2,1,type2, rtype[type],SS1[i-2], SS2[j+1], SS1[i-1], SS2[j+1], P)+di3+dj2, lc[idx][j]); /* 2x1 +1x1 */
465 if(!(structure[j+1]=='|')){
466 if((type2 = pair[S1[i-3]][S2[j+3]]))
467 lc[idx][j]=MIN2(lc[idx_3][j+3]+E_IntLoop(2,2,type2, rtype[type],SS1[i-2], SS2[j+2], SS1[i-1], SS2[j+1], P)+di3+dj3,lc[idx][j]);/* 2x2 + 1x1 */
468 if((type2 = pair[S1[i-2]][S2[j+3]]))
469 lc[idx][j]=MIN2(lc[idx_2][j+3]+E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P)+di2+dj3, lc[idx][j]);/* 1x2 +1x1 */
470 if((type2 = pair[S1[i-4]][S2[j+3]]))
471 lc[idx][j]=MIN2(lc[idx_4][j+3]+E_IntLoop(3,2,type2, rtype[type],SS1[i-3], SS2[j+2], SS1[i-1], SS2[j+1], P)+di4+dj3, lc[idx][j]);
472 if(!(structure[j+2]=='|')){
473 if((type2 = pair[S1[i-3]][S2[j+4]]))
474 lc[idx][j]=MIN2(lc[idx_3][j+4]+E_IntLoop(2,3,type2, rtype[type],SS1[i-2], SS2[j+3], SS1[i-1], SS2[j+1], P)+di3+dj4, lc[idx][j]);
478 /* internal->stack */
479 lc[idx][j]=MIN2(lin[idx_3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+di3+dj3+2*iext_s, lc[idx][j]);
480 lc[idx][j]=MIN2(lin[idx_4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di4+dj2, lc[idx][j]);
481 lc[idx][j]=MIN2(lin[idx_2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di2+dj4, lc[idx][j]);
482 lc[idx][j]=MIN2(linx[idx_3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di3+dj1, lc[idx][j]);
483 lc[idx][j]=MIN2(liny[idx_1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+dj3+di1, lc[idx][j]);
486 bAU=(type>2?P->TerminalAU:0);
487 lc[idx][j]=MIN2(lbx[idx_2][j+1]+di2+dj1+bext+bAU, lc[idx][j]);
488 /* min2=by[i][j+1]; */
489 lc[idx][j]=MIN2(lby[idx_1][j+2]+di1+dj2+bext+bAU, lc[idx][j]);
491 /* if(j<=const5end){ */
493 min_colonne=MIN2(lc[idx][j]+(type>2?P->TerminalAU:0)+
494 (!(structure[j-2]=='|') ?
495 P->mismatchExt[rtype[type]][SS2[j-1]][SS1[i+1]] : P->dangle3[rtype[type]][SS1[i+1]]),
497 if(temp>min_colonne){
501 /* ---------------------------------------------------------------------end update */
503 if(max>=min_colonne){
506 max_pos_j=min_j_colonne;
508 position[i+delta]=min_colonne;min_colonne=INF;
509 position_j[i+delta]=min_j_colonne;
512 /* printf("MAX :%d ", max); */
513 free(S1); free(S2); free(SS1); free(SS2);
514 if(max<threshold+constthreshold){
515 find_max_CXS(position, position_j, delta, threshold+constthreshold, constthreshold, alignment_length, s1, s2, access_s1, access_s2, fast, structure);
517 if(max<constthreshold){
518 plot_max_CXS(max, max_pos, max_pos_j,alignment_length, s1, s2, access_s1, access_s2,fast,structure);
520 for (i=0; i<=4; i++) {free(lc[i]);free(lin[i]);free(lbx[i]);free(lby[i]);free(linx[i]);free(liny[i]);}
521 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]); */
522 free(lc);free(lin);free(lbx);free(lby);free(linx);free(liny);
528 PRIVATE void find_max_CXS(const int *position, const int *position_j,const int delta, const int threshold, const int constthreshold, const int alignment_length, const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast, const char* structure){
533 if(position[pos+delta]<(threshold)){
535 search_range=delta+1;
536 while(--search_range){
537 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
538 temp_min=search_range;
543 max_pos_j=position_j[pos+delta];
545 max=position[pos+delta];
546 printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
547 pos=MAX2(10,pos-delta);
555 if(position[pos+delta]<(threshold)){
557 search_range=delta+1;
558 while(--search_range){
559 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
560 temp_min=search_range;
563 pos-=temp_min; /* position on i */
565 max_pos_j=position_j[pos+delta]; /* position on j */
566 /* int begin_t=MAX2(9, pos-alignment_length); */
567 /* int end_t =MIN2(n1-10, pos); */
568 /* int begin_q=MAX2(9, max_pos_j-2); */
569 /* int end_q =MIN2(n2-10, max_pos_j+alignment_length-2); */
570 int begin_t=MAX2(9,pos-alignment_length);
572 int begin_q=max_pos_j-2;
573 int end_q =MIN2(n2-9,max_pos_j+alignment_length-2);
574 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
575 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
576 char *local_structure = (char*) space(sizeof(char) * ( end_q - begin_q +2));
577 strncpy(s3, (s1+begin_t), end_t - begin_t+1);
578 strncpy(s4, (s2+begin_q) , end_q - begin_q+1 );
579 strncpy(local_structure, (structure+begin_q), end_q - begin_q +1);
580 s3[end_t -begin_t +1 ]='\0';
581 s4[end_q -begin_q +1 ]='\0';
582 local_structure[end_q - begin_q +1]='\0';
584 test = duplexfold_CXS(s3,s4,access_s1,access_s2,pos, max_pos_j,threshold,local_structure);
585 if(test.energy * 100 < (threshold - constthreshold)){
586 int l1=strchr(test.structure, '&')-test.structure;
587 int dL = strrchr(structure,'|') - strchr(structure,'|');
589 if(dL <= strlen(test.structure)-l1-1){
590 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
591 test.tb,test.te,test.qb,test.qe, test.ddG, test.energy, test.dG1, test.dG2);
592 pos=MAX2(10,pos-delta);
596 free(test.structure);
597 free(local_structure);
604 PRIVATE void plot_max_CXS(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int ** access_s1, const int ** access_s2, const int fast, const char* structure)
607 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos-3, max_pos_j, ((double)max)/100);
610 int begin_t=MAX2(9,max_pos-alignment_length);
612 int begin_q=max_pos_j-2;
613 int end_q =MIN2(n2-9,max_pos_j+alignment_length-2);
614 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
615 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
616 char *local_structure = (char*) space(sizeof(char)*(end_q - begin_q +2));
617 strncpy(s3, (s1+begin_t), end_t - begin_t+1);
618 strncpy(s4, (s2+begin_q) , end_q - begin_q+1 );
619 strncpy(local_structure, (structure+begin_q) , end_q - begin_q +1 );
620 s3[end_t -begin_t +1 ]='\0';
621 s4[end_q -begin_q +1 ]='\0';
622 local_structure[end_q - begin_q +1]='\0';
624 test = duplexfold_CXS(s3,s4,access_s1,access_s2,max_pos, max_pos_j,INF,local_structure);
625 int l1= strchr(test.structure, '&')-test.structure;
626 int dL = strrchr(structure,'|') - strchr(structure,'|');
628 if(dL<=strlen(test.structure)-l1-1){
629 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
630 test.tb,test.te,test.qb,test.qe, test.ddG, test.energy, test.dG1, test.dG2);
632 free(s3);free(s4);free(test.structure);free(local_structure);
638 /*---------------------------------------------------------duplexfold----------------------------------------------------------------------------------*/
641 PRIVATE duplexT duplexfold_C(const char *s1, const char *s2, const int extension_cost, const char* structure ) {
642 int i, j, l1, Emin=INF, i_min=0, j_min=0;
646 int *previous_const; /* for each "|" constraint returns the position of the next "|" constraint */
648 n3 = (int) strlen(s1);
649 n4 = (int) strlen(s2);
651 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
652 update_fold_params(); if(P) free(P); P = scale_parameters();
655 previous_const=(int *) space(sizeof(int) * (n4+1));
657 previous_const[j-1]=n4;
660 if(structure[j-1]=='|'){
661 previous_const[j-1]=prev_temp;
665 previous_const[j-1]=prev_temp;
668 c = (int **) space(sizeof(int *) * (n3+1));
669 for (i=0; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
671 for (i=1; i<=n3; i++) {
672 for (j=n4; j>0; j--) {
673 int type, type2, E, k,l;
674 int bonus_2 = (structure[j-1]=='|'? bonus: 0);
675 type = pair[S1[i]][S2[j]];
676 c[i][j] = type ? P->DuplexInit +2 * extension_cost + bonus_2: INF;
677 if(!type){ continue;}
678 if(j<n4 && i>1 && !(structure[j]=='|') ) {
679 c[i][j]+=P->mismatchExt[type][SS1[i-1]][SS2[j+1]]+2*extension_cost;
682 c[i][j] += P->dangle5[type][SS1[i-1]]+ extension_cost;
684 else if(j<n4 && !(structure[j]=='|')){
685 c[i][j] += P->dangle3[type][SS2[j+1]]+ extension_cost;
687 if (type>2) c[i][j] += P->TerminalAU;
688 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
689 for (l=j+1; l<=previous_const[j]; l++) {
690 if (i-k+l-j-2>MAXLOOP) break;
691 type2 = pair[S1[k]][S2[l]];
692 if (!type2) continue;
693 E = E_IntLoop(i-k-1, l-j-1, type2, rtype[type],
694 SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P)+(i-k+l-j)*extension_cost + bonus_2;
695 c[i][j] = MIN2(c[i][j], c[k][l]+E);
699 if(i<n3 && j>1 && !(structure[j-2]=='|')){
700 E+= P->mismatchExt[rtype[type]][SS2[j-1]][SS1[i+1]]+2*extension_cost;
703 E += P->dangle3[rtype[type]][SS1[i+1]]+extension_cost;
705 else if (j>1 && !(structure[j-2]=='|')){
706 E += P->dangle5[rtype[type]][SS2[j-1]]+extension_cost;
708 if (type>2) E += P->TerminalAU;
711 Emin=E; i_min=i; j_min=j;
715 struc = backtrack_C(i_min, j_min, extension_cost,structure,&Emin);
716 if (i_min<n3) i_min++;
717 if (j_min>1 ) j_min--;
718 l1 = strchr(struc, '&')-struc;
720 size=strlen(struc)-1;
721 Emin-= size * (extension_cost);
724 mfe.energy = (double) Emin/100.;
725 mfe.structure = struc;
726 free(previous_const);
728 for (i=0; i<=n3; i++) free(c[i]);
731 free(S1); free(S2); free(SS1); free(SS2);
736 PRIVATE char *backtrack_C(int i, int j, const int extension_cost, const char* structure, int *Emin) {
737 /* backtrack structure going backwards from i, and forwards from j
738 return structure in bracket notation with & as separator */
739 int k, l, type, type2, E, traced, i0, j0, *previous_const;
740 char *st1, *st2, *struc;
742 previous_const=(int *) space(sizeof(int) * (n4+1)); /* encodes the position of the constraints */
744 previous_const[j_temp-1]=n4;
747 if(structure[j_temp-1]=='|'){
748 previous_const[j_temp-1]=prev_temp;
752 previous_const[j_temp-1]=prev_temp;
755 st1 = (char *) space(sizeof(char)*(n3+1));
756 st2 = (char *) space(sizeof(char)*(n4+1));
757 i0=MIN2(i+1,n3); j0=MAX2(j-1,1);
758 while (i>0 && j<=n4) {
759 int bonus_2 = (structure[j-1]== '|'? bonus: 0);
760 E = c[i][j]; traced=0;
763 type = pair[S1[i]][S2[j]];
764 if (!type) nrerror("backtrack failed in fold duplex a");
765 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
766 for (l=j+1; l<=previous_const[j]; l++) {
768 if (i-k+l-j-2>MAXLOOP) break;
769 type2 = pair[S1[k]][S2[l]];
770 if (!type2) continue;
771 LE = E_IntLoop(i-k-1, l-j-1, type2, rtype[type],
772 SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P)+(i-k+l-j)*extension_cost + bonus_2;
773 if (E == c[k][l]+LE) {
784 if (i>1 && j<n4 && !(structure[j]=='|')){
785 E -=P->mismatchExt[type][SS1[i-1]][SS2[j+1]]+2*extension_cost;
788 E -= P->dangle5[type][SS1[i-1]]+extension_cost;
790 else if (j<n4 && !(structure[j]=='|')){
791 E -= P->dangle3[type][SS2[j+1]]+ extension_cost;
793 /* if (j<n4) E -= P->dangle3[type][SS2[j+1]]+extension_cost; */
794 if (type>2) E -= P->TerminalAU;
795 if (E != P->DuplexInit+2*extension_cost + bonus_2) {
796 nrerror("backtrack failed in fold duplex b");
806 struc = (char *) space(i0-i+1+j-j0+1+2);
807 for (k=MAX2(i,1); k<=i0; k++) if (!st1[k-1]) st1[k-1] = '.';
808 for (k=j0; k<=j; k++) if (!st2[k-1]) st2[k-1] = '.';
809 strcpy(struc, st1+MAX2(i-1,0)); strcat(struc, "&");
810 strcat(struc, st2+j0-1);
812 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
813 free(st1); free(st2);
814 free(previous_const);
821 duplexT ** Lduplexfold_C(const char *s1, const char *s2, const int threshold, const int extension_cost, const int alignment_length, const int delta, const int fast, const char* structure, const int il_a, const int il_b, const int b_a, const int b_b)
823 /* duplexT test = duplexfold_C(s1, s2, extension_cost,structure); */
827 int bext=b_a+extension_cost;
829 int iext_s=2*(il_a+extension_cost);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
830 int iext_ass=50+il_a+extension_cost;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
831 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
833 int max_pos;/* get position of the best hit */
836 int min_j_colonne=11;
839 int constthreshold=0; /* minimal threshold corresponding to a structure complying to all constraints */
841 while(structure[i]!='\0'){
842 if(structure[i]=='|') constthreshold+=bonus;
845 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
846 /* int nsubopt=10; */ /* total number of subopt */
847 int *position; /* contains the position of the hits with energy > E */
849 /* int const5end; */ /* position of the 5'most constraint. Only interaction reaching this position are taken into account. */
850 /* const5end = strchr(structure,'|') - structure; */
852 n1 = (int) strlen(s1);
853 n2 = (int) strlen(s2);
854 /* delta_check is the minimal distance allowed for two hits to be accepted */
855 /* if both hits are closer, reject the smaller ( in term of position) hits */
856 position = (int *) space ((delta+n1+3+delta) * sizeof(int));
857 position_j= (int *) space ((delta+n1+3+delta) * sizeof(int));
858 /* i want to implement a function that, given a position in a long sequence and a small sequence, */
859 /* duplexfold them at this position and report the result at the command line */
860 /* for this i first need to rewrite backtrack in order to remove the printf functio */
861 /* END OF DEFINITION FOR NEEDED SUBOPT DATA */
863 if ((!P) || (fabs(P->temperature - temperature)>1e-6))
864 update_dfold_params();
866 lc = (int**) space (sizeof(int *) * 5);
867 lin = (int**) space (sizeof(int *) * 5);
868 lbx = (int**) space (sizeof(int *) * 5);
869 lby = (int**) space (sizeof(int *) * 5);
870 linx = (int**) space (sizeof(int *) * 5);
871 liny = (int**) space (sizeof(int *) * 5);
873 for (i=0; i<=4; i++){
874 lc[i] = (int *) space(sizeof(int) * (n2+5));
875 lin[i] = (int *) space(sizeof(int) * (n2+5));
876 lbx[i] = (int *) space(sizeof(int) * (n2+5));
877 lby[i] = (int *) space(sizeof(int) * (n2+5));
878 linx[i]= (int *) space(sizeof(int) * (n2+5));
879 liny[i]= (int *) space(sizeof(int) * (n2+5));
882 lbx[0][j]=lbx[1][j]=lbx[2][j]=lbx[3][j] = lbx[4][j] =INF;
883 lin[0][j]=lin[1][j]=lin[2][j]=lin[3][j] = lin[4][j] =INF;
884 lc[0][j] =lc[1][j] =lc[2][j] = lc[3][j] = lc[4][j] =INF;
885 lby[0][j]=lby[1][j]=lby[2][j]=lby[3][j] = lby[4][j] =INF;
886 liny[0][j]=liny[1][j]=liny[2][j]=liny[3][j]=liny[4][j]=INF;
887 linx[0][j]=linx[1][j]=linx[2][j]=linx[3][j]=linx[4][j]=INF;
892 while(i < i_length) {
900 int bonus_2 = (structure[j-1]=='|' ? bonus : 0) ;
902 type = pair[S1[i]][S2[j]];
903 lc[idx][j]=type ? P->DuplexInit + 2*extension_cost + bonus_2 : INF; /* to avoid that previous value influence result should actually not be erforderlich */
905 type2=pair[S2[j+1]][S1[i-1]];
906 lin[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s, lin[idx_1][j]+iext_ass);
907 lin[idx][j]=MIN2(lin[idx][j],lin[idx][j+1]+iext_ass);
908 lin[idx][j]=MIN2(lin[idx][j],lin[idx_1][j+1]+iext_s);
909 linx[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,linx[idx_1][j]+iext_ass);
910 liny[idx][j]=MIN2(lc[idx_1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,liny[idx][j+1]+iext_ass);
911 type2=pair[S2[j+1]][S1[i]];
912 lby[idx][j]=MIN2(lby[idx][j+1]+bext, lc[idx][j+1]+bopen+bext+(type2>2?P->TerminalAU:0));
915 lin[idx][j] = lby[idx][j] = linx[idx][j]= liny[idx][j]=INF;
917 type2=pair[S2[j]][S1[i-1]];
918 lbx[idx][j]=MIN2(lbx[idx_1][j]+bext, lc[idx_1][j]+bopen+bext+(type2>2?P->TerminalAU:0));
919 /* --------------------------------------------------------------- end update recursion */
921 if(!(structure[j]=='|')){
922 lc[idx][j]+=P->mismatchExt[type][SS1[i-1]][SS2[j+1]]+2*extension_cost;
925 lc[idx][j]+=P->dangle5[type][SS1[i-1]]+extension_cost;
927 lc[idx][j]+=(type>2?P->TerminalAU:0);
928 /* type > 2 -> no GC or CG pair */
929 /* ------------------------------------------------------------------update c matrix */
930 /* Be careful, no lc may come from a region where a "|" is in a loop, avoided in lin = lby = INF ... jedoch fuer klein loops muss man aufpassen .. */
931 type2=pair[S1[i-1]][S2[j+1]];
932 lc[idx][j]=MIN2(lc[idx_1][j+1]+E_IntLoop(0,0,type2, rtype[type],SS1[i], SS2[j], SS1[i-1], SS2[j+1], P)+2*extension_cost, lc[idx][j]);
933 type2=pair[S1[i-2]][S2[j+1]];
934 lc[idx][j]=MIN2(lc[idx_2][j+1]+E_IntLoop(1,0,type2, rtype[type],SS1[i-1], SS2[j], SS1[i-1], SS2[j+1], P)+3*extension_cost,lc[idx][j]);
935 /* kleine loops checks wird in den folgenden if test gemacht. */
936 if(!(structure[j]=='|')){
937 type2=pair[S1[i-1]][S2[j+2]];
938 lc[idx][j]=MIN2(lc[idx_1][j+2]+E_IntLoop(0,1,type2, rtype[type],SS1[i], SS2[j+1], SS1[i-1], SS2[j+1], P)+3*extension_cost,lc[idx][j]);
939 type2=pair[S1[i-2]][S2[j+2]];
940 lc[idx][j]=MIN2(lc[idx_2][j+2]+E_IntLoop(1,1,type2, rtype[type],SS1[i-1], SS2[j+1], SS1[i-1], SS2[j+1], P)+4*extension_cost, lc[idx][j]);
941 type2 = pair[S1[i-3]][S2[j+2]];
942 lc[idx][j]=MIN2(lc[idx_3][j+2]+E_IntLoop(2,1,type2, rtype[type],SS1[i-2], SS2[j+1], SS1[i-1], SS2[j+1], P)+5*extension_cost, lc[idx][j]);
943 if(!(structure[j+1]=='|')){
944 type2 = pair[S1[i-3]][S2[j+3]];
945 lc[idx][j]=MIN2(lc[idx_3][j+3]+E_IntLoop(2,2,type2, rtype[type],SS1[i-2], SS2[j+2], SS1[i-1], SS2[j+1], P)+6*extension_cost,lc[idx][j]);
946 type2 = pair[S1[i-2]][S2[j+3]];
947 lc[idx][j]=MIN2(lc[idx_2][j+3]+E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P)+5*extension_cost, lc[idx][j]);
948 type2 = pair[S1[i-4]][S2[j+3]];
949 lc[idx][j]=MIN2(lc[idx_4][j+3]+E_IntLoop(3,2,type2, rtype[type],SS1[i-3], SS2[j+2], SS1[i-1], SS2[j+1], P)+7*extension_cost, lc[idx][j]);
950 if(!(structure[j+2]=='|')){
951 type2 = pair[S1[i-3]][S2[j+4]];
952 lc[idx][j]=MIN2(lc[idx_3][j+4]+E_IntLoop(2,3,type2, rtype[type],SS1[i-2], SS2[j+3], SS1[i-1], SS2[j+1], P)+7*extension_cost, lc[idx][j]);
956 /* internal->stack */
957 lc[idx][j]=MIN2(lin[idx_3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*extension_cost+2*iext_s, lc[idx][j]);
958 lc[idx][j]=MIN2(lin[idx_4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, lc[idx][j]);
959 lc[idx][j]=MIN2(lin[idx_2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, lc[idx][j]);
960 lc[idx][j]=MIN2(linx[idx_3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, lc[idx][j]);
961 lc[idx][j]=MIN2(liny[idx_1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, lc[idx][j]);
964 bAU=(type>2?P->TerminalAU:0);
965 lc[idx][j]=MIN2(lbx[idx_2][j+1]+2*extension_cost+bext+bAU, lc[idx][j]);
966 /* min2=by[i][j+1]; */
967 lc[idx][j]=MIN2(lby[idx_1][j+2]+2*extension_cost+bext+bAU, lc[idx][j]);
969 /* if(j<=const5end){ */
971 min_colonne=MIN2(lc[idx][j]+(type>2?P->TerminalAU:0)+
972 (!(structure[j-2]=='|') ?
973 P->mismatchExt[rtype[type]][SS2[j-1]][SS1[i+1]]+2*extension_cost :
974 P->dangle3[rtype[type]][SS1[i+1]]+extension_cost),
976 if(temp>min_colonne){
980 /* ---------------------------------------------------------------------end update */
982 if(max>=min_colonne){
985 max_pos_j=min_j_colonne;
987 position[i+delta]=min_colonne;min_colonne=INF;
988 position_j[i+delta]=min_j_colonne;
991 free(S1); free(S2); free(SS1); free(SS2);
992 /* printf("MAX: %d",max); */
993 if(max<threshold+constthreshold){
994 find_max_C(position, position_j, delta, threshold+constthreshold, constthreshold, alignment_length, s1, s2, extension_cost, fast, structure);
996 if(max<constthreshold){
997 plot_max_C(max, max_pos, max_pos_j,alignment_length, s1, s2, extension_cost,fast,structure);
999 for (i=0; i<=4; i++) {free(lc[i]);free(lin[i]);free(lbx[i]);free(lby[i]);free(linx[i]);free(liny[i]);}
1000 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]); */
1001 free(lc);free(lin);free(lbx);free(lby);free(linx);free(liny);
1008 PRIVATE void find_max_C(const int *position, const int *position_j,const int delta, const int threshold, const int constthreshold, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast,const char* structure){
1013 if(position[pos+delta]<(threshold)){
1015 search_range=delta+1;
1016 while(--search_range){
1017 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1018 temp_min=search_range;
1023 max_pos_j=position_j[pos+delta];
1025 max=position[pos+delta];
1026 printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
1027 pos=MAX2(10,pos-delta);
1035 if(position[pos+delta]<(threshold)){
1037 search_range=delta+1;
1038 while(--search_range){
1039 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1040 temp_min=search_range;
1045 max_pos_j=position_j[pos+delta];
1046 /* max_pos_j und pos entsprechen die realen position
1047 in der erweiterten sequenz.
1048 pos=1 -> position 1 in the sequence (and not 0 like in C)
1049 max_pos_j -> position 1 in the sequence ( not 0 like in C)
1051 int begin_t=MAX2(11, pos-alignment_length+1);
1052 int end_t =MIN2(n1-10, pos+1);
1053 int begin_q=MAX2(11, max_pos_j-1);
1054 int end_q =MIN2(n2-10, max_pos_j+alignment_length-2);
1055 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
1056 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
1057 char *local_structure = (char*) space(sizeof(char)*(end_q - begin_q +2));
1058 strncpy(s3, (s1+begin_t-1), end_t - begin_t +1);
1059 strncpy(s4, (s2+begin_q-1) , end_q - begin_q +1);
1060 strncpy(local_structure, (structure+begin_q-1) , end_q - begin_q +1 );
1061 s3[end_t -begin_t +1 ]='\0';
1062 s4[end_q -begin_q +1 ]='\0';
1063 local_structure[end_q - begin_q +1]='\0';
1065 test = duplexfold_C(s3, s4, extension_cost,local_structure);
1066 if(test.energy * 100 < (threshold-constthreshold)){
1067 int l1=strchr(test.structure, '&')-test.structure;
1068 int dL = strrchr(structure,'|') - strchr(structure,'|');
1070 if(dL <= strlen(test.structure)-l1-1){
1071 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
1072 begin_t-10+test.i-l1,
1073 begin_t-10+test.i-1,
1074 begin_q-10 + test.j-1 ,
1075 (begin_q -11) + test.j + strlen(test.structure)-l1-2,
1077 pos=MAX2(10,pos-delta);
1081 free(test.structure);
1082 free(local_structure);
1087 PRIVATE void plot_max_C(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast,const char* structure)
1090 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos-10, max_pos_j-10, ((double)max)/100);
1094 int begin_t=MAX2(11, max_pos-alignment_length+1);
1095 int end_t =MIN2(n1-10, max_pos+1);
1096 int begin_q=MAX2(11, max_pos_j-1);
1097 int end_q =MIN2(n2-10, max_pos_j+alignment_length-2);
1098 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
1099 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
1100 char *local_structure = (char*) space(sizeof(char)*(end_q - begin_q +2));
1101 strncpy(s3, (s1+begin_t-1), end_t - begin_t + 1);
1102 strncpy(s4, (s2+begin_q-1) , end_q - begin_q +1 );
1103 strncpy(local_structure, (structure+begin_q-1) , end_q - begin_q +1 );
1104 s3[end_t -begin_t +1 ]='\0';
1105 s4[end_q -begin_q +1 ]='\0';
1106 local_structure[end_q - begin_q +1]='\0';
1107 test = duplexfold_C(s3, s4, extension_cost,local_structure);
1108 int l1=strchr(test.structure, '&')-test.structure;
1109 int dL = strrchr(structure,'|') - strchr(structure,'|');
1111 if(dL <= strlen(test.structure)-l1-1){
1112 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
1113 begin_t-10+test.i-l1, begin_t-10+test.i-1, begin_q-10 +test.j-1 ,
1114 (begin_q -11) + test.j + strlen(test.structure)-l1-2 , test.energy);
1115 free(s3);free(s4);free(test.structure);
1117 free(local_structure);
1122 PRIVATE void update_dfold_params(void)
1125 P = scale_parameters();
1129 /*---------------------------------------------------------------------------*/
1132 PRIVATE void encode_seqs(const char *s1, const char *s2) {
1136 S1 = encode_seq(s1);
1137 SS1= (short *) space(sizeof(short)*(l+1));
1138 /* SS1 exists only for the special X K and I bases and energy_set!=0 */
1140 for (i=1; i<=l; i++) { /* make numerical encoding of sequence */
1141 SS1[i] = alias[S1[i]]; /* for mismatches of nostandard bases */
1145 S2 = encode_seq(s2);
1146 SS2= (short *) space(sizeof(short)*(l+1));
1147 /* SS2 exists only for the special X K and I bases and energy_set!=0 */
1149 for (i=1; i<=l; i++) { /* make numerical encoding of sequence */
1150 SS2[i] = alias[S2[i]]; /* for mismatches of nostandard bases */
1155 PRIVATE short * encode_seq(const char *sequence) {
1158 l = strlen(sequence);
1159 S = (short *) space(sizeof(short)*(l+2));
1162 /* make numerical encoding of sequence */
1163 for (i=1; i<=l; i++)
1164 S[i]= (short) encode_char(toupper(sequence[i-1]));
1166 /* for circular folding add first base at position n+1 */