2 /* Last changed Time-stamp: <2007-10-30 14:06:22 htafer> */
4 compute the duplex structure of two RNA strands,
5 allowing only inter-strand base pairs.
6 see cofold() for computing hybrid structures without
15 library containing the function used in rnaplex
16 the program rnaplex uses the following function
17 Lduplexfold: finds high scoring segments
18 it stores the end-position of these segments in an array
19 and call then for each of these positions the duplexfold function
20 which allows to make backtracking for each of the high scoring position
21 It allows to find suboptimal partially overlapping (depends on a a parameter)
22 duplexes between a long RNA and a shorter one.
23 Contrarly to RNAduplex, the energy model is not in E~log(N),
24 where N is the length of an interial loop but used an affine model,
25 where the extension and begin parameter are fitted to the energy
26 parameter used by RNAduplex. This allows to check for duplex between a short RNA(20nt)
27 and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
29 The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
30 given threshold this value is stored in an array. When the alignment score goes
31 then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
32 to find all non-overlapping high-scoring segments.
33 For more information check "durbin, biological sequence analysis"
43 #include "energy_par.h"
44 #include "fold_vars.h"
50 #include "loop_energies.h"
51 /* #################SIMD############### */
54 static char rcsid[] UNUSED = "$Id: plex.c,v 1.14 2007/06/12 12:50:16 htafer Exp $";
55 /* int subopt_sorted=0; */
58 #define PRIVATE static
60 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
61 #define NEW_NINIO 1 /* new asymetry penalty */
62 #define ARRAY 32 /*array size*/
64 #define MINPSCORE -2 * UNIT
67 *** Macro that define indices for the Single Array approach defined in FLduplexfold_XS->gain of 20% in runtime
68 *** so that everything is done in a 1D array.
69 *** input is idx for i, j for j and the length of the query RNA
70 *** 1D is divided in 6 subarrays, one for each number of allowed state
71 *** The length of each subarray is 5*L. 5 the maximal stored distance on the target sequence,
72 *** L is the length of the query sequence
74 #define LCI(i,j,l) ((i )*l + j)
75 #define LINI(i,j,l) ((i + 5)*l + j)
76 #define LBXI(i,j,l) ((i + 10)*l + j)
77 #define LBYI(i,j,l) ((i + 15)*l + j)
78 #define LINIX(i,j,l) ((i + 20)*l + j)
79 #define LINIY(i,j,l) ((i + 25)*l + j)
81 PRIVATE void encode_seqs(const char *s1, const char *s2);
82 PRIVATE short *encode_seq(const char *seq);
83 PRIVATE void update_dfold_params(void);
85 *** duplexfold(_XS)/backtrack(_XS) computes duplex interaction with standard energy and considers extension_cost
86 *** find_max(_XS)/plot_max(_XS) find suboptimals and MFE
87 *** fduplexfold(_XS) computes duplex in a plex way
89 PRIVATE duplexT duplexfold(const char *s1, const char *s2, const int extension_cost);
90 PRIVATE char * backtrack(int i, int j, const int extension_cost);
91 PRIVATE void find_max(const int *position, const int *position_j, const int delta, const int threshold, const int length, const char *s1, const char *s2, const int extension_cost, const int fast,const int il_a, const int il_b, const int b_a, const int b_b);
92 PRIVATE void plot_max(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast,const int il_a, const int il_b, const int b_a, const int b_b);
94 /* PRIVATE duplexT duplexfold_XS(const char *s1, const char *s2,const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold); */
95 PRIVATE duplexT duplexfold_XS(const char *s1, const char *s2,const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold, const int i_flag, const int j_flag);
96 /* PRIVATE char * backtrack_XS(int i, int j, const int** access_s1, const int** access_s2); */
97 PRIVATE char *backtrack_XS(int i, int j, const int **access_s1,const int **access_s2,const int i_flag, const int j_flag );
98 PRIVATE void find_max_XS(const int *position, const int *position_j,const int delta, const int threshold, const int alignment_length,
99 const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast,const int il_a, const int il_b, const int b_a, const int b_b);
100 PRIVATE void plot_max_XS(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast,const int il_a, const int il_b, const int b_a, const int b_b);
101 PRIVATE duplexT fduplexfold(const char *s1, const char *s2, const int extension_cost, const int il_a, const int il_b, const int b_a, const int b_b);
102 PRIVATE char *fbacktrack(int i, int j, const int extension_cost,const int il_a, const int il_b, const int b_a, const int b_b, int *dG);
103 PRIVATE duplexT fduplexfold_XS(const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold,const int il_a, const int il_b, const int b_a, const int b_b);
104 PRIVATE char * fbacktrack_XS(int i, int j, const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int il_a, const int il_b, const int b_a, const int b_b, int *dGe, int *dGeplex, int *dGx, int *dGy);
110 #define MAXSECTORS 500 /* dimension for a backtrack array */
111 #define LOCALITY 0. /* locality parameter for base-pairs */
113 #define MIN2(A, B) ((A) < (B) ? (A) : (B))
114 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
116 PRIVATE paramT *P = NULL;
119 *** energy array used in fduplexfold and fduplexfold_XS
120 *** We do not use the 1D array here as it is not time critical
121 *** It also makes the code more readable
122 *** c -> stack;in -> interior loop;bx/by->bulge;inx/iny->1xn loops
125 PRIVATE int **c=NULL, **in=NULL, **bx=NULL, **by=NULL, **inx=NULL, **iny=NULL;
128 *** S1, SS1, ... contains the encoded sequence for target and query
129 *** n1, n2, n3, n4 contains target and query length
132 PRIVATE short *S1=NULL, *SS1=NULL, *S2=NULL, *SS2=NULL;/*contains the sequences*/
133 PRIVATE int n1,n2; /* sequence lengths */
134 PRIVATE int n3, n4; /*sequence length for the duplex*/;
137 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
140 *** duplexfold_XS is the pendant to the duplex function as defined in duplex.c
141 *** but takes the accessibility into account. It is similar to the MFE version of RNAup
142 *** The only approximation made is that target 3' end - query 5' end base pair is known
143 *** s1,s2 are the query and target sequence; access_s1, access_s2 are the accessibility
144 *** profiles, i_pos, j_pos are the coordinates of the closing pair.
149 PRIVATE duplexT duplexfold_XS(const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold, const int i_flag, const int j_flag) {
150 int i, j,p,q, Emin=INF, l_min=0, k_min=0;
154 n3 = (int) strlen(s1);
155 n4 = (int) strlen(s2);
156 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
157 update_fold_params(); if(P) free(P); P = scale_parameters();
161 c = (int **) space(sizeof(int *) * (n3+1));
162 for (i=0; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
163 for (i=0; i<=n3; i++){
169 int type, type2, type3, E, k,l;
170 i=n3-i_flag; j=1+j_flag;
171 type = pair[S1[i]][S2[j]];
173 printf("Error during initialization of the duplex in duplexfold_XS\n");
178 c[i][j] = P->DuplexInit;
179 /** if (type>2) c[i][j] += P->TerminalAU;
180 *** c[i][j]+=P->dangle3[rtype[type]][SS1[i+1]];
181 *** c[i][j]+=P->dangle5[rtype[type]][SS2[j-1]];
182 *** The three above lines are replaced by the line below
186 c[i][j] += E_ExtLoop(rtype[type], (j_flag ? SS2[j-1] : -1) , (i_flag ? SS1[i+1] : -1), P);
188 /* if(j_flag ==0 && i_flag==0){ */
189 /* c[i][j] += E_ExtLoop(rtype[type], -1 , -1 , P); */
190 /* }else if(j_flag ==0 && i_flag==1){ */
191 /* c[i][j] += E_ExtLoop(rtype[type], -1 , SS1[i+1], P); */
192 /* }else if(j_flag ==1 && i_flag==0){ */
193 /* c[i][j] += E_ExtLoop(rtype[type], SS2[j-1] , -1, P); */
195 /* c[i][j] += E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1], P); */
197 /* Just in case we have only one bp, we initialize ... */
198 /* k_min, l_min and Emin */
199 k_min=i; l_min=j;Emin=c[i][j];
200 for (k=i; k>1 ; k--) {
201 if(k<i) c[k+1][0]=INF;
202 for (l=j; l<=n4-1; l++) {
206 type2 = pair[S1[k]][S2[l]];
207 if (!type2) continue;
208 for (p=k+1; p<= n3 - i_flag && p<k+MAXLOOP-1; p++) {
209 for (q = l-1; q >= 1+j_flag; q--) {
210 if (p-k+l-q-2>MAXLOOP) break;
211 type3=pair[S1[p]][S2[q]];
213 E = E_IntLoop(p-k-1, l-q-1, type2, rtype[type3],SS1[k+1], SS2[l-1], SS1[p-1], SS2[q+1],P);
214 c[k][l] = MIN2(c[k][l], c[p][q]+E);
218 E+=access_s1[i-k+1][i_pos]+access_s2[l-1][j_pos+(l-1)-1];
219 /**if (type2>2) E += P->TerminalAU;
220 ***if (k>1) E += P->dangle5[type2][SS1[k-1]];
221 ***if (l<n4) E += P->dangle3[type2][SS2[l+1]];
222 *** Replaced by the line below
224 E+=E_ExtLoop(type2, (k>1) ? SS1[k-1] : -1, (l<n4) ? SS2[l+1] : -1, P);
227 Emin=E; k_min=k; l_min=l;
232 if(Emin > threshold){
236 for (i=0; i<=n3; i++) free(c[i]);
238 free(S1); free(S2); free(SS1); free(SS2);
241 struc = backtrack_XS(k_min, l_min, access_s1, access_s2, i_flag, j_flag);
246 *** find best dangles combination
248 int dx_5, dx_3, dy_5, dy_3,dGx,dGy,bonus_x;
249 dx_5=0; dx_3=0; dy_5=0; dy_3=0;dGx=0;dGy=0;bonus_x=0;
253 dGx = access_s1[i-k_min+1][i_pos];dx_3=0; dx_5=0;bonus_x=0;
254 dGy = access_s2[l_min-j+1][j_pos + (l_min-1)];
256 mfe.tb=i_pos -9 - i + k_min -1 -dx_5;
257 mfe.te=i_pos -9 -1 + dx_3;
258 mfe.qb=j_pos -9 -1 - dy_5;
259 mfe.qe=j_pos + l_min -3 -9 + dy_3;
260 mfe.ddG=(double) Emin * 0.01;
261 mfe.dG1=(double) dGx*0.01 ;
262 mfe.dG2=(double) dGy*0.01 ;
264 mfe.energy= mfe.ddG - mfe.dG1 - mfe.dG2;
266 mfe.structure = struc;
267 for (i=0; i<=n3; i++) free(c[i]);
269 free(S1); free(S2); free(SS1); free(SS2);
278 PRIVATE char *backtrack_XS(int i, int j, const int **access_s1,const int **access_s2, const int i_flag, const int j_flag) {
279 /* backtrack structure going backwards from i, and forwards from j
280 return structure in bracket notation with & as separator */
281 int k, l, type, type2, E, traced, i0, j0;
282 char *st1, *st2, *struc;
283 st1 = (char *) space(sizeof(char)*(n3+1));
284 st2 = (char *) space(sizeof(char)*(n4+1));
285 i0=i;/*MAX2(i-1,1);*/j0=j;/*MIN2(j+1,n4);*/
286 while (i<=n3-i_flag && j>=1+j_flag) {
287 E = c[i][j]; traced=0;
290 type = pair[S1[i]][S2[j]];
291 if (!type) nrerror("backtrack failed in fold duplex bli");
292 for (k=i+1; k<=n3 && k>i-MAXLOOP-2; k++) {
293 for (l=j-1; l>=1; l--) {
295 if (i-k+l-j-2>MAXLOOP) break;
296 type2 = pair[S1[k]][S2[l]];
297 if (!type2) continue;
298 LE = E_IntLoop(k-i-1, j-l-1, type, rtype[type2], SS1[i+1], SS2[j-1], SS1[k-1], SS2[l+1],P);
299 if (E == c[k][l]+LE) {
310 ***if (i<n3) E -= P->dangle3[rtype[type]][SS1[i+1]];//+access_s1[1][i+1];
311 ***if (j>1) E -= P->dangle5[rtype[type]][SS2[j-1]];//+access_s2[1][j+1];
312 ***if (type>2) E -= P->TerminalAU;
313 *** Changes with line below
316 E -= E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1], P);
318 if (E != P->DuplexInit) {
319 nrerror("backtrack failed in fold duplex bal");
325 struc = (char *) space(i-i0+1+j0-j+1+2);
326 for (k=MAX2(i0,1); k<=i; k++) if (!st1[k-1]) st1[k-1] = '.';
327 for (k=j; k<=j0; k++) if (!st2[k-1]) st2[k-1] = '.';
328 strcpy(struc, st1+MAX2(i0-1,0)); strcat(struc, "&");
329 strcat(struc, st2+j-1);
330 free(st1); free(st2);
336 *** fduplexfold(_XS) computes the interaction based on the plex energy model.
337 *** Faster than duplex approach, but not standard model compliant
338 *** We use the standard matrix (c, in, etc..., because we backtrack)
341 PRIVATE duplexT fduplexfold_XS(const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold,const int il_a, const int il_b, const int b_a, const int b_b) {
343 *** i,j recursion index
344 *** Emin, i_min, j_min MFE position and energy
345 *** mfe struc duplex structure
347 int i, j, Emin, i_min, j_min,l1;
351 *** bext=b_a bulge extension parameter for linear model
352 *** iopen=il_b interior opening for linear model
353 *** iext_s=2*il_a asymmetric extension for interior loop
354 *** iext_ass=60+il_a symmetric extension for interior loop
355 *** min_colonne=INF; max score of a row
357 *** max_pos; position of best hit during recursion on target
358 *** max_pos_j; position of best hit during recursion on query
359 *** temp; temp variable for min_colonne
360 *** min_j_colonne; position of the minimum on query in row j
361 *** max=INF; absolute MFE
362 *** n3,n4 length of target and query
363 *** DJ contains the accessibility penalty for the query sequence
364 *** maxPenalty contains the maximum penalty
369 int iext_s=2*il_a;/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
370 int iext_ass=50+il_a;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
371 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
373 int max_pos;/* get position of the best hit */
381 *** variable initialization
383 n3 = (int) strlen(s1);
384 n4 = (int) strlen(s2);
385 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
386 update_fold_params(); if(P) free(P); P = scale_parameters();
390 *** array initialization
392 c = (int**) space(sizeof(int *) * (n3+1));
393 in = (int**) space (sizeof(int *) * (n3+1));
394 bx = (int**) space (sizeof(int *) * (n3+1));
395 by = (int**) space (sizeof(int *) * (n3+1));
396 inx= (int**) space (sizeof(int *) * (n3+1));
397 iny= (int**) space (sizeof(int *) * (n3+1));
398 for (i=0; i<=n3; i++){
399 c[i] = (int *) space(sizeof(int) * (n4+1));
400 in[i] = (int *) space(sizeof(int) * (n4+1));
401 bx[i] = (int *) space(sizeof(int) * (n4+1));
402 by[i] = (int *) space(sizeof(int) * (n4+1));
403 inx[i]= (int *) space(sizeof(int) * (n4+1));
404 iny[i]= (int *) space(sizeof(int) * (n4+1));
408 in[i][j]=INF;/* no in before 1 */
409 c[i][j] =INF; /* no bulge and no in before n2 */
410 bx[i][j]=INF;/* no bulge before 1 */
412 inx[i][j]=INF;/* no bulge before 1 */
417 *** sequence encoding
421 *** Compute accessibility penalty for the query only once
423 maxPenalty[0]=(int) -1*P->stack[2][2]/2;
424 maxPenalty[1]=(int) -1*P->stack[2][2];
425 maxPenalty[2]=(int) -3*P->stack[2][2]/2;
426 maxPenalty[3]=(int) -2*P->stack[2][2];
429 DJ=(int **) space(4*sizeof(int*));
430 DJ[0]=(int *) space((1+n4)*sizeof(int));
431 DJ[1]=(int *) space((1+n4)*sizeof(int));
432 DJ[2]=(int *) space((1+n4)*sizeof(int));
433 DJ[3]=(int *) space((1+n4)*sizeof(int));
437 int jdiff=j_pos+j-11;
438 DJ[0][j] = access_s2[5][jdiff+4] - access_s2[4][jdiff+4] ;
439 DJ[1][j] = access_s2[5][jdiff+5] - access_s2[4][jdiff+5] + DJ[0][j];
440 DJ[2][j] = access_s2[5][jdiff+6] - access_s2[4][jdiff+6] + DJ[1][j];
441 DJ[3][j] = access_s2[5][jdiff+7] - access_s2[4][jdiff+7] + DJ[2][j];
442 DJ[0][j] = MIN2(DJ[0][j],maxPenalty[0]);
443 DJ[1][j] = MIN2(DJ[1][j],maxPenalty[1]);
444 DJ[2][j] = MIN2(DJ[2][j],maxPenalty[2]);
445 DJ[3][j] = MIN2(DJ[3][j],maxPenalty[3]);
449 *** Start of the recursion
450 *** first and last 10 nucleotides on target and query are dummy nucleotides
451 *** allow to reduce number of if test
455 while(i < i_length) {
457 int idiff=i_pos-(n3-10-i);
458 di1 = access_s1[5][idiff] - access_s1[4][idiff-1];
459 di2 = access_s1[5][idiff-1] - access_s1[4][idiff-2] + di1;
460 di3 = access_s1[5][idiff-2] - access_s1[4][idiff-3] + di2;
461 di4 = access_s1[5][idiff-3] - access_s1[4][idiff-4] + di3;
462 di1=MIN2(di1,maxPenalty[0]);
463 di2=MIN2(di2,maxPenalty[1]);
464 di3=MIN2(di3,maxPenalty[2]);
465 di4=MIN2(di4,maxPenalty[3]);
470 int jdiff=j_pos+j-11;
476 type = pair[S1[i]][S2[j]];
480 c[i][j]=type ? P->DuplexInit + access_s1[1][idiff]+access_s2[1][jdiff] : INF;
482 *** update lin bx by linx liny matrix
484 type2=pair[S2[j+1]][S1[i-1]];
486 *** start/extend interior loop
488 in[i][j]=MIN2(c[i - 1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s+di1+dj1,
489 in[i - 1][j]+iext_ass + di1);
492 *** start/extend nx1 target
493 *** use same type2 as for in
495 inx[i][j]=MIN2(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s+di1+dj1,
496 inx[i-1][j]+iext_ass+di1);
498 *** start/extend 1xn target
499 *** use same type2 as for in
501 iny[i][j]=MIN2(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s+di1+dj1,
502 iny[i][j+1]+iext_ass+dj1);
504 *** extend interior loop
506 in[i][j]=MIN2(in[i][j],in[i][j+1]+iext_ass + dj1);
507 in[i][j]=MIN2(in[i][j],in[i - 1][j+1]+iext_s + di1 + dj1);
509 *** start/extend bulge target
511 type2=pair[S2[j]][S1[i-1]];
512 bx[i][j]=MIN2(bx[i - 1][j]+bext + di1, c[i - 1][j]+bopen+bext+(type2>2?P->TerminalAU:0) + di1);
514 *** start/extend bulge query
516 type2=pair[S2[j+1]][S1[i]];
517 by[i][j]=MIN2(by[i][j+1]+bext+dj1, c[i][j+1]+bopen+bext+(type2>2?P->TerminalAU:0)+dj1);
519 ***end update recursion
520 ***######################## Start stack extension##############################
523 c[i][j]+=E_ExtLoop(type, SS1[i-1], SS2[j+1],P);
527 if((type2=pair[S1[i-1]][S2[j+1]]))
528 c[i][j]=MIN2(c[i - 1][j+1]+P->stack[rtype[type]][type2]+di1+dj1, c[i][j]);
530 *** 1x0 / 0x1 stack extension
532 if((type2=pair[S1[i-1]][S2[j+2]]))
533 c[i][j]=MIN2(c[i - 1][j+2]+P->bulge[1]+P->stack[rtype[type]][type2]+di1+dj2,c[i][j]);
534 if((type2=pair[S1[i-2]][S2[j+1]]))
535 c[i][j]=MIN2(c[i - 2][j+1]+P->bulge[1]+P->stack[type2][rtype[type]]+di2+dj1,c[i][j]);
537 *** 1x1 / 2x2 stack extension
539 if((type2=pair[S1[i-2]][S2[j+2]]))
540 c[i][j]=MIN2(c[i - 2][j+2]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+di2+dj2, c[i][j]);
541 if((type2 = pair[S1[i-3]][S2[j+3]]))
542 c[i][j]=MIN2(c[i - 3][j+3]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di3+dj3,c[i][j]);
544 *** 1x2 / 2x1 stack extension
545 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
546 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
548 if((type2 = pair[S1[i-3]][S2[j+2]]))
549 c[i][j]=MIN2(c[i - 3][j+2]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+di3+dj2, c[i][j]);
550 if((type2 = pair[S1[i-2]][S2[j+3]]))
551 c[i][j]=MIN2(c[i - 2][j+3]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di2+dj3, c[i][j]);
554 *** 2x3 / 3x2 stack extension
556 if((type2 = pair[S1[i-4]][S2[j+3]]))
557 c[i][j]=MIN2(c[i - 4][j+3]+P->internal_loop[5]+P->ninio[2]+
558 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di4+dj3, c[i][j]);
559 if((type2 = pair[S1[i-3]][S2[j+4]]))
560 c[i][j]=MIN2(c[i - 3][j+4]+P->internal_loop[5]+P->ninio[2]+
561 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di3+dj4, c[i][j]);
564 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
569 c[i][j]=MIN2(in[i - 3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*iext_s+di3+dj3, c[i][j]);
573 c[i][j]=MIN2(in[i - 4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di4+dj2, c[i][j]);
577 c[i][j]=MIN2(in[i - 2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di2+dj4, c[i][j]);
581 c[i][j]=MIN2(inx[i - 3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di3+dj1, c[i][j]);
585 c[i][j]=MIN2(iny[i - 1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+dj3+di1, c[i][j]);
590 bAU=(type>2?P->TerminalAU:0);
591 c[i][j]=MIN2(bx[i - 2][j+1]+di2+dj1+bext+bAU, c[i][j]);
595 c[i][j]=MIN2(by[i - 1][j+2]+di1+dj2+bext+bAU, c[i][j]);
597 min_colonne=MIN2(c[i][j]+ E_ExtLoop(rtype[type], SS2[j-1], SS1[i+1], P),min_colonne);
598 if(temp>min_colonne){
601 /* ---------------------------------------------------------------------end update */
603 if(max>=min_colonne){
606 max_pos_j=min_j_colonne;
612 free(S1); free(S2); free(SS1); free(SS2);
613 for (i=0; i<=n3; i++) {
621 for (i=0; i<=3; i++) {
624 free(c);free(in);free(bx);free(by);free(inx);free(iny);free(DJ);
631 int dGe, dGeplex, dGx, dGy;
632 dGe=dGeplex=dGx=dGy=0;
633 /* printf("MAX fduplexfold_XS %d\n",Emin); */
634 struc = fbacktrack_XS(i_min, j_min, access_s1, access_s2, i_pos, j_pos,il_a, il_b,b_a,b_b,&dGe, &dGeplex, &dGx, &dGy);
636 l1 = strchr(struc, '&')-struc;
638 size=strlen(struc)-1;
639 int lengthx; int endx; int lengthy; int endy;
641 lengthx-=(struc[0]=='.'?1:0);
642 lengthx-=(struc[l1-1]=='.'?1:0);
643 endx=(i_pos-(n3-i_min));
645 lengthy-=(struc[size]=='.'?1:0);
646 lengthy-=(struc[l1+1]=='.'?1:0);
647 endy=j_pos+j_min+lengthy -22;
648 if (i_min<n3-10) i_min++;
649 if (j_min>11 ) j_min--;
652 mfe.ddG = (double) Emin*0.01;
653 mfe.structure = struc;
654 mfe.energy_backtrack = (double) dGe * 0.01;
655 mfe.energy = (double) dGeplex * 0.01;
656 mfe.opening_backtrack_x = (double) dGx * 0.01;
657 mfe.opening_backtrack_y = (double) dGy * 0.01;
658 mfe.dG1=(double) access_s1[lengthx][endx+10] * 0.01;
659 mfe.dG2=(double) access_s2[lengthy][endy+10] * 0.01;
660 free(S1); free(S2); free(SS1); free(SS2);
661 for (i=0; i<=n3; i++) {
669 for (i=0; i<=3; i++) {
673 free(c);free(in);free(bx);free(by);free(iny);free(inx);
677 PRIVATE char *fbacktrack_XS(int i, int j, const int** access_s1, const int** access_s2, const int i_pos, const int j_pos,const int il_a, const int il_b, const int b_a, const int b_b, int *dG, int *dGplex, int *dGx, int *dGy) {
678 /* backtrack structure going backwards from i, and forwards from j
679 return structure in bracket notation with & as separator */
680 int k, l, type, type2, E, traced, i0, j0;
681 char *st1, *st2, *struc;
685 int iext_s=2*il_a;/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
686 int iext_ass=50+il_a;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
687 st1 = (char *) space(sizeof(char)*(n3+1));
688 st2 = (char *) space(sizeof(char)*(n4+1));
689 i0=MIN2(i+1,n3-10); j0=MAX2(j-1,11);
691 state=1; /* we start backtracking from a a pair , i.e. c-matrix */
692 /* state 1 -> base pair, c
693 state 2 -> interior loop, in
694 state 3 -> bx loop, bx
695 state 4 -> by loop, by
698 k=i; l=j; /* stores the i,j information for subsequence usage see * */
701 *** (type>2?P->TerminalAU:0)+P->dangle3[rtype[type]][SS1[i+1]]+P->dangle5[rtype[type]][SS2[j-1]];
705 if ((!P) || (fabs(P->temperature - temperature)>1e-6)){
706 update_dfold_params();
707 if(P) free(P); P = scale_parameters();
710 maxPenalty[0]=(int) -1*P->stack[2][2]/2;
711 maxPenalty[1]=(int) -1*P->stack[2][2];
712 maxPenalty[2]=(int) -3*P->stack[2][2]/2;
713 maxPenalty[3]=(int) -2*P->stack[2][2];
715 type = pair[S1[i]][S2[j]];
716 *dG+= E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1] , P);
719 while (i>10 && j<=n4-9 && traced) {
721 idiff=i_pos-(n3-10-i);
722 di1 = access_s1[5][idiff] - access_s1[4][idiff-1];
723 di2 = access_s1[5][idiff-1] - access_s1[4][idiff-2] + di1;
724 di3 = access_s1[5][idiff-2] - access_s1[4][idiff-3] + di2;
725 di4 = access_s1[5][idiff-3] - access_s1[4][idiff-4] + di3;
726 di1=MIN2(di1,maxPenalty[0]);
727 di2=MIN2(di2,maxPenalty[1]);
728 di3=MIN2(di3,maxPenalty[2]);
729 di4=MIN2(di4,maxPenalty[3]);
732 dj1 = access_s2[5][jdiff+4] - access_s2[4][jdiff+4];
733 dj2 = access_s2[5][jdiff+5] - access_s2[4][jdiff+5] + dj1;
734 dj3 = access_s2[5][jdiff+6] - access_s2[4][jdiff+6] + dj2;
735 dj4 = access_s2[5][jdiff+7] - access_s2[4][jdiff+7] + dj3;
736 dj1=MIN2(dj1,maxPenalty[0]);
737 dj2=MIN2(dj2,maxPenalty[1]);
738 dj3=MIN2(dj3,maxPenalty[2]);
739 dj4=MIN2(dj4,maxPenalty[3]);
743 type = pair[S1[i]][S2[j]];
745 bAU=(type>2?P->TerminalAU:0);
746 if(!type) nrerror("backtrack failed in fold duplex");
747 type2=pair[S1[i-1]][S2[j+1]];
748 if(type2 && c[i][j]== (c[i - 1][j+1]+P->stack[rtype[type]][type2]+di1+dj1)){
751 (*dG)+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
752 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
763 type2=pair[S1[i-1]][S2[j+2]];
764 if(type2 && c[i][j]==(c[i - 1][j+2]+P->bulge[1]+P->stack[rtype[type]][type2]+di1+dj2)){
767 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
768 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
779 type2=pair[S1[i-2]][S2[j+1]];
780 if(type2 && c[i][j]==(c[i - 2][j+1]+P->bulge[1]+P->stack[type2][rtype[type]]+di2+dj1)){
783 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
784 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
795 type2=pair[S1[i-2]][S2[j+2]];
796 if(type2 && c[i][j]==(c[i - 2][j+2]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+di2+dj2)){
799 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
800 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
811 type2 = pair[S1[i-3]][S2[j+3]];
812 if(type2 && c[i][j]==(c[i - 3][j+3]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di3+dj3)){
815 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
816 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
827 type2 = pair[S1[i-3]][S2[j+2]];
828 if(type2 && c[i][j]==(c[i - 3][j+2]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+di3+dj2)){
831 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
832 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
843 type2 = pair[S1[i-2]][S2[j+3]];
844 if(type2 && c[i][j]==(c[i - 2][j+3]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di2+dj3)){
847 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
848 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
859 type2 = pair[S1[i-4]][S2[j+3]];
860 if(type2 && c[i][j]==(c[i - 4][j+3]+P->internal_loop[5]+P->ninio[2]+
861 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di4+dj3)){
864 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
865 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
876 type2 = pair[S1[i-3]][S2[j+4]];
877 if(type2 && c[i][j]==(c[i - 3][j+4]+P->internal_loop[5]+P->ninio[2]+
878 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di3+dj4)){
881 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
882 *dGplex+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
893 if(c[i][j]==(in[i - 3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+di3+dj3+2*iext_s)){
896 *dGplex+=P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*iext_s;
907 if(c[i][j]==(in[i - 4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+di4+dj2+iext_s+2*iext_ass)){
910 *dGplex+=P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass;
921 if(c[i][j]==(in[i - 2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+di2+dj4+iext_s+2*iext_ass)){
924 *dGplex+=P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass;
935 if(c[i][j]==(inx[i - 3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di3+dj1)){
938 *dGplex+=P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di3+dj1;
949 if(c[i][j]==(iny[i - 1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di1+dj3)){
952 *dGplex+=P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di1+dj3;
963 if(c[i][j]==(bx[i - 2][j+1]+di2+dj1+bext+bAU)){
977 if(c[i][j]==(by[i - 1][j+2]+di1+dj2+bext+bAU)){
993 if(in[i][j]==(in[i - 1][j+1]+iext_s + di1 + dj1)){
1003 if(in[i][j]==(in[i - 1][j]+iext_ass + di1)){
1011 if(in[i][j]==(in[i][j+1]+iext_ass + dj1)){
1019 type2=pair[SS2[j+1]][SS1[i-1]];
1020 if(type2 && in[i][j]==(c[i - 1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s + di1 +dj1)){
1021 *dGplex+=P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s;
1022 int temp; temp=k; k=i-1; i=temp;
1023 temp=l; l=j+1; j=temp;
1024 type=pair[S1[i]][S2[j]];
1025 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
1035 if(bx[i][j]==(bx[i - 1][j]+bext+di1)){
1043 type2=pair[S2[j]][S1[i-1]];
1044 if(type2 && bx[i][j]==(c[i - 1][j]+bopen+bext+(type2>2?P->TerminalAU:0)+di1)){
1045 int temp; temp=k; k=i-1; i=temp;
1046 temp=l; l=j; j=temp;
1047 type=pair[S1[i]][S2[j]];
1048 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
1049 *dGplex+=bopen+bext+(type2>2?P->TerminalAU:0);
1058 if(by[i][j]==(by[i][j+1] + bext +dj1)){
1065 type2=pair[S2[j+1]][S1[i]];
1066 if(type2 && by[i][j]==(c[i][j+1]+bopen+bext+(type2>2?P->TerminalAU:0) + dj1)){
1067 int temp; temp=k; k=i; i=temp;
1068 temp=l; l=j+1; j=temp;
1069 type=pair[S1[i]][S2[j]];
1070 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
1071 *dGplex+=bopen+bext+(type2>2?P->TerminalAU:0);
1080 if(inx[i][j]==(inx[i-1][j]+iext_ass+di1)) {
1088 type2=pair[S2[j+1]][S1[i-1]];
1089 if(type2 && inx[i][j]==(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s+di1+dj1)){
1090 *dGplex+=P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s;
1091 int temp; temp=k; k=i-1; i=temp;
1092 temp=l; l=j+1; j=temp;
1093 type=pair[S1[i]][S2[j]];
1094 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
1104 if(iny[i][j]==(iny[i][j+1]+iext_ass+dj1)) {
1112 type2=pair[S2[j+1]][S1[i-1]];
1113 if(type2 && iny[i][j]==(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s+di1+dj1)){
1114 *dGplex+=P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s;
1115 int temp; temp=k; k=i-1; i=temp;
1116 temp=l; l=j+1; j=temp;
1117 type=pair[S1[i]][S2[j]];
1118 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
1130 idiff=i_pos-(n3-10-i);
1134 *** if (i>1) {E -= P->dangle5[type][SS1[i-1]]; *dG+=P->dangle5[type][SS1[i-1]];*dGplex+=P->dangle5[type][SS1[i-1]];}
1135 *** if (j<n4){E -= P->dangle3[type][SS2[j+1]]; *dG+=P->dangle3[type][SS2[j+1]];*dGplex+=P->dangle3[type][SS2[j+1]];}
1136 *** if (type>2) {E -= P->TerminalAU; *dG+=P->TerminalAU;*dGplex+=P->TerminalAU;}
1139 correction = E_ExtLoop(type, (i>1) ? SS1[i-1] : -1, (j<n4) ? SS2[j+1] : -1, P);
1141 *dGplex+=correction;
1144 if (E != P->DuplexInit+access_s1[1][idiff]+access_s2[1][jdiff]) {
1145 nrerror("backtrack failed in second fold duplex");
1149 *dGplex+=P->DuplexInit;
1150 *dGx+=access_s1[1][idiff];
1151 *dGy+=access_s2[1][jdiff];
1158 struc = (char *) space(i0-i+1+j-j0+1+2);
1159 for (k=MAX2(i,1); k<=i0; k++) if (!st1[k-1]) st1[k-1] = '.';
1160 for (k=j0; k<=j; k++) if (!st2[k-1]) st2[k-1] = '.';
1161 strcpy(struc, st1+MAX2(i-1,0));
1163 strcat(struc, st2+j0-1);
1164 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
1165 free(st1); free(st2);
1170 duplexT ** Lduplexfold_XS(const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int threshold, const int alignment_length, const int delta, const int fast, const int il_a, const int il_b, const int b_a, const int b_b)
1173 *** See variable definition in fduplexfold_XS
1180 int iext_ass=50+il_a;
1181 int min_colonne=INF;
1192 *** 1D array corresponding to the standard 2d recursion matrix
1193 *** Makes the computation 20% faster
1197 *** variable initialization
1199 n1 = (int) strlen(s1);
1200 n2 = (int) strlen(s2);
1202 *** Sequence encoding
1205 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
1206 update_dfold_params(); if(P) free(P); P = scale_parameters();
1211 *** Position of the high score on the target and query sequence
1213 position = (int *) space ((delta+n1+3+delta) * sizeof(int));
1214 position_j= (int *) space ((delta+n1+3+delta) * sizeof(int));
1216 *** extension penalty, computed only once, further reduce the computation time
1218 maxPenalty[0]=(int) -1*P->stack[2][2]/2;
1219 maxPenalty[1]=(int) -1*P->stack[2][2];
1220 maxPenalty[2]=(int) -3*P->stack[2][2]/2;
1221 maxPenalty[3]=(int) -2*P->stack[2][2];
1223 DJ=(int **) space(4*sizeof(int*));
1224 DJ[0]=(int *) space(n2*sizeof(int));
1225 DJ[1]=(int *) space(n2*sizeof(int));
1226 DJ[2]=(int *) space(n2*sizeof(int));
1227 DJ[3]=(int *) space(n2*sizeof(int));
1230 DJ[0][j] = access_s2[5][j+4] - access_s2[4][j+4] ;
1231 DJ[1][j] = access_s2[5][j+5] - access_s2[4][j+5] + DJ[0][j];
1232 DJ[2][j] = access_s2[5][j+6] - access_s2[4][j+6] + DJ[1][j];
1233 DJ[3][j] = access_s2[5][j+7] - access_s2[4][j+7] + DJ[2][j];
1234 DJ[0][j] = MIN2(DJ[0][j],maxPenalty[0]);
1235 DJ[1][j] = MIN2(DJ[1][j],maxPenalty[1]);
1236 DJ[2][j] = MIN2(DJ[2][j],maxPenalty[2]);
1237 DJ[3][j] = MIN2(DJ[3][j],maxPenalty[3]);
1240 *** instead of having 4 2-dim arrays we use a unique 1-dim array
1241 *** The mapping 2d -> 1D is done based ont the macro
1242 *** LCI(i,j,l) ((i )*l + j)
1243 *** LINI(i,j,l) ((i + 5)*l + j)
1244 *** LBXI(i,j,l) ((i + 10)*l + j)
1245 *** LBYI(i,j,l) ((i + 15)*l + j)
1246 *** LINIX(i,j,l) ((i + 20)*l + j)
1247 *** LINIY(i,j,l) ((i + 25)*l + j)
1249 *** SA has a length of 5 (number of columns we look back) *
1250 *** * 6 (number of structures we look at) *
1251 *** * length of the sequence
1254 SA=(int *) space(sizeof(int)*5*6*(n2+5));
1255 for(j=n2+4;j>=0;j--) {
1256 SA[(j*30) ]=SA[(j*30)+1 ]=SA[(j*30)+2 ]=SA[(j*30)+3 ]=SA[(j*30)+4 ]=INF;
1257 SA[(j*30)+5 ]=SA[(j*30)+1+5 ]=SA[(j*30)+2+5 ]=SA[(j*30)+3+5 ]=SA[(j*30)+4+5 ]=INF;
1258 SA[(j*30)+10]=SA[(j*30)+1+10]=SA[(j*30)+2+10]=SA[(j*30)+3+10]=SA[(j*30)+4+10]=INF;
1259 SA[(j*30)+15]=SA[(j*30)+1+15]=SA[(j*30)+2+15]=SA[(j*30)+3+15]=SA[(j*30)+4+15]=INF;
1260 SA[(j*30)+20]=SA[(j*30)+1+20]=SA[(j*30)+2+20]=SA[(j*30)+3+20]=SA[(j*30)+4+20]=INF;
1261 SA[(j*30)+25]=SA[(j*30)+1+25]=SA[(j*30)+2+25]=SA[(j*30)+3+25]=SA[(j*30)+4+25]=INF;
1266 while(i < i_length) {
1267 int di1,di2,di3,di4;
1273 di1 = access_s1[5][i] - access_s1[4][i-1];
1274 di2 = access_s1[5][i-1] - access_s1[4][i-2] + di1;
1275 di3 = access_s1[5][i-2] - access_s1[4][i-3] + di2;
1276 di4 = access_s1[5][i-3] - access_s1[4][i-4] + di3;
1277 di1=MIN2(di1,maxPenalty[0]);
1278 di2=MIN2(di2,maxPenalty[1]);
1279 di3=MIN2(di3,maxPenalty[2]);
1280 di4=MIN2(di4,maxPenalty[3]);
1283 int dj1,dj2,dj3,dj4;
1288 int type2, type,temp;
1289 type = pair[S1[i]][S2[j]];
1293 SA[LCI(idx,j,n2)] = type ? P->DuplexInit + access_s1[1][i] + access_s2[1][j] : INF;
1295 *** update lin bx by linx liny matrix
1297 type2=pair[S2[j+1]][S1[i-1]];
1299 *** start/extend interior loop
1301 SA[LINI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatchI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,
1302 SA[LINI(idx_1,j,n2)]+iext_ass + di1);
1305 *** start/extend nx1 target
1306 *** use same type2 as for in
1308 SA[LINIX(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,
1309 SA[LINIX(idx_1,j,n2)]+iext_ass + di1);
1311 *** start/extend 1xn target
1312 *** use same type2 as for in
1314 SA[LINIY(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+di1+dj1+iopen+iext_s,
1315 SA[LINIY(idx,j+1,n2)]+iext_ass + dj1);
1317 *** extend interior loop
1319 SA[LINI(idx,j,n2)]=MIN2(SA[LINI(idx,j,n2)],SA[LINI(idx,j+1,n2)]+iext_ass + dj1);
1320 SA[LINI(idx,j,n2)]=MIN2(SA[LINI(idx,j,n2)],SA[LINI(idx_1,j+1,n2)]+iext_s + di1 + dj1);
1322 *** start/extend bulge target
1324 type2=pair[S2[j]][S1[i-1]];
1325 SA[LBXI(idx,j,n2)]=MIN2(SA[LBXI(idx_1,j,n2)]+bext + di1, SA[LCI(idx_1,j,n2)]+bopen+bext+(type2>2?P->TerminalAU:0) + di1);
1327 *** start/extend bulge query
1329 type2=pair[S2[j+1]][S1[i]];
1330 SA[LBYI(idx,j,n2)]=MIN2(SA[LBYI(idx,j+1,n2)]+bext + dj1 , SA[LCI(idx,j+1,n2)]+bopen+bext+(type2>2?P->TerminalAU:0)+ dj1);
1332 ***end update recursion
1334 if(!type){continue;}
1338 SA[LCI(idx,j,n2)]+= E_ExtLoop(type, SS1[i-1] , SS2[j+1], P);
1342 if((type2=pair[S1[i-1]][S2[j+1]]))
1343 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->stack[rtype[type]][type2]+di1+dj1, SA[LCI(idx,j,n2)]);
1345 *** 1x0 / 0x1 stack extension
1347 if((type2=pair[S1[i-1]][S2[j+2]]))
1348 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+2,n2)]+P->bulge[1]+P->stack[rtype[type]][type2]+di1+dj2, SA[LCI(idx,j,n2)]);
1349 if((type2=pair[S1[i-2]][S2[j+1]]))
1350 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+1,n2)]+P->bulge[1]+P->stack[type2][rtype[type]]+di2+dj1, SA[LCI(idx,j,n2)]);
1352 *** 1x1 / 2x2 stack extension
1354 if((type2=pair[S1[i-2]][S2[j+2]]))
1355 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+2,n2)]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+di2+dj2, SA[LCI(idx,j,n2)]);
1356 if((type2 = pair[S1[i-3]][S2[j+3]]))
1357 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+3,n2)]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di3+dj3, SA[LCI(idx,j,n2)]);
1359 *** 1x2 / 2x1 stack extension
1360 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
1361 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
1363 if((type2 = pair[S1[i-3]][S2[j+2]]))
1364 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+2,n2)]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+di3+dj2, SA[LCI(idx,j,n2)]);
1365 if((type2 = pair[S1[i-2]][S2[j+3]]))
1366 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+3,n2)]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+di2+dj3, SA[LCI(idx,j,n2)]);
1368 *** 2x3 / 3x2 stack extension
1370 if((type2 = pair[S1[i-4]][S2[j+3]]))
1371 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_4,j+3,n2)]+P->internal_loop[5]+P->ninio[2]+
1372 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di4+dj3, SA[LCI(idx,j,n2)]);
1373 if((type2 = pair[S1[i-3]][S2[j+4]]))
1374 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+4,n2)]+P->internal_loop[5]+P->ninio[2]+
1375 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+di3+dj4, SA[LCI(idx,j,n2)]);
1377 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
1382 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_3,j+3,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*iext_s+di3+dj3,SA[LCI(idx,j,n2)]);
1386 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_4,j+2,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di4+dj2, SA[LCI(idx,j,n2)]);
1390 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_2,j+4,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+di2+dj4, SA[LCI(idx,j,n2)]);
1394 SA[LCI(idx,j,n2)]=MIN2(SA[LINIX(idx_3,j+1,n2)]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+di3+dj1, SA[LCI(idx,j,n2)]);
1398 SA[LCI(idx,j,n2)]=MIN2(SA[LINIY(idx_1,j+3,n2)]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+dj3+di1, SA[LCI(idx,j,n2)]);
1403 bAU=(type>2?P->TerminalAU:0);
1404 SA[LCI(idx,j,n2)]=MIN2(SA[LBXI(idx_2,j+1,n2)]+di2+dj1+bext+bAU,SA[LCI(idx,j,n2)]);
1408 SA[LCI(idx,j,n2)]=MIN2(SA[LBYI(idx_1,j+2,n2)]+di1+dj2+bext+bAU,SA[LCI(idx,j,n2)]);
1411 *** (type>2?P->TerminalAU:0)+
1412 *** P->dangle3[rtype[type]][SS1[i+1]]+
1413 *** P->dangle5[rtype[type]][SS2[j-1]],
1415 min_colonne=MIN2(SA[LCI(idx,j,n2)]+E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1] , P), min_colonne);
1417 if(temp>min_colonne){
1421 /* ---------------------------------------------------------------------end update */
1423 if(max>=min_colonne){
1426 max_pos_j=min_j_colonne;
1428 position[i+delta]=min_colonne;min_colonne=INF;
1429 position_j[i+delta]=min_j_colonne;
1432 /* printf("MAX: %d",max); */
1433 free(S1); free(S2); free(SS1); free(SS2);free(SA);
1435 find_max_XS(position, position_j, delta, threshold, alignment_length, s1, s2, access_s1, access_s2, fast,il_a, il_b,b_a, b_b);
1438 plot_max_XS(max, max_pos, max_pos_j, alignment_length, s1, s2, access_s1, access_s2,fast, il_a, il_b, b_a, b_b);
1440 for (i=0; i<=3; i++) {
1449 PRIVATE void find_max_XS(const int *position, const int *position_j,const int delta, const int threshold, const int alignment_length, const char *s1, const char *s2, const int **access_s1, const int **access_s2, const int fast,const int il_a, const int il_b, const int b_a, const int b_b){
1455 printf("%d %d\n",pos,position[pos+delta]);
1457 if(position[pos+delta]<(threshold)){
1459 search_range=delta+1;
1460 while(--search_range){
1461 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1462 temp_min=search_range;
1467 max_pos_j=position_j[pos+delta];
1469 max=position[pos+delta];
1470 printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
1471 pos=MAX2(10,pos-delta);
1479 if(position[pos+delta]<(threshold)){
1481 search_range=delta+1;
1482 while(--search_range){
1483 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1484 temp_min=search_range;
1489 max_pos_j=position_j[pos+delta];
1490 /* max_pos_j und pos entsprechen die realen position
1491 in der erweiterten sequenz.
1492 pos=1 -> position 1 in the sequence (and not 0 like in C)
1493 max_pos_j -> position 1 in the sequence ( not 0 like in C)
1495 int begin_t=MAX2(11, pos-alignment_length+1);/* 10 */
1496 int end_t =MIN2(n1-10, pos+1);
1497 int begin_q=MAX2(11, max_pos_j-1); /* 10 */
1498 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
1499 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2 + 20));
1500 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2 + 20));
1501 strcpy(s3,"NNNNNNNNNN");strcpy(s4,"NNNNNNNNNN");
1502 strncat(s3, (s1+begin_t-1), end_t - begin_t +1);
1503 strncat(s4, (s2+begin_q-1) , end_q - begin_q +1);
1504 strcat(s3,"NNNNNNNNNN");strcat(s4,"NNNNNNNNNN");
1505 s3[end_t -begin_t +1 +20 ]='\0';
1506 s4[end_q -begin_q +1 +20]='\0';
1508 test = fduplexfold_XS(s3, s4, access_s1, access_s2, end_t, begin_q,threshold, il_a, il_b, b_a, b_b);
1509 if(test.energy * 100 < threshold){
1510 int l1=strchr(test.structure, '&')-test.structure;
1511 printf(" %s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) [%5.2f = %5.2f + %5.2f + %5.2f] %d \n", test.structure,
1512 begin_t-10+test.i-l1-10,
1513 begin_t-10+test.i-1-10,
1514 begin_q-10 + test.j-1-10 ,
1515 (begin_q -11) + test.j + strlen(test.structure)-l1-2-10,
1516 test.ddG, test.energy, test.opening_backtrack_x, test.opening_backtrack_y,
1517 test.energy_backtrack + test.dG1 + test.dG2, test.energy_backtrack , test.dG1 , test.dG2,
1518 position[pos+delta]);
1519 pos=MAX2(10,pos-delta);
1521 free(test.structure);
1531 while( pos-- > 10 ){
1533 if(position[pos+delta]<(threshold)){
1535 search_range=delta+1;
1536 while(--search_range){
1537 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1538 temp_min=search_range;
1541 pos-=temp_min; /* position on i */
1543 max_pos_j=position_j[pos+delta]; /* position on j */
1544 int begin_t=MAX2(11,pos-alignment_length);
1545 int end_t =MIN2(n1-10, pos+1);
1546 int begin_q=MAX2(11,max_pos_j-1);
1547 int end_q =MIN2(n2-10,max_pos_j+alignment_length-1);
1550 i_flag = (end_t == pos+1?1:0);
1551 j_flag = (begin_q == max_pos_j-1?1:0);
1552 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
1553 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
1555 strncpy(s3, (s1+begin_t), end_t - begin_t+1);
1556 strncpy(s4, (s2+begin_q) , end_q - begin_q+1);
1557 s3[end_t -begin_t +1 ]='\0';
1558 s4[end_q -begin_q +1 ]='\0';
1560 test = duplexfold_XS(s3,s4,access_s1,access_s2,pos, max_pos_j,threshold,i_flag,j_flag);
1561 if(test.energy * 100 < threshold){
1562 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
1563 test.tb,test.te,test.qb,test.qe, test.ddG, test.energy, test.dG1, test.dG2);
1564 pos=MAX2(10,pos-delta);
1567 free(test.structure);
1574 PRIVATE int compare(const void *sub1, const void *sub2) {
1576 if (((duplexT *) sub1)->ddG > ((duplexT *) sub2)->ddG)
1578 if (((duplexT *) sub1)->ddG < ((duplexT *) sub2)->ddG)
1580 d = ((duplexT *) sub1)->i - ((duplexT *) sub2)->i;
1582 return ((duplexT *) sub1)->j - ((duplexT *) sub2)->j;
1586 PRIVATE void plot_max_XS(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int ** access_s1, const int ** access_s2, const int fast,const int il_a, const int il_b, const int b_a, const int b_b)
1589 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos-3, max_pos_j, ((double)max)/100);
1592 int begin_t=MAX2(11, max_pos-alignment_length+1);/* 10 */
1593 int end_t =MIN2(n1-10, max_pos+1);
1594 int begin_q=MAX2(11, max_pos_j-1); /* 10 */
1595 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
1596 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2 + 20));
1597 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2 + 20));
1598 strcpy(s3,"NNNNNNNNNN");strcpy(s4,"NNNNNNNNNN");
1599 strncat(s3, (s1+begin_t-1), end_t - begin_t +1);
1600 strncat(s4, (s2+begin_q-1) , end_q - begin_q +1);
1601 strcat(s3,"NNNNNNNNNN");strcat(s4,"NNNNNNNNNN");
1602 s3[end_t -begin_t +1 +20 ]='\0';
1603 s4[end_q -begin_q +1 +20]='\0';
1605 test = fduplexfold_XS(s3, s4, access_s1, access_s2, end_t, begin_q, INF, il_a, il_b, b_a, b_b);
1606 int l1=strchr(test.structure, '&')-test.structure;
1607 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) [%5.2f = %5.2f + %5.2f + %5.2f] \n", test.structure,
1608 begin_t-10+test.i-l1-10,
1609 begin_t-10+test.i-1-10,
1610 begin_q-10 + test.j-1-10 ,
1611 (begin_q -11) + test.j + strlen(test.structure)-l1-2-10,
1612 test.ddG, test.energy, test.opening_backtrack_x, test.opening_backtrack_y,
1613 test.energy_backtrack + test.dG1 + test.dG2, test.energy_backtrack , test.dG1 , test.dG2);
1615 free(test.structure);
1618 int begin_t=MAX2(11,max_pos-alignment_length);
1619 int end_t =MIN2(n1-10, max_pos+1);
1620 int begin_q=MAX2(11, max_pos_j-1);
1621 int end_q =MIN2(n2-10,max_pos_j+alignment_length-1);
1624 i_flag = (end_t == max_pos+1?1:0);
1625 j_flag = (begin_q == max_pos_j-1?1:0);
1626 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2)); /* +1 for \0 +1 for distance */
1627 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
1629 strncpy(s3, (s1+begin_t-1), end_t - begin_t+1);/* -1 to go from */
1630 strncpy(s4, (s2+begin_q-1) , end_q - begin_q+1);/* -1 to go from */
1631 s3[end_t -begin_t +1 ]='\0';/* */
1632 s4[end_q -begin_q +1 ]='\0';
1634 test = duplexfold_XS(s3,s4,access_s1,access_s2,max_pos, max_pos_j,INF,i_flag,j_flag);
1635 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
1636 test.tb,test.te,test.qb,test.qe, test.ddG, test.energy, test.dG1, test.dG2);
1637 free(s3);free(s4);free(test.structure);
1642 /*---------------------------------------------------------duplexfold----------------------------------------------------------------------------------*/
1645 PRIVATE duplexT duplexfold(const char *s1, const char *s2, const int extension_cost) {
1646 int i, j, l1, Emin=INF, i_min=0, j_min=0;
1650 n3 = (int) strlen(s1);
1651 n4 = (int) strlen(s2);
1653 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
1654 update_fold_params(); if(P) free(P); P = scale_parameters();
1658 c = (int **) space(sizeof(int *) * (n3+1));
1659 for (i=0; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
1660 encode_seqs(s1, s2);
1661 for (i=1; i<=n3; i++) {
1662 for (j=n4; j>0; j--) {
1663 int type, type2, E, k,l;
1664 type = pair[S1[i]][S2[j]];
1665 c[i][j] = type ? P->DuplexInit +2 * extension_cost: INF;
1666 if (!type) continue;
1668 *** if (i>1) c[i][j] += P->dangle5[type][SS1[i-1]]+ extension_cost;
1669 *** if (j<n4) c[i][j] += P->dangle3[type][SS2[j+1]]+ extension_cost;
1670 *** if (type>2) c[i][j] += P->TerminalAU;
1672 c[i][j] += E_ExtLoop(type, (i>1) ? SS1[i-1] : -1, (j<n4) ? SS2[j+1] : -1, P);
1673 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
1674 for (l=j+1; l<=n4; l++) {
1675 if (i-k+l-j-2>MAXLOOP) break;
1676 type2 = pair[S1[k]][S2[l]];
1677 if (!type2) continue;
1678 E = E_IntLoop(i-k-1, l-j-1, type2, rtype[type],
1679 SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P)+(i-k+l-j)*extension_cost;
1680 c[i][j] = MIN2(c[i][j], c[k][l]+E);
1685 *** if (i<n3) E += P->dangle3[rtype[type]][SS1[i+1]]+extension_cost;
1686 *** if (j>1) E += P->dangle5[rtype[type]][SS2[j-1]]+extension_cost;
1687 *** if (type>2) E += P->TerminalAU;
1690 E += E_ExtLoop(rtype[type], (j > 1) ? SS2[j-1] : -1, (i<n3) ? SS1[i+1] : -1, P);
1692 Emin=E; i_min=i; j_min=j;
1696 struc = backtrack(i_min, j_min, extension_cost);
1697 if (i_min<n3) i_min++;
1698 if (j_min>1 ) j_min--;
1699 l1 = strchr(struc, '&')-struc;
1701 size=strlen(struc)-1;
1702 Emin-= size * (extension_cost);
1705 mfe.energy = (double) Emin/100.;
1706 mfe.structure = struc;
1707 for (i=0; i<=n3; i++) free(c[i]);
1709 free(S1); free(S2); free(SS1); free(SS2);
1713 PRIVATE char *backtrack(int i, int j, const int extension_cost) {
1714 /* backtrack structure going backwards from i, and forwards from j
1715 return structure in bracket notation with & as separator */
1716 int k, l, type, type2, E, traced, i0, j0;
1717 char *st1, *st2, *struc;
1719 st1 = (char *) space(sizeof(char)*(n3+1));
1720 st2 = (char *) space(sizeof(char)*(n4+1));
1722 i0=MIN2(i+1,n3); j0=MAX2(j-1,1);
1724 while (i>0 && j<=n4) {
1725 E = c[i][j]; traced=0;
1728 type = pair[S1[i]][S2[j]];
1729 if (!type) nrerror("backtrack failed in fold duplex");
1730 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
1731 for (l=j+1; l<=n4; l++) {
1733 if (i-k+l-j-2>MAXLOOP) break;
1734 type2 = pair[S1[k]][S2[l]];
1735 if (!type2) continue;
1736 LE = E_IntLoop(i-k-1, l-j-1, type2, rtype[type],
1737 SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P)+(i-k+l-j)*extension_cost;
1738 if (E == c[k][l]+LE) {
1748 E -= E_ExtLoop(type, (i>1) ? SS1[i-1] : -1, (j<n4) ? SS2[j+1] : -1, P);
1750 *** if (i>1) E -= P->dangle5[type][SS1[i-1]]+extension_cost;
1751 *** if (j<n4) E -= P->dangle3[type][SS2[j+1]]+extension_cost;
1752 *** if (type>2) E -= P->TerminalAU;
1754 if (E != P->DuplexInit+2*extension_cost) {
1755 nrerror("backtrack failed in fold duplex");
1762 struc = (char *) space(i0-i+1+j-j0+1+2);
1763 for (k=MAX2(i,1); k<=i0; k++) if (!st1[k-1]) st1[k-1] = '.';
1764 for (k=j0; k<=j; k++) if (!st2[k-1]) st2[k-1] = '.';
1765 strcpy(struc, st1+MAX2(i-1,0));
1767 strcat(struc, st2+j0-1);
1768 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
1769 free(st1); free(st2);
1773 PRIVATE duplexT fduplexfold(const char *s1, const char *s2, const int extension_cost,const int il_a, const int il_b, const int b_a, const int b_b) {
1774 int i, j, Emin, i_min, j_min,l1;
1778 int bext=b_a+extension_cost;
1780 int iext_s=2*(il_a+extension_cost);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
1781 int iext_ass=50+il_a+extension_cost;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
1782 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
1784 int max_pos;/* get position of the best hit */
1789 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
1791 n3 = (int) strlen(s1);
1792 n4 = (int) strlen(s2);
1793 /* delta_check is the minimal distance allowed for two hits to be accepted */
1794 /* if both hits are closer, reject the smaller ( in term of position) hits */
1795 /* i want to implement a function that, given a position in a long sequence and a small sequence, */
1796 /* duplexfold them at this position and report the result at the command line */
1797 /* for this i first need to rewrite backtrack in order to remove the printf functio */
1798 /* END OF DEFINITION FOR NEEDED SUBOPT DATA */
1799 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
1800 update_fold_params(); if(P) free(P); P = scale_parameters();
1803 /*local c array initialization---------------------------------------------*/
1804 c = (int**) space(sizeof(int *) * (n3+1));
1805 in = (int**) space (sizeof(int *) * (n3+1));
1806 bx = (int**) space (sizeof(int *) * (n3+1));
1807 by = (int**) space (sizeof(int *) * (n3+1));
1808 inx= (int**) space (sizeof(int *) * (n3+1));
1809 iny= (int**) space (sizeof(int *) * (n3+1));
1810 for (i=0; i<=n3; i++){
1811 c[i] = (int *) space(sizeof(int) * (n4+1));
1812 in[i] = (int *) space(sizeof(int) * (n4+1));
1813 bx[i] = (int *) space(sizeof(int) * (n4+1));
1814 by[i] = (int *) space(sizeof(int) * (n4+1));
1815 inx[i] = (int *) space(sizeof(int) * (n4+1));
1816 iny[i] = (int *) space(sizeof(int) * (n4+1));
1818 /*-------------------------------------------------------------------------*/
1819 /*end of array initialisation----------------------------------*/
1820 /*maybe int *** would be better*/
1822 /* ------------------------------------------matrix initialisierung */
1823 for(i=0; i<n3; i++){
1824 for(j=0; j<n4; j++){
1825 in[i][j]=INF;/* no in before 1 */
1826 c[i][j] =INF; /* no bulge and no in before n2 */
1827 bx[i][j]=INF;/* no bulge before 1 */
1829 inx[i][j]=INF;/* no bulge before 1 */
1834 /*--------------------------------------------------------local array*/
1837 /* -------------------------------------------------------------matrix initialisierung */
1840 while(i < i_length) {
1845 type = pair[S1[i]][S2[j]];
1849 c[i][j]=type ? P->DuplexInit + 2*extension_cost : INF;
1851 *** update lin bx by linx liny matrix
1853 type2=pair[S2[j+1]][S1[i-1]];
1855 *** start/extend interior loop
1857 in[i][j]=MIN2(c[i - 1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s, in[i - 1][j]+iext_ass);
1859 *** start/extend nx1 target
1860 *** use same type2 as for in
1862 inx[i][j]=MIN2(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,
1863 inx[i-1][j]+iext_ass);
1865 *** start/extend 1xn target
1866 *** use same type2 as for in
1868 iny[i][j]=MIN2(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,
1869 iny[i][j+1]+iext_ass);
1871 *** extend interior loop
1873 in[i][j]=MIN2(in[i][j],in[i][j+1]+iext_ass);
1874 in[i][j]=MIN2(in[i][j],in[i - 1][j+1]+iext_s);
1876 *** start/extend bulge target
1878 type2=pair[S2[j]][S1[i-1]];
1879 bx[i][j]=MIN2(bx[i - 1][j]+bext, c[i - 1][j]+bopen+bext+(type2>2?P->TerminalAU:0));
1881 *** start/extend bulge query
1883 type2=pair[S2[j+1]][S1[i]];
1884 by[i][j]=MIN2(by[i][j+1]+bext, c[i][j+1]+bopen+bext+(type2>2?P->TerminalAU:0));
1886 ***end update recursion
1887 ***######################## Start stack extension##############################
1889 if(!type){continue;}
1890 c[i][j]+=E_ExtLoop(type, SS1[i-1] , SS2[j+1], P) + 2*extension_cost;
1894 if((type2=pair[S1[i-1]][S2[j+1]]))
1895 c[i][j]=MIN2(c[i - 1][j+1]+P->stack[rtype[type]][type2]+2*extension_cost, c[i][j]);
1897 *** 1x0 / 0x1 stack extension
1899 type2=pair[S1[i-1]][S2[j+2]];
1900 c[i][j]=MIN2(c[i - 1][j+2]+P->bulge[1]+P->stack[rtype[type]][type2]+3*extension_cost,c[i][j]);
1901 type2=pair[S1[i-2]][S2[j+1]];
1902 c[i][j]=MIN2(c[i - 2][j+1]+P->bulge[1]+P->stack[type2][rtype[type]]+3*extension_cost,c[i][j]);
1904 *** 1x1 / 2x2 stack extension
1906 type2=pair[S1[i-2]][S2[j+2]];
1907 c[i][j]=MIN2(c[i - 2][j+2]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+4*extension_cost, c[i][j]);
1908 type2 = pair[S1[i-3]][S2[j+3]];
1909 c[i][j]=MIN2(c[i - 3][j+3]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+6*extension_cost,c[i][j]);
1911 *** 1x2 / 2x1 stack extension
1912 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
1913 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
1915 type2 = pair[S1[i-3]][S2[j+2]];
1916 c[i][j]=MIN2(c[i - 3][j+2]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+5*extension_cost, c[i][j]);
1917 type2 = pair[S1[i-2]][S2[j+3]];
1918 c[i][j]=MIN2(c[i - 2][j+3]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+5*extension_cost, c[i][j]);
1921 *** 2x3 / 3x2 stack extension
1923 if((type2 = pair[S1[i-4]][S2[j+3]]))
1924 c[i][j]=MIN2(c[i - 4][j+3]+P->internal_loop[5]+P->ninio[2]+
1925 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost, c[i][j]);
1926 if((type2 = pair[S1[i-3]][S2[j+4]]))
1927 c[i][j]=MIN2(c[i - 3][j+4]+P->internal_loop[5]+P->ninio[2]+
1928 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost, c[i][j]);
1930 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
1935 c[i][j]=MIN2(in[i - 3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*iext_s+2*extension_cost, c[i][j]);
1939 c[i][j]=MIN2(in[i - 4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, c[i][j]);
1943 c[i][j]=MIN2(in[i - 2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, c[i][j]);
1947 c[i][j]=MIN2(inx[i - 3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, c[i][j]);
1951 c[i][j]=MIN2(iny[i - 1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, c[i][j]);
1956 bAU=(type>2?P->TerminalAU:0);
1957 c[i][j]=MIN2(bx[i - 2][j+1]+2*extension_cost+bext+bAU, c[i][j]);
1961 c[i][j]=MIN2(by[i - 1][j+2]+2*extension_cost+bext+bAU, c[i][j]);
1963 min_colonne=MIN2(c[i][j]+E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1] , P) + 2*extension_cost, min_colonne);
1964 if(temp>min_colonne){
1967 /* ---------------------------------------------------------------------end update */
1969 if(max>=min_colonne){
1972 max_pos_j=min_j_colonne;
1981 struc = fbacktrack(i_min, j_min, extension_cost, il_a, il_b, b_a, b_b,&dGe);
1982 if (i_min<n3-10) i_min++;
1983 if (j_min>11 ) j_min--;
1984 l1 = strchr(struc, '&')-struc;
1986 size=strlen(struc)-1;
1987 Emin-= size * (extension_cost);
1990 mfe.energy = (double) Emin/100.;
1991 mfe.energy_backtrack = (double) dGe/100.;
1992 mfe.structure = struc;
1993 free(S1); free(S2); free(SS1); free(SS2);
1994 for (i=0; i<=n3; i++) {
2002 free(c);free(in);free(bx);free(by);free(inx);free(iny);
2007 PRIVATE char *fbacktrack(int i, int j, const int extension_cost,const int il_a, const int il_b, const int b_a, const int b_b, int *dG) {
2008 /* backtrack structure going backwards from i, and forwards from j
2009 return structure in bracket notation with & as separator */
2010 int k, l, type, type2, E, traced, i0, j0;
2011 char *st1, *st2, *struc;
2013 int bext=b_a+extension_cost;
2015 int iext_s=2*(il_a+extension_cost);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
2016 int iext_ass=50+il_a+extension_cost;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
2017 st1 = (char *) space(sizeof(char)*(n3+1));
2018 st2 = (char *) space(sizeof(char)*(n4+1));
2019 i0=MIN2(i+1,n3-10); j0=MAX2(j-1,11);
2021 state=1; /* we start backtracking from a a pair , i.e. c-matrix */
2022 /* state 1 -> base pair, c
2023 state 2 -> interior loop, in
2024 state 3 -> bx loop, bx
2025 state 4 -> by loop, by
2029 type=pair[S1[i]][S2[j]];
2030 *dG+=E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1] , P);
2031 /* (type>2?P->TerminalAU:0)+P->dangle3[rtype[type]][SS1[i+1]]+P->dangle5[rtype[type]][SS2[j-1]]; */
2032 while (i>10 && j<=n4-9 && traced) {
2036 type = pair[S1[i]][S2[j]];
2038 bAU=(type>2?P->TerminalAU:0);
2039 if(!type) nrerror("backtrack failed in fold duplex");
2040 type2=pair[S1[i-1]][S2[j+1]];
2041 if(type2 && c[i][j]== (c[i - 1][j+1]+P->stack[rtype[type]][type2]+2*extension_cost)){
2044 (*dG)+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2053 type2=pair[S1[i-1]][S2[j+2]];
2054 if(type2 && c[i][j]==(c[i - 1][j+2]+P->bulge[1]+P->stack[rtype[type]][type2]+3*extension_cost)){
2057 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2066 type2=pair[S1[i-2]][S2[j+1]];
2067 if(type2 && c[i][j]==(c[i - 2][j+1]+P->bulge[1]+P->stack[type2][rtype[type]]+3*extension_cost)){
2070 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2079 type2=pair[S1[i-2]][S2[j+2]];
2080 if(type2 && c[i][j]==(c[i - 2][j+2]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+4*extension_cost)){
2083 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2092 type2 = pair[S1[i-3]][S2[j+3]];
2093 if(type2 && c[i][j]==(c[i - 3][j+3]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+6*extension_cost)){
2096 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2105 type2 = pair[S1[i-3]][S2[j+2]];
2106 if(type2 && c[i][j]==(c[i - 3][j+2]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+5*extension_cost)){
2109 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2118 type2 = pair[S1[i-2]][S2[j+3]];
2119 if(type2 && c[i][j]==(c[i - 2][j+3]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+5*extension_cost)){
2122 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2131 type2 = pair[S1[i-4]][S2[j+3]];
2132 if(type2 && c[i][j]==(c[i - 4][j+3]+P->internal_loop[5]+P->ninio[2]+
2133 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost)){
2136 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2145 type2 = pair[S1[i-3]][S2[j+4]];
2146 if(type2 && c[i][j]==(c[i - 3][j+4]+P->internal_loop[5]+P->ninio[2]+
2147 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost)){
2150 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2159 if(c[i][j]==(in[i - 3][j+3]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*extension_cost+2*iext_s)){
2170 if(c[i][j]==(in[i - 4][j+2]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost)){
2181 if(c[i][j]==(in[i - 2][j+4]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost)){
2192 if(c[i][j]==(inx[i - 3][j+1]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost)){
2203 if(c[i][j]==(iny[i - 1][j+3]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost)){
2214 if(c[i][j]==(bx[i - 2][j+1]+2*extension_cost+bext+bAU)){
2225 if(c[i][j]==(by[i - 1][j+2]+2*extension_cost+bext+bAU)){
2238 if(in[i][j]==(in[i - 1][j+1]+iext_s)){
2245 if(in[i][j]==(in[i - 1][j]+iext_ass)){
2251 if(in[i][j]==(in[i][j+1]+iext_ass)){
2257 type2=pair[S2[j+1]][S1[i-1]];
2258 if(type2 && in[i][j]==(c[i - 1][j+1]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s)){
2259 int temp; temp=k; k=i-1; i=temp;
2260 temp=l; l=j+1; j=temp;
2261 type=pair[S1[i]][S2[j]];
2262 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2270 if(bx[i][j]==(bx[i - 1][j]+bext)){
2276 type2=pair[S2[j]][S1[i-1]];
2277 if(type2 && bx[i][j]==(c[i - 1][j]+bopen+bext+(type2>2?P->TerminalAU:0))){
2278 int temp; temp=k; k=i-1; i=temp;
2279 temp=l; l=j; j=temp;
2280 type=pair[S1[i]][S2[j]];
2281 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2289 if(by[i][j]==(by[i][j+1] + bext)){
2296 type2=pair[S2[j+1]][S1[i]];
2297 if(type2 && by[i][j]==(c[i][j+1]+bopen+bext+(type2>2?P->TerminalAU:0))){
2298 int temp; temp=k; k=i; i=temp;
2299 temp=l; l=j+1; j=temp;
2300 type=pair[S1[i]][S2[j]];
2301 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2309 if(inx[i][j]==(inx[i-1][j]+iext_ass)) {
2315 type2=pair[S2[j+1]][S1[i-1]];
2316 if(type2 && inx[i][j]==(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s)){
2317 int temp; temp=k; k=i-1; i=temp;
2318 temp=l; l=j+1; j=temp;
2319 type=pair[S1[i]][S2[j]];
2320 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2328 if(iny[i][j]==(iny[i][j+1]+iext_ass)) {
2334 type2=pair[S2[j+1]][S1[i-1]];
2335 if(type2 && iny[i][j]==(c[i-1][j+1]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s)){
2336 int temp; temp=k; k=i-1; i=temp;
2337 temp=l; l=j+1; j=temp;
2338 type=pair[S1[i]][S2[j]];
2339 *dG+=E_IntLoop(i-k-1, l-j-1, type2, rtype[type],SS1[k+1], SS2[l-1], SS1[i-1], SS2[j+1],P);
2351 *** if (i>1) {E -= P->dangle5[type][SS1[i-1]]+extension_cost; *dG+=P->dangle5[type][SS1[i-1]];}
2352 *** if (j<n4){E -= P->dangle3[type][SS2[j+1]]+extension_cost; *dG+=P->dangle3[type][SS2[j+1]];}
2353 *** if (type>2) {E -= P->TerminalAU; *dG+=P->TerminalAU;}
2356 correction = E_ExtLoop(type, (i>1) ? SS1[i-1] : -1, (j<n4) ? SS2[j+1] : -1, P);
2358 E-=correction+2*extension_cost;
2359 if (E != P->DuplexInit+2*extension_cost) {
2360 nrerror("backtrack failed in second fold duplex");
2370 struc = (char *) space(i0-i+1+j-j0+1+2);
2371 for (k=MAX2(i,1); k<=i0; k++) if (!st1[k-1]) st1[k-1] = '.';
2372 for (k=j0; k<=j; k++) if (!st2[k-1]) st2[k-1] = '.';
2373 strcpy(struc, st1+MAX2(i-1,0));
2375 strcat(struc, st2+j0-1);
2376 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
2377 free(st1); free(st2);
2382 duplexT ** Lduplexfold(const char *s1, const char *s2, const int threshold, const int extension_cost, const int alignment_length, const int delta, const int fast, const int il_a, const int il_b, const int b_a, const int b_b)
2385 *** See variable definition in fduplexfold_XS
2389 int bext=b_a+extension_cost;
2391 int iext_s=2*(il_a+extension_cost);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
2392 int iext_ass=50+il_a+extension_cost;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
2393 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
2395 int max_pos;/* get position of the best hit */
2400 int *position; /* contains the position of the hits with energy > E */
2403 *** 1D array corresponding to the standard 2d recursion matrix
2404 *** Makes the computation 20% faster
2408 *** variable initialization
2410 n1 = (int) strlen(s1);
2411 n2 = (int) strlen(s2);
2413 *** Sequence encoding
2415 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
2416 update_fold_params(); if(P) free(P); P = scale_parameters();
2421 *** Position of the high score on the target and query sequence
2423 position = (int *) space ((delta+n1+3+delta) * sizeof(int));
2424 position_j= (int *) space ((delta+n1+3+delta) * sizeof(int));
2426 *** instead of having 4 2-dim arrays we use a unique 1-dim array
2427 *** The mapping 2d -> 1D is done based ont the macro
2428 *** LCI(i,j,l) ((i )*l + j)
2429 *** LINI(i,j,l) ((i + 5)*l + j)
2430 *** LBXI(i,j,l) ((i + 10)*l + j)
2431 *** LBYI(i,j,l) ((i + 15)*l + j)
2432 *** LINIX(i,j,l) ((i + 20)*l + j)
2433 *** LINIY(i,j,l) ((i + 25)*l + j)
2435 *** SA has a length of 5 (number of columns we look back) *
2436 *** * 6 (number of structures we look at) *
2437 *** * length of the sequence
2439 SA=(int *) space(sizeof(int)*5*6*(n2+5));
2440 for(j=n2+4;j>=0;j--) {
2441 SA[(j*30) ]=SA[(j*30)+1 ]=SA[(j*30)+2 ]=SA[(j*30)+3 ]=SA[(j*30)+4 ]=INF;
2442 SA[(j*30)+5 ]=SA[(j*30)+1+5 ]=SA[(j*30)+2+5 ]=SA[(j*30)+3+5 ]=SA[(j*30)+4+5 ]=INF;
2443 SA[(j*30)+10]=SA[(j*30)+1+10]=SA[(j*30)+2+10]=SA[(j*30)+3+10]=SA[(j*30)+4+10]=INF;
2444 SA[(j*30)+15]=SA[(j*30)+1+15]=SA[(j*30)+2+15]=SA[(j*30)+3+15]=SA[(j*30)+4+15]=INF;
2445 SA[(j*30)+20]=SA[(j*30)+1+20]=SA[(j*30)+2+20]=SA[(j*30)+3+20]=SA[(j*30)+4+20]=INF;
2446 SA[(j*30)+25]=SA[(j*30)+1+25]=SA[(j*30)+2+25]=SA[(j*30)+3+25]=SA[(j*30)+4+25]=INF;
2450 while(i < i_length) {
2459 type = pair[S1[i]][S2[j]];
2463 SA[LCI(idx,j,n2)]=type ? P->DuplexInit + 2*extension_cost : INF;
2465 *** update lin bx by linx liny matrix
2467 type2=pair[S2[j+1]][S1[i-1]];
2469 *** start/extend interior loop
2471 SA[LINI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatchI[type2][SS2[j]][SS1[i]]+iopen+iext_s,
2472 SA[LINI(idx_1,j,n2)]+iext_ass);
2474 *** start/extend nx1 target
2475 *** use same type2 as for in
2477 SA[LINIX(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,
2478 SA[LINIX(idx_1,j,n2)]+iext_ass);
2480 *** start/extend 1xn target
2481 *** use same type2 as for in
2483 SA[LINIY(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->mismatch1nI[type2][SS2[j]][SS1[i]]+iopen+iext_s,
2484 SA[LINIY(idx,j+1,n2)]+iext_ass);
2486 *** extend interior loop
2488 SA[LINI(idx,j,n2)]=MIN2(SA[LINI(idx,j,n2)],SA[LINI(idx,j+1,n2)]+iext_ass);
2489 SA[LINI(idx,j,n2)]=MIN2(SA[LINI(idx,j,n2)],SA[LINI(idx_1,j+1,n2)]+iext_s);
2491 *** start/extend bulge target
2493 type2=pair[S2[j]][S1[i-1]];
2494 SA[LBXI(idx,j,n2)]=MIN2(SA[LBXI(idx_1,j,n2)]+bext, SA[LCI(idx_1,j,n2)]+bopen+bext+(type2>2?P->TerminalAU:0));
2496 *** start/extend bulge query
2498 type2=pair[S2[j+1]][S1[i]];
2499 SA[LBYI(idx,j,n2)]=MIN2(SA[LBYI(idx,j+1,n2)]+bext, SA[LCI(idx,j+1,n2)]+bopen+bext+(type2>2?P->TerminalAU:0));
2501 ***end update recursion
2502 ***##################### Start stack extension ######################
2504 if(!type){continue;}
2508 SA[LCI(idx,j,n2)]+= E_ExtLoop(type, SS1[i-1] , SS2[j+1], P) + 2*extension_cost;
2512 if((type2=pair[S1[i-1]][S2[j+1]]))
2513 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+1,n2)]+P->stack[rtype[type]][type2]+2*extension_cost, SA[LCI(idx,j,n2)]);
2515 *** 1x0 / 0x1 stack extension
2517 if((type2=pair[S1[i-1]][S2[j+2]]))
2518 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_1,j+2,n2)]+P->bulge[1]+P->stack[rtype[type]][type2]+3*extension_cost,SA[LCI(idx,j,n2)]);
2519 if((type2=pair[S1[i-2]][S2[j+1]]))
2520 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+1,n2)]+P->bulge[1]+P->stack[type2][rtype[type]]+3*extension_cost,SA[LCI(idx,j,n2)]);
2522 *** 1x1 / 2x2 stack extension
2524 if((type2=pair[S1[i-2]][S2[j+2]]))
2525 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+2,n2)]+P->int11[type2][rtype[type]][SS1[i-1]][SS2[j+1]]+4*extension_cost, SA[LCI(idx,j,n2)]);
2526 if((type2 = pair[S1[i-3]][S2[j+3]]))
2527 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+3,n2)]+P->int22[type2][rtype[type]][SS1[i-2]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+6*extension_cost,SA[LCI(idx,j,n2)]);
2529 *** 1x2 / 2x1 stack extension
2530 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
2531 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
2533 if((type2 = pair[S1[i-3]][S2[j+2]]))
2534 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+2,n2)]+P->int21[rtype[type]][type2][SS2[j+1]][SS1[i-2]][SS1[i-1]]+5*extension_cost, SA[LCI(idx,j,n2)]);
2535 if((type2 = pair[S1[i-2]][S2[j+3]]))
2536 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_2,j+3,n2)]+P->int21[type2][rtype[type]][SS1[i-1]][SS2[j+1]][SS2[j+2]]+5*extension_cost, SA[LCI(idx,j,n2)]);
2538 *** 2x3 / 3x2 stack extension
2540 if((type2 = pair[S1[i-4]][S2[j+3]]))
2541 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_4,j+3,n2)]+P->internal_loop[5]+P->ninio[2]+
2542 P->mismatch23I[type2][SS1[i-3]][SS2[j+2]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost, SA[LCI(idx,j,n2)]);
2543 if((type2 = pair[S1[i-3]][S2[j+4]]))
2544 SA[LCI(idx,j,n2)]=MIN2(SA[LCI(idx_3,j+4,n2)]+P->internal_loop[5]+P->ninio[2]+
2545 P->mismatch23I[type2][SS1[i-2]][SS2[j+3]]+P->mismatch23I[rtype[type]][SS2[j+1]][SS1[i-1]]+7*extension_cost, SA[LCI(idx,j,n2)]);
2547 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
2552 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_3,j+3,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+2*iext_s+2*extension_cost,SA[LCI(idx,j,n2)]);
2556 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_4,j+2,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, SA[LCI(idx,j,n2)]);
2560 SA[LCI(idx,j,n2)]=MIN2(SA[LINI(idx_2,j+4,n2)]+P->mismatchI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_s+2*iext_ass+2*extension_cost, SA[LCI(idx,j,n2)]);
2564 SA[LCI(idx,j,n2)]=MIN2(SA[LINIX(idx_3,j+1,n2)]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, SA[LCI(idx,j,n2)]);
2568 SA[LCI(idx,j,n2)]=MIN2(SA[LINIY(idx_1,j+3,n2)]+P->mismatch1nI[rtype[type]][SS1[i-1]][SS2[j+1]]+iext_ass+iext_ass+2*extension_cost, SA[LCI(idx,j,n2)]);
2573 bAU=(type>2?P->TerminalAU:0);
2574 SA[LCI(idx,j,n2)]=MIN2(SA[LBXI(idx_2,j+1,n2)]+2*extension_cost+bext+bAU,SA[LCI(idx,j,n2)]);
2578 SA[LCI(idx,j,n2)]=MIN2(SA[LBYI(idx_1,j+2,n2)]+2*extension_cost+bext+bAU,SA[LCI(idx,j,n2)]);
2581 min_colonne=MIN2(SA[LCI(idx,j,n2)]+E_ExtLoop(rtype[type], SS2[j-1] , SS1[i+1] , P) + 2*extension_cost, min_colonne);
2582 if(temp>min_colonne){
2586 if(max>=min_colonne){
2589 max_pos_j=min_j_colonne;
2591 position[i+delta]=min_colonne;min_colonne=INF;
2592 position_j[i+delta]=min_j_colonne;
2595 /* printf("MAX: %d",max); */
2596 free(S1); free(S2); free(SS1); free(SS2);
2598 find_max(position, position_j, delta, threshold, alignment_length, s1, s2, extension_cost, fast, il_a, il_b, b_a, b_b);
2601 plot_max(max, max_pos, max_pos_j,alignment_length, s1, s2, extension_cost,fast, il_a, il_b, b_a, b_b);
2612 PRIVATE void find_max(const int *position, const int *position_j,const int delta, const int threshold, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast,const int il_a, const int il_b, const int b_a, const int b_b){
2617 if(position[pos+delta]<(threshold)){
2619 search_range=delta+1;
2620 while(--search_range){
2621 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
2622 temp_min=search_range;
2627 max_pos_j=position_j[pos+delta];
2629 max=position[pos+delta];
2630 printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
2631 pos=MAX2(10,pos-alignment_length);
2639 if(position[pos+delta]<(threshold)){
2641 search_range=delta+1;
2642 while(--search_range){
2643 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
2644 temp_min=search_range;
2649 max_pos_j=position_j[pos+delta];
2650 /* max_pos_j und pos entsprechen die realen position
2651 in der erweiterten sequenz.
2652 pos=1 -> position 1 in the sequence (and not 0 like in C)
2653 max_pos_j -> position 1 in the sequence ( not 0 like in C)
2655 int begin_t=MAX2(11, pos-alignment_length+1);/* 10 */
2656 int end_t =MIN2(n1-10, pos+1);
2657 int begin_q=MAX2(11, max_pos_j-1); /* 10 */
2658 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
2659 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2 + 20));
2660 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2 + 20));
2661 strcpy(s3,"NNNNNNNNNN");strcpy(s4,"NNNNNNNNNN");
2662 strncat(s3, (s1+begin_t-1), end_t - begin_t +1);
2663 strncat(s4, (s2+begin_q-1) , end_q - begin_q +1);
2664 strcat(s3,"NNNNNNNNNN");strcat(s4,"NNNNNNNNNN");
2665 s3[end_t -begin_t +1 +20 ]='\0';
2666 s4[end_q -begin_q +1 +20]='\0';
2668 test = fduplexfold(s3, s4, extension_cost,il_a, il_b, b_a, b_b);
2669 if(test.energy * 100 < threshold){
2670 int l1=strchr(test.structure, '&')-test.structure;
2671 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) [%5.2f]\n", test.structure,
2672 begin_t-10+test.i-l1-10,
2673 begin_t-10+test.i-1-10,
2674 begin_q-10 + test.j-1-10 ,
2675 (begin_q -11) + test.j + strlen(test.structure)-l1-2-10,
2676 test.energy,test.energy_backtrack);
2677 pos=MAX2(10,pos-delta);
2680 free(test.structure);
2688 if(position[pos+delta]<(threshold)){
2690 search_range=delta+1;
2691 while(--search_range){
2692 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
2693 temp_min=search_range;
2698 max_pos_j=position_j[pos+delta];
2699 /* max_pos_j und pos entsprechen die realen position
2700 in der erweiterten sequenz.
2701 pos=1 -> position 1 in the sequence (and not 0 like in C)
2702 max_pos_j -> position 1 in the sequence ( not 0 like in C)
2704 int begin_t=MAX2(11, pos-alignment_length+1);/* 10 */
2705 int end_t =MIN2(n1-10, pos+1);
2706 int begin_q=MAX2(11, max_pos_j-1); /* 10 */
2707 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
2708 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
2709 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
2710 strncpy(s3, (s1+begin_t-1), end_t - begin_t +1);
2711 strncpy(s4, (s2+begin_q-1) , end_q - begin_q +1);
2712 s3[end_t -begin_t +1 ]='\0';
2713 s4[end_q -begin_q +1 ]='\0';
2715 test = duplexfold(s3, s4, extension_cost);
2716 if(test.energy * 100 < threshold){
2717 int l1=strchr(test.structure, '&')-test.structure;
2718 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
2719 begin_t-10+test.i-l1,
2720 begin_t-10+test.i-1,
2721 begin_q-10 + test.j-1 ,
2722 (begin_q -11) + test.j + strlen(test.structure)-l1-2,
2724 pos=MAX2(10,pos-delta);
2727 free(test.structure);
2732 PRIVATE void plot_max(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1, const char *s2, const int extension_cost, const int fast,const int il_a, const int il_b, const int b_a, const int b_b)
2735 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos-10, max_pos_j-10, ((double)max)/100);
2738 int begin_t=MAX2(11, max_pos-alignment_length+1);/* 10 */
2739 int end_t =MIN2(n1-10, max_pos+1);
2740 int begin_q=MAX2(11, max_pos_j-1); /* 10 */
2741 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
2742 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2 + 20));
2743 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2 + 20));
2744 strcpy(s3,"NNNNNNNNNN");strcpy(s4,"NNNNNNNNNN");
2745 strncat(s3, (s1+begin_t-1), end_t - begin_t +1);
2746 strncat(s4, (s2+begin_q-1) , end_q - begin_q +1);
2747 strcat(s3,"NNNNNNNNNN");strcat(s4,"NNNNNNNNNN");
2748 s3[end_t -begin_t +1 +20 ]='\0';
2749 s4[end_q -begin_q +1 +20]='\0';
2751 test = fduplexfold(s3, s4, extension_cost,il_a, il_b, b_a, b_b);
2752 int l1=strchr(test.structure, '&')-test.structure;
2753 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) [%5.2f]\n", test.structure,
2754 begin_t-10+test.i-l1-10,
2755 begin_t-10+test.i-1-10,
2756 begin_q-10 + test.j-1-10 ,
2757 (begin_q -11) + test.j + strlen(test.structure)-l1-2-10,
2758 test.energy, test.energy_backtrack);
2759 free(s3);free(s4);free(test.structure);
2763 int begin_t=MAX2(11, max_pos-alignment_length+1);
2764 int end_t =MIN2(n1-10, max_pos+1);
2765 int begin_q=MAX2(11, max_pos_j-1);
2766 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
2767 char *s3 = (char*) space(sizeof(char)*(end_t - begin_t +2));
2768 char *s4 = (char*) space(sizeof(char)*(end_q - begin_q +2));
2769 strncpy(s3, (s1+begin_t-1), end_t - begin_t + 1);
2770 strncpy(s4, (s2+begin_q-1) , end_q - begin_q +1 );
2771 s3[end_t -begin_t +1 ]='\0';
2772 s4[end_q -begin_q +1 ]='\0';
2773 test = duplexfold(s3, s4, extension_cost);
2774 int l1=strchr(test.structure, '&')-test.structure;
2775 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
2776 begin_t-10+test.i-l1,
2777 begin_t-10+test.i-1,
2778 begin_q-10 +test.j-1 ,
2779 (begin_q -11) + test.j + strlen(test.structure)-l1-2,
2781 free(s3);free(s4);free(test.structure);
2786 PRIVATE void update_dfold_params(void)
2789 P = scale_parameters();
2793 PRIVATE void encode_seqs(const char *s1, const char *s2) {
2797 S1 = encode_seq(s1);
2798 SS1= (short *) space(sizeof(short)*(l+1));
2799 /* SS1 exists only for the special X K and I bases and energy_set!=0 */
2801 for (i=1; i<=l; i++) { /* make numerical encoding of sequence */
2802 SS1[i] = alias[S1[i]]; /* for mismatches of nostandard bases */
2806 S2 = encode_seq(s2);
2807 SS2= (short *) space(sizeof(short)*(l+1));
2808 /* SS2 exists only for the special X K and I bases and energy_set!=0 */
2810 for (i=1; i<=l; i++) { /* make numerical encoding of sequence */
2811 SS2[i] = alias[S2[i]]; /* for mismatches of nostandard bases */
2815 PRIVATE short * encode_seq(const char *sequence) {
2818 l = strlen(sequence);
2819 S = (short *) space(sizeof(short)*(l+2));
2822 /* make numerical encoding of sequence */
2823 for (i=1; i<=l; i++)
2824 S[i]= (short) encode_char(toupper(sequence[i-1]));
2826 /* for circular folding add first base at position n+1 */
2832 int arraySize(duplexT** array)
2835 while(array[site_count]!=NULL){
2841 void freeDuplexT(duplexT** array)
2843 int size=arraySize(array);
2845 free(array[size]->structure);
2848 free(array[0]->structure);