1 /* Last changed Time-stamp: <2007-10-30 14:06:22 htafer> */
3 compute the duplex structure of two RNA strands,
4 allowing only inter-strand base pairs.
5 see cofold() for computing hybrid structures without
14 library containing the function used in rnaplex
15 the program rnaplex uses the following function
16 Lduplexfold: finds high scoring segments
17 it stores the end-position of these segments in an array
18 and call then for each of these positions the duplexfold function
19 which allows to make backtracking for each of the high scoring position
20 It allows to find suboptimal partially overlapping (depends on a a parameter)
21 duplexes between a long RNA and a shorter one.
22 Contrarly to RNAduplex, the energy model is not in E~log(N),
23 where N is the length of an interial loop but used an affine model,
24 where the extension and begin parameter are fitted to the energy
25 parameter used by RNAduplex. This allows to check for duplex between a short RNA(20nt)
26 and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
28 The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
29 given threshold this value is stored in an array. When the alignment score goes
30 then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
31 to find all non-overlapping high-scoring segments.
32 For more information check "durbin, biological sequence analysis"
42 #include "energy_par.h"
43 #include "fold_vars.h"
47 #include "loop_energies.h"
52 static char rcsid[] UNUSED = "$Id: plex.c,v 1.14 2007/06/12 12:50:16 htafer Exp $";
56 #define PRIVATE static
58 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
59 #define NEW_NINIO 1 /* new asymetry penalty */
60 #define ARRAY 32 /*array size*/
62 #define MINPSCORE -2 * UNIT
64 *** Due to the great similarity between functions,
65 *** more annotation can be found in plex.c
68 PRIVATE short *encode_seq(const char *seq);
69 PRIVATE void update_dfold_params(void);
71 *** aliduplexfold(_XS)/alibacktrack(_XS) computes duplex interaction with standard energy and considers extension_cost
72 *** alifind_max(_XS)/aliplot_max(_XS) find suboptimals and MFE
74 PRIVATE duplexT aliduplexfold(const char *s1[], const char *s2[], const int extension_cost);
75 PRIVATE char * alibacktrack(int i, int j, const short *s1[], const short *s2[], const int extension_cost);
76 PRIVATE void alifind_max(const int *position, const int *position_j,const int delta, const int threshold,
77 const int alignment_length, const char *s1[], const char *s2[], const int extension_cost, const int fast);
78 PRIVATE void aliplot_max(const int max, const int max_pos, const int max_pos_j,
79 const int alignment_length, const char *s1[], const char *s2[], const int extension_cost, const int fast);
80 PRIVATE duplexT aliduplexfold_XS(const char *s1[], const char *s2[],const int **access_s1,
81 const int **access_s2, const int i_pos, const int j_pos, const int threshold,const int i_flag, const int j_flag);
82 PRIVATE char * alibacktrack_XS(int i, int j, const short *s1[], const short *s2[], const int** access_s1, const int** access_s2,const int i_flag, const int j_flag);
83 PRIVATE void alifind_max_XS(const int *position, const int *position_j,
84 const int delta, const int threshold, const int alignment_length,
85 const char* s1[], const char* s2[],
86 const int **access_s1, const int **access_s2, const int fast);
87 PRIVATE void aliplot_max_XS(const int max, const int max_pos, const int max_pos_j,
88 const int alignment_length, const char *s1[], const char* s2[],
89 const int **access_s1, const int **access_s2, const int fast);
92 *** computes covariance score
95 PRIVATE int covscore(const int *types, int n_seq);
97 extern double cv_fact; /* from alifold.c, default 1 */
98 extern double nc_fact;
103 #define MAXSECTORS 500 /* dimension for a backtrack array */
104 #define LOCALITY 0. /* locality parameter for base-pairs */
106 PRIVATE paramT *P = NULL;
107 PRIVATE int **c = NULL;
108 PRIVATE int **lc = NULL, **lin = NULL, **lbx = NULL, **lby = NULL,**linx = NULL, **liny = NULL;
115 PRIVATE int delay_free=0;
118 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
122 /*----------------------------------------------ALIDUPLEXFOLD-----------------------------------------------------------------------------------------------------------*/
123 PRIVATE duplexT aliduplexfold(const char *s1[], const char *s2[], const int extension_cost) {
124 int i, j, s, n_seq, l1, Emin=INF, i_min=0, j_min=0;
129 n3 = (int) strlen(s1[0]);
130 n4 = (int) strlen(s2[0]);
131 for (s=0; s1[s]!=NULL; s++);
133 for (s=0; s2[s]!=NULL; s++);
134 if (n_seq != s) nrerror("unequal number of sequences in aliduplexfold()\n");
136 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
137 update_fold_params(); if(P) free(P); P = scale_parameters();
141 c = (int **) space(sizeof(int *) * (n3+1));
142 for (i=1; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
144 S1 = (short **) space((n_seq+1)*sizeof(short *));
145 S2 = (short **) space((n_seq+1)*sizeof(short *));
146 for (s=0; s<n_seq; s++) {
147 if (strlen(s1[s]) != n3) nrerror("uneqal seqence lengths");
148 if (strlen(s2[s]) != n4) nrerror("uneqal seqence lengths");
149 S1[s] = encode_seq(s1[s]);
150 S2[s] = encode_seq(s2[s]);
152 type = (int *) space(n_seq*sizeof(int));
154 for (i=1; i<=n3; i++) {
155 for (j=n4; j>0; j--) {
157 for (s=0; s<n_seq; s++) {
158 type[s] = pair[S1[s][i]][S2[s][j]];
160 psc = covscore(type, n_seq);
161 for (s=0; s<n_seq; s++) if (type[s]==0) type[s]=7;
162 c[i][j] = (psc>=MINPSCORE) ? (n_seq*(P->DuplexInit + 2*extension_cost)) : INF;
163 if (psc<MINPSCORE) continue;
164 for (s=0; s<n_seq; s++) {
165 c[i][j] += E_ExtLoop(type[s], (i>1) ? S1[s][i-1] : -1, (j<n4) ? S2[s][j+1] : -1, P) + 2*extension_cost;
167 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
168 for (l=j+1; l<=n4; l++) {
170 if (i-k+l-j-2>MAXLOOP) break;
171 if (c[k][l]>INF/2) continue;
172 for (E=s=0; s<n_seq; s++) {
173 type2 = pair[S1[s][k]][S2[s][l]];
174 if (type2==0) type2=7;
175 E += E_IntLoop(i-k-1, l-j-1, type2, rtype[type[s]],
176 S1[s][k+1], S2[s][l-1], S1[s][i-1], S2[s][j+1],P) + (i-k+l-j)*extension_cost;
178 c[i][j] = MIN2(c[i][j], c[k][l]+E);
183 for (s=0; s<n_seq; s++) {
184 E += E_ExtLoop(rtype[type[s]], (j>1) ? S2[s][j-1] : -1, (i<n3) ? S1[s][i+1] : -1, P) +2*extension_cost;
187 Emin=E; i_min=i; j_min=j;
191 struc = alibacktrack(i_min, j_min, (const short int**) S1, (const short int**) S2 , extension_cost);
192 if (i_min<n3) i_min++;
193 if (j_min>1 ) j_min--;
194 l1 = strchr(struc, '&')-struc;
196 size=strlen(struc)-1;
197 Emin-=size * n_seq * extension_cost;
200 mfe.energy = (float) (Emin/(100.*n_seq));
201 mfe.structure = struc;
203 for (i=1; i<=n3; i++) free(c[i]);
206 for (s=0; s<n_seq; s++) {
207 free(S1[s]); free(S2[s]);
209 free(S1); free(S2); free(type);
214 PRIVATE char *alibacktrack(int i, int j, const short *S1[], const short *S2[], const int extension_cost) {
215 /* backtrack structure going backwards from i, and forwards from j
216 return structure in bracket notation with & as separator */
217 int k, l, *type, type2, E, traced, i0, j0, s, n_seq;
218 char *st1, *st2, *struc;
223 for (s=0; S1[s]!=NULL; s++);
225 for (s=0; S2[s]!=NULL; s++);
226 if (n_seq != s) nrerror("unequal number of sequences in alibacktrack()\n");
228 st1 = (char *) space(sizeof(char)*(n3+1));
229 st2 = (char *) space(sizeof(char)*(n4+1));
230 type = (int *) space(n_seq*sizeof(int));
232 i0=MIN2(i+1,n3); j0=MAX2(j-1,1);
234 while (i>0 && j<=n4) {
236 E = c[i][j]; traced=0;
239 for (s=0; s<n_seq; s++) {
240 type[s] = pair[S1[s][i]][S2[s][j]];
242 psc = covscore(type, n_seq);
243 for (s=0; s<n_seq; s++) if (type[s]==0) type[s] = 7;
245 for (k=i-1; k>0 && k>i-MAXLOOP-2; k--) {
246 for (l=j+1; l<=n4; l++) {
248 if (i-k+l-j-2>MAXLOOP) break;
249 if (c[k][l]>INF/2) continue;
250 for (s=LE=0; s<n_seq; s++) {
251 type2 = pair[S1[s][k]][S2[s][l]];
252 if (type2==0) type2=7;
253 LE += E_IntLoop(i-k-1, l-j-1, type2, rtype[type[s]],
254 S1[s][k+1], S2[s][l-1], S1[s][i-1], S2[s][j+1],P)+(i-k+l-j)*extension_cost;
256 if (E == c[k][l]+LE) {
265 for (s=0; s<n_seq; s++) {
266 E -= E_ExtLoop(type[s], (i>1) ? S1[s][i-1] : -1, (j<n4) ? S2[s][j+1] : -1, P) + 2*extension_cost;
268 if (E != n_seq*P->DuplexInit + n_seq*2*extension_cost) {
269 nrerror("backtrack failed in aliduplex");
276 struc = (char *) space(i0-i+1+j-j0+1+2);
277 for (k=MAX2(i,1); k<=i0; k++) if (!st1[k-1]) st1[k-1] = '.';
278 for (k=j0; k<=j; k++) if (!st2[k-1]) st2[k-1] = '.';
279 strcpy(struc, st1+MAX2(i-1,0)); strcat(struc, "&");
280 strcat(struc, st2+j0-1);
282 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
283 free(st1); free(st2); free(type);
288 duplexT** aliLduplexfold(const char *s1[], const char *s2[], const int threshold, const int extension_cost, const int alignment_length, const int delta, const int fast,const int il_a, const int il_b, const int b_a, const int b_b)
295 int bext=b_a+extension_cost;
297 int iext_s=2*(il_a+extension_cost);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
298 int iext_ass=50+il_a+extension_cost;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
299 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
301 int max_pos;/* get position of the best hit */
306 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
307 int *position; /* contains the position of the hits with energy > E */
311 n1 = (int) strlen(s1[0]);
312 n2 = (int) strlen(s2[0]);
313 for (s=0; s1[s]; s++);
315 for (s=0; s2[s]; s++);
316 if (n_seq != s) nrerror("unequal number of sequences in aliduplexfold()\n");
318 position = (int *) space ((delta+(n1)+4+delta) * sizeof(int));
319 position_j= (int *) space ((delta+(n1)+4+delta) * sizeof(int));
321 if ((!P) || (fabs(P->temperature - temperature)>1e-6)){
322 update_dfold_params();
325 lc = (int**) space (sizeof(int *) * 5);
326 lin = (int**) space (sizeof(int *) * 5);
327 lbx = (int**) space (sizeof(int *) * 5);
328 lby = (int**) space (sizeof(int *) * 5);
329 linx = (int**) space (sizeof(int *) * 5);
330 liny = (int**) space (sizeof(int *) * 5);
332 for (i=0; i<=4; i++){
333 lc[i] = (int *) space(sizeof(int) * (n2+5));
334 lin[i] = (int *) space(sizeof(int) * (n2+5));
335 lbx[i] = (int *) space(sizeof(int) * (n2+5));
336 lby[i] = (int *) space(sizeof(int) * (n2+5));
337 linx[i]= (int *) space(sizeof(int) * (n2+5));
338 liny[i]= (int *) space(sizeof(int) * (n2+5));
342 S1 = (short **) space((n_seq+1)*sizeof(short *));
343 S2 = (short **) space((n_seq+1)*sizeof(short *));
344 for (s=0; s<n_seq; s++) {
345 if (strlen(s1[s]) != n1) nrerror("uneqal seqence lengths");
346 if (strlen(s2[s]) != n2) nrerror("uneqal seqence lengths");
347 S1[s] = encode_seq(s1[s]);
348 S2[s] = encode_seq(s2[s]);
350 type = (int *) space(n_seq*sizeof(int));
352 *** array initialization
355 lbx[0][j]=lbx[1][j]=lbx[2][j]=lbx[3][j] = lbx[4][j] =INF;
356 lin[0][j]=lin[1][j]=lin[2][j]=lin[3][j] = lin[4][j] =INF;
357 lc[0][j] =lc[1][j] =lc[2][j] = lc[3][j] = lc[4][j] =INF;
358 lby[0][j]=lby[1][j]=lby[2][j]=lby[3][j] = lby[4][j] =INF;
359 liny[0][j]=liny[1][j]=liny[2][j]=liny[3][j]=liny[4][j]=INF;
360 linx[0][j]=linx[1][j]=linx[2][j]=linx[3][j]=linx[4][j]=INF;
364 while(i < i_length) {
373 for (s=0; s<n_seq; s++) {
374 type[s] = pair[S1[s][i]][S2[s][j]];
376 psc = covscore(type, n_seq);
377 for (s=0; s<n_seq; s++) if (type[s]==0) type[s]=7;
378 lc[idx][j] = (psc>=MINPSCORE) ? (n_seq*P->DuplexInit + 2*n_seq*extension_cost) : INF;
380 *** Update matrix. It is the average over all sequence of a given structure element
381 *** c_stack -> stacking of c
382 *** c_10, c01 -> stack from bulge
383 *** c_nm -> arrives in stack from nxm loop
384 *** c_in -> arrives in stack from interior loop
385 *** c_bx -> arrives in stack from large bulge on target
386 *** c_by -> arrives in stack from large bulge on query
389 int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by, c_inx, c_iny; /* matrix c */
390 int in, in_x, in_y, in_xy; /* in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
395 in=lc[idx_1][j+1]; in_x=lin[idx_1][j]; in_y=lin[idx][j+1]; in_xy=lin[idx_1][j+1];
396 inx=lc[idx_1][j+1]; inx_x=linx[idx_1][j];
397 iny=lc[idx_1][j+1]; iny_y=liny[idx][j+1];
398 bx=lc[idx_1][j]; bx_x=lbx[idx_1][j];
399 by=lc[idx][j+1]; by_y=lby[idx][j+1];
400 c_stack=lc[idx_1][j+1]; c_01=lc[idx_1][j+2];c_10=lc[idx_2][j+1];
401 c_12=lc[idx_2][j+3];c_21=lc[idx_3][j+2];c_11=lc[idx_2][j+2];
402 c_22=lc[idx_3][j+3];c_32=lc[idx_4][j+3];c_23=lc[idx_3][j+4];
403 c_in=lin[idx_3][j+3];c_in2x=lin[idx_4][j+2];c_in2y=lin[idx_2][j+4];
404 c_inx=linx[idx_3][j+1]; c_iny=liny[idx_1][j+3];
405 c_bx=lbx[idx_2][j+1];c_by=lby[idx_1][j+2];
406 for (s=0; s<n_seq; s++) {
407 type2 = pair[S2[s][j+1]][S1[s][i-1]];
408 in +=P->mismatchI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s;
412 inx +=P->mismatch1nI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s;
414 iny +=P->mismatch1nI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s;
416 type2=pair[S2[s][j]][S1[s][i-1]];
417 bx +=bopen+bext+(type2>2?P->TerminalAU:0);
419 type2=pair[S2[s][j+1]][S1[s][i]];
420 by +=bopen+bext+(type2>2?P->TerminalAU:0);
423 lin [idx][j]=MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
424 linx[idx][j]=MIN2(inx_x, inx);
425 liny[idx][j]=MIN2(iny_y, iny);
426 lby[idx][j] =MIN2(by, by_y);
427 lbx[idx][j] =MIN2(bx, bx_x);
429 if (psc<MINPSCORE) continue;
430 for (s=0; s<n_seq; s++) {
431 lc[idx][j]+=E_ExtLoop(type[s], S1[s][i-1],S2[s][j+1], P) + 2*extension_cost;
433 for (s=0; s<n_seq; s++) {
434 type2=pair[S1[s][i-1]][S2[s][j+1]];if (type2==0) type2=7;
435 c_stack+=E_IntLoop(0,0,type2, rtype[type[s]],S1[s][i], S2[s][j], S1[s][i-1], S2[s][j+1], P)+2*extension_cost;
436 type2=pair[S1[s][i-1]][S2[s][j+2]];if (type2==0) type2=7;
437 c_01 +=E_IntLoop(0,1,type2, rtype[type[s]],S1[s][i], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+3*extension_cost;
438 type2=pair[S1[s][i-2]][S2[s][j+1]]; if (type2==0) type2=7;
439 c_10 +=E_IntLoop(1,0,type2, rtype[type[s]],S1[s][i-1], S2[s][j], S1[s][i-1], S2[s][j+1], P)+3*extension_cost;
440 type2=pair[S1[s][i-2]][S2[s][j+2]]; if (type2==0) type2=7;
441 c_11 +=E_IntLoop(1,1,type2, rtype[type[s]],S1[s][i-1], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+4*extension_cost;
442 type2 = pair[S1[s][i-3]][S2[s][j+3]];if (type2==0) type2=7;
443 c_22 +=E_IntLoop(2,2,type2, rtype[type[s]],S1[s][i-2], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+6*extension_cost;
444 type2 = pair[S1[s][i-3]][S2[s][j+2]];if (type2==0) type2=7;
445 c_21 +=E_IntLoop(2,1,type2, rtype[type[s]],S1[s][i-2], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+5*extension_cost;
446 type2 = pair[S1[s][i-2]][S2[s][j+3]];if (type2==0) type2=7;
447 c_12 +=E_IntLoop(1,2,type2, rtype[type[s]],S1[s][i-1], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+5*extension_cost;
448 type2 = pair[S1[s][i-4]][S2[s][j+3]];if (type2==0) type2=7;
449 c_32 +=E_IntLoop(3,2,type2, rtype[type[s]],S1[s][i-3], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+7*extension_cost;
450 type2 = pair[S1[s][i-3]][S2[s][j+4]];if (type2==0) type2=7;
451 c_23 +=E_IntLoop(2,3,type2, rtype[type[s]],S1[s][i-2], S2[s][j+3], S1[s][i-1], S2[s][j+1], P)+7*extension_cost;
452 c_in +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+2*extension_cost+2*iext_s;
453 c_in2x +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_s+2*iext_ass+2*extension_cost;
454 c_in2y +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_s+2*iext_ass+2*extension_cost;
455 c_inx +=P->mismatch1nI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_ass+iext_ass+2*extension_cost;
456 c_iny +=P->mismatch1nI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_ass+iext_ass+2*extension_cost;
458 bAU=(type[s]>2?P->TerminalAU:0);
459 c_bx +=2*extension_cost+bext+bAU;
460 c_by +=2*extension_cost+bext+bAU;
462 lc[idx][j] =MIN2(lc[idx][j],
495 for (s=0; s<n_seq; s++) {
496 temp+=E_ExtLoop(rtype[type[s]], S2[s][j-1],S1[s][i+1],P)+2*extension_cost;
498 if(min_colonne > temp){
503 if(max>=min_colonne){
506 max_pos_j=min_j_colonne;
508 position[i+delta]=min_colonne;min_colonne=INF;
509 position_j[i+delta]=min_j_colonne;
512 /* printf("MAX:%d ",max); */
513 for (s=0; s<n_seq; s++) {free(S1[s]);free(S2[s]);}
516 alifind_max(position, position_j, delta, threshold, alignment_length, s1, s2, extension_cost, fast);
518 aliplot_max(max, max_pos, max_pos_j,alignment_length, s1, s2, extension_cost,fast);
519 for (i=0; i<=4; i++) {free(lc[i]);free(lin[i]);free(lbx[i]);free(lby[i]);free(linx[i]);free(liny[i]);}
520 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
521 free(lc);free(lin);free(lbx);free(lby);free(linx);free(liny);
529 PRIVATE void alifind_max(const int *position, const int *position_j,
530 const int delta, const int threshold, const int alignment_length,
531 const char *s1[], const char *s2[],
532 const int extension_cost, const int fast){
534 for (n_seq=0; s1[n_seq]!=NULL; n_seq++);
539 if(position[pos+delta]<(threshold)){
541 search_range=delta+1;
542 while(--search_range){
543 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
544 temp_min=search_range;
549 max_pos_j=position_j[pos+delta];
551 max=position[pos+delta];
552 printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/(n_seq*100));
553 pos=MAX2(10,pos-delta);
560 /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
562 if(position[pos+delta]<(threshold)){
564 search_range=delta+1;
565 while(--search_range){
566 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
567 temp_min=search_range;
572 max_pos_j=position_j[pos+delta];
573 /* printf("%d %d %d\n", pos, max_pos_j,position[pos+delta]); */
574 int begin_t=MAX2(11, pos-alignment_length+1);
575 int end_t =MIN2(n1-10, pos+1);
576 int begin_q=MAX2(11, max_pos_j-1);
577 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
579 s3 = (char**) space(sizeof(char*)*(n_seq+1));
580 s4 = (char**) space(sizeof(char*)*(n_seq+1));
582 for(i=0; i<n_seq; i++){
583 s3[i] = (char*) space(sizeof(char)*(end_t-begin_t+2));
584 s4[i] = (char*) space(sizeof(char)*(end_q-begin_q+2));
585 strncpy(s3[i], (s1[i]+begin_t-1), end_t - begin_t +1);
586 strncpy(s4[i], (s2[i]+begin_q-1), end_q - begin_q +1);
587 s3[i][end_t - begin_t +1]='\0';
588 s4[i][end_q - begin_q +1]='\0';
591 test = aliduplexfold((const char**)s3, (const char**)s4, extension_cost);
592 /* printf("test %d threshold %d",test.energy*100,(threshold/n_seq)); */
593 if(test.energy * 100 < (int) (threshold/n_seq)){
594 int l1=strchr(test.structure, '&')-test.structure;
595 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
596 begin_t -10 +test.i-l1,
597 begin_t -10 +test.i-1,
598 begin_q -10 + test.j - 1,
599 begin_q-11 + test.j +strlen(test.structure) -l1 -2 , test.energy);
600 pos=MAX2(10,pos-delta);
603 for(i=0;i<n_seq;i++){
604 free(s3[i]);free(s4[i]);
607 free(test.structure);
612 PRIVATE void aliplot_max(const int max, const int max_pos, const int max_pos_j, const int alignment_length, const char *s1[], const char *s2[], const int extension_cost, const int fast)
615 for (n_seq=0; !(s1[n_seq]==NULL); n_seq++);
616 n1 = strlen(s1[0]); /* get length of alignment */
617 n2 = strlen(s2[0]); /* get length of alignment */
619 printf("target upper bound %d: query lower bound %d (%5.2f)\n",
620 max_pos-10, max_pos_j-10, (double) ((double)max)/(100*n_seq));
623 int begin_t=MAX2(11, max_pos-alignment_length+1);
624 int end_t =MIN2(n1-10, max_pos+1);
625 int begin_q=MAX2(11, max_pos_j-1);
626 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
628 s3 = (char**) space(sizeof(char*)*(n_seq+1));
629 s4 = (char**) space(sizeof(char*)*(n_seq+1));
631 for(i=0; i<n_seq; i++){
632 s3[i] = (char*) space(sizeof(char)*(end_t-begin_t+2));
633 s4[i] = (char*) space(sizeof(char)*(end_q-begin_q+2));
634 strncpy(s3[i], (s1[i]+begin_t-1), end_t - begin_t +1);
635 strncpy(s4[i], (s2[i]+begin_q-1), end_q - begin_q +1);
636 s3[i][end_t - begin_t +1]='\0';
637 s4[i][end_q - begin_q +1]='\0';
640 s3[n_seq]=s4[n_seq]=NULL;
641 test = aliduplexfold((const char**) s3,(const char**) s4, extension_cost);
642 int l1=strchr(test.structure, '&')-test.structure;
643 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n",
645 begin_t -10 +test.i-l1,
646 begin_t -10 +test.i-1,
647 begin_q-10 + test.j - 1,
648 begin_q -11 + test.j +strlen(test.structure) - l1 - 2,
650 for(i=0; i<n_seq ; i++){
651 free(s3[i]);free(s4[i]);
654 free(test.structure);
658 PRIVATE duplexT aliduplexfold_XS(const char *s1[], const char *s2[],
659 const int **access_s1, const int **access_s2,
660 const int i_pos, const int j_pos, const int threshold,
661 const int i_flag, const int j_flag){
662 int i,j,s,p,q, Emin=INF, l_min=0, k_min=0;
669 n3 = (int) strlen(s1[0]);
670 n4 = (int) strlen(s2[0]);
671 for (s=0; s1[s]!=NULL; s++);
673 for (s=0; s2[s]!=NULL; s++);
674 /* printf("%d \n",i_pos); */
675 if ((!P) || (fabs(P->temperature - temperature)>1e-6)) {
676 update_fold_params(); if(P) free(P); P = scale_parameters();
679 c = (int **) space(sizeof(int *) * (n3+1));
680 for (i=0; i<=n3; i++) c[i] = (int *) space(sizeof(int) * (n4+1));
681 for (i=0; i<=n3; i++){
686 S1 = (short **) space((n_seq+1)*sizeof(short *));
687 S2 = (short **) space((n_seq+1)*sizeof(short *));
688 for (s=0; s<n_seq; s++) {
689 if (strlen(s1[s]) != n3) nrerror("uneqal seqence lengths");
690 if (strlen(s2[s]) != n4) nrerror("uneqal seqence lengths");
691 S1[s] = encode_seq(s1[s]);
692 S2[s] = encode_seq(s2[s]);
694 type = (int *) space(n_seq*sizeof(int));
695 type2 = (int *) space(n_seq*sizeof(int));
697 i=n3-i_flag; j=1+j_flag;
698 for (s=0; s<n_seq; s++) {
699 type[s] = pair[S1[s][i]][S2[s][j]];
701 c[i][j] = n_seq*P->DuplexInit - covscore(type,n_seq);
702 for (s=0; s<n_seq; s++) if (type[s]==0) type[s]=7;
703 for (s=0; s<n_seq; s++) {
704 c[i][j]+=E_ExtLoop(rtype[type[s]], (j_flag ? S2[s][j-1] : -1) , (i_flag ? S1[s][i+1] : -1), P);
706 k_min=i; l_min=j; Emin=c[i][j];
707 for (k=i; k>1 ; k--) {
708 if(k<i) c[k+1][0]=INF;
709 for (l=j; l<=n4-1; l++) {
714 for(s=0;s<n_seq;s++){
715 type2[s] = pair[S1[s][k]][S2[s][l]];
717 psc2=covscore(type2, n_seq);
718 if (psc2<MINPSCORE) continue;
719 for (s=0; s<n_seq; s++) if (type2[s]==0) type2[s]=7;
720 for (p=k+1; p<= n3 -i_flag && p<k+MAXLOOP-1; p++) {
721 for (q = l-1; q >= 1+j_flag; q--) {
722 if (p-k+l-q-2>MAXLOOP) break;
723 for(E=s=0;s<n_seq;s++){
724 type3=pair[S1[s][p]][S2[s][q]];
725 if(type3==0) type3=7;
726 E += E_IntLoop(p-k-1, l-q-1, type2[s], rtype[type3],
727 S1[s][k+1], S2[s][l-1], S1[s][p-1], S2[s][q+1],P);
729 c[k][l] = MIN2(c[k][l], c[p][q]+E);
734 E+=n_seq*(access_s1[i-k+1][i_pos]+access_s2[l-1][j_pos+(l-1)-1]);
735 for (s=0; s<n_seq; s++) {
736 E+=E_ExtLoop(type2[s], (k>1) ? S1[s][k-1] : -1, (l<n4) ? S2[s][l+1] : -1, P);
739 Emin=E; k_min=k; l_min=l;
743 if(Emin > threshold-1){
746 for (i=0; i<=n3; i++) free(c[i]);
748 for(i=0; i<=n_seq;i++){
752 free(S1); free(S2); /* free(SS1); free(SS2); */
753 free(type);free(type2);
756 struc = alibacktrack_XS(k_min, l_min,(const short int**)S1,(const short int**)S2,access_s1, access_s2,i_flag,j_flag);
758 int dx_5, dx_3, dy_5, dy_3,dGx,dGy,bonus_x, bonus_y,temp_dangle;
759 dx_5=0; dx_3=0; dy_5=0; dy_3=0;dGx=0;dGy=0;bonus_x=0; bonus_y=0;temp_dangle=0;
760 dGx =n_seq*(access_s1[i-k_min+1][i_pos]);dx_3=0; dx_5=0;bonus_x=0;
761 dGy =n_seq*access_s2[l_min-j+1][j_pos + (l_min-1)-1];
762 mfe.tb=i_pos -9 - i + k_min -1 -dx_5;
763 mfe.te=i_pos -9 -1 + dx_3;
764 mfe.qb=j_pos -9 -1 - dy_5;
765 mfe.qe=j_pos + l_min -3 -9 + dy_3;
766 mfe.ddG=(double) (Emin *0.01);
767 mfe.dG1=(double) (dGx*(0.01));
768 mfe.dG2=(double) (dGy*(0.01));
769 mfe.energy= mfe.ddG - mfe.dG1 - mfe.dG2;
770 mfe.structure = struc;
771 for (i=0; i<=n3; i++) free(c[i]);
773 for(i=0; i<=n_seq;i++){
777 free(S1); free(S2); free(type);free(type2);
781 PRIVATE char *alibacktrack_XS(int i, int j, const short *S1[], const short *S2[],
782 const int** access_s1, const int ** access_s2, const int i_flag, const int j_flag) {
783 int n3,n4,k, l, *type, type2, E, traced, i0, j0,s,n_seq,psc;
784 char *st1=NULL, *st2=NULL, *struc=NULL;
788 for (s=0; S1[s]!=NULL; s++);
790 for (s=0; S2[s]!=NULL; s++);
791 if (n_seq != s) nrerror("unequal number of sequences in alibacktrack()\n");
793 st1 = (char *) space(sizeof(char)*(n3+1));
794 st2 = (char *) space(sizeof(char)*(n4+1));
795 type = (int *) space(n_seq*sizeof(int));
797 i0=i;/*MAX2(i-1,1);*/j0=j;/*MIN2(j+1,n4);*/
798 while (i<=n3-i_flag && j>=1+j_flag) {
799 E = c[i][j]; traced=0;
802 for (s=0; s<n_seq; s++) {
803 type[s] = pair[S1[s][i]][S2[s][j]];
805 psc = covscore(type,n_seq);
806 for (s=0; s<n_seq; s++) if (type[s]==0) type[s] = 7;
808 for (k=i+1; k<=n3 && k>i-MAXLOOP-2; k++) {
809 for (l=j-1; l>=1; l--) {
811 if (i-k+l-j-2>MAXLOOP) break;
812 for (s=LE=0; s<n_seq; s++) {
813 type2 = pair[S1[s][k]][S2[s][l]];
814 if (type2==0) type2=7;
815 LE += E_IntLoop(k-i-1, j-l-1, type[s], rtype[type2],
816 S1[s][i+1], S2[s][j-1], S1[s][k-1], S2[s][l+1],P);
818 if (E == c[k][l]+LE) {
827 for (s=0; s<n_seq; s++) {
828 if (type[s]>2) E -= P->TerminalAU;
831 if (E != n_seq*P->DuplexInit) {
832 nrerror("backtrack failed in fold duplex bal");
836 struc = (char *) space(i-i0+1+j0-j+1+2);
837 for (k=MAX2(i0,1); k<=i; k++) if (!st1[k-1]) st1[k-1] = '.';
838 for (k=j; k<=j0; k++) if (!st2[k-1]) st2[k-1] = '.';
839 strcpy(struc, st1+MAX2(i0-1,0)); strcat(struc, "&");
840 strcat(struc, st2+j-1);
847 duplexT** aliLduplexfold_XS(const char*s1[], const char* s2[], const int **access_s1, const int **access_s2, const int threshold, const int alignment_length, const int delta, const int fast,const int il_a, const int il_b, const int b_a, const int b_b)
856 int iext_s=2*(il_a);/* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
857 int iext_ass=50+il_a;/* iext_ass assymetric extension of interior loop, either on i or on j side. */
858 int min_colonne=INF; /* enthaelt das maximum einer kolonne */
860 int max_pos;/* get position of the best hit */
865 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
866 int *position; /* contains the position of the hits with energy > E */
870 n1 = (int) strlen(s1[0]);
871 n2 = (int) strlen(s2[0]);
872 for (s=0; s1[s]; s++);
874 for (s=0; s2[s]; s++);
875 if (n_seq != s) nrerror("unequal number of sequences in aliduplexfold()\n");
877 position = (int *) space ((delta+(n1)+4+delta) * sizeof(int));
878 position_j= (int *) space ((delta+(n1)+4+delta) * sizeof(int));
881 *** extension penalty, computed only once, further reduce the computation time
883 if ((!P) || (fabs(P->temperature - temperature)>1e-6)){
884 update_dfold_params();
886 maxPenalty[0]=(int) -1*P->stack[2][2]/2;
887 maxPenalty[1]=(int) -1*P->stack[2][2];
888 maxPenalty[2]=(int) -3*P->stack[2][2]/2;
889 maxPenalty[3]=(int) -2*P->stack[2][2];
892 lc = (int**) space (sizeof(int *) * 5);
893 lin = (int**) space (sizeof(int *) * 5);
894 lbx = (int**) space (sizeof(int *) * 5);
895 lby = (int**) space (sizeof(int *) * 5);
896 linx = (int**) space (sizeof(int *) * 5);
897 liny = (int**) space (sizeof(int *) * 5);
899 for (i=0; i<=4; i++){
900 lc[i] = (int *) space(sizeof(int) * (n2+5));
901 lin[i] = (int *) space(sizeof(int) * (n2+5));
902 lbx[i] = (int *) space(sizeof(int) * (n2+5));
903 lby[i] = (int *) space(sizeof(int) * (n2+5));
904 linx[i]= (int *) space(sizeof(int) * (n2+5));
905 liny[i]= (int *) space(sizeof(int) * (n2+5));
909 S1 = (short **) space((n_seq+1)*sizeof(short *));
910 S2 = (short **) space((n_seq+1)*sizeof(short *));
911 for (s=0; s<n_seq; s++) {
912 if (strlen(s1[s]) != n1) nrerror("uneqal seqence lengths");
913 if (strlen(s2[s]) != n2) nrerror("uneqal seqence lengths");
914 S1[s] = encode_seq(s1[s]);
915 S2[s] = encode_seq(s2[s]);
917 type = (int *) space(n_seq*sizeof(int));
919 *** array initialization
922 for(j=n2+4;j>=0;j--) {
923 lbx[0][j]=lbx[1][j]=lbx[2][j]=lbx[3][j] = lbx[4][j] =INF;
924 lin[0][j]=lin[1][j]=lin[2][j]=lin[3][j] = lin[4][j] =INF;
925 lc[0][j] =lc[1][j] =lc[2][j] = lc[3][j] = lc[4][j] =INF;
926 lby[0][j]=lby[1][j]=lby[2][j]=lby[3][j] = lby[4][j] =INF;
927 liny[0][j]=liny[1][j]=liny[2][j]=liny[3][j]=liny[4][j]=INF;
928 linx[0][j]=linx[1][j]=linx[2][j]=linx[3][j]=linx[4][j]=INF;
932 int di1,di2,di3,di4; /* contains accessibility penalty */
933 while(i < i_length) {
940 di1 = access_s1[5][i] - access_s1[4][i-1];
941 di2 = access_s1[5][i-1] - access_s1[4][i-2] + di1;
942 di3 = access_s1[5][i-2] - access_s1[4][i-3] + di2;
943 di4 = access_s1[5][i-3] - access_s1[4][i-4] + di3;
944 di1=MIN2(di1,maxPenalty[0]);
945 di2=MIN2(di2,maxPenalty[1]);
946 di3=MIN2(di3,maxPenalty[2]);
947 di4=MIN2(di3,maxPenalty[3]);
951 dj1 = access_s2[5][j+4] - access_s2[4][j+4];
952 dj2 = access_s2[5][j+5] - access_s2[4][j+5] + dj1;
953 dj3 = access_s2[5][j+6] - access_s2[4][j+6] + dj2;
954 dj4 = access_s2[5][j+7] - access_s2[4][j+7] + dj3;
955 dj1=MIN2(dj1,maxPenalty[0]);
956 dj2=MIN2(dj2,maxPenalty[1]);
957 dj3=MIN2(dj3,maxPenalty[2]);
958 dj4=MIN2(dj4,maxPenalty[3]);
960 for (s=0; s<n_seq; s++) { /* initialize type1 */
961 type[s] = pair[S1[s][i]][S2[s][j]];
963 psc = covscore(type, n_seq);
964 for (s=0; s<n_seq; s++) if (type[s]==0) type[s]=7;
965 lc[idx][j] = ((psc >= MINPSCORE) ? n_seq*(P->DuplexInit + access_s1[1][i] + access_s2[1][j]) : INF);
966 int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by, c_inx, c_iny; /* matrix c */
967 int in, in_x, in_y, in_xy; /* in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
972 in=lc[idx_1][j+1]; in_x=lin[idx_1][j]; in_y=lin[idx][j+1]; in_xy=lin[idx_1][j+1];
973 inx=lc[idx_1][j+1]; inx_x=linx[idx_1][j];
974 iny=lc[idx_1][j+1]; iny_y=liny[idx][j+1];
975 bx=lc[idx_1][j]; bx_x=lbx[idx_1][j];
976 by=lc[idx][j+1]; by_y=lby[idx][j+1];
977 c_stack=lc[idx_1][j+1]; c_01=lc[idx_1][j+2];c_10=lc[idx_2][j+1];
978 c_12=lc[idx_2][j+3];c_21=lc[idx_3][j+2];c_11=lc[idx_2][j+2];
979 c_22=lc[idx_3][j+3];c_32=lc[idx_4][j+3];c_23=lc[idx_3][j+4];
980 c_in=lin[idx_3][j+3];c_in2x=lin[idx_4][j+2];c_in2y=lin[idx_2][j+4];
981 c_inx=linx[idx_3][j+1]; c_iny=liny[idx_1][j+3];
982 c_bx=lbx[idx_2][j+1];c_by=lby[idx_1][j+2];
983 for (s=0; s<n_seq; s++) {
984 type2 = pair[S2[s][j+1]][S1[s][i-1]];
985 in +=P->mismatchI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s+di1+dj1;
988 in_xy+=iext_s+di1+dj1;
989 inx +=P->mismatch1nI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s+di1+dj1;
991 iny +=P->mismatch1nI[type2][S2[s][j]][S1[s][i]]+iopen+iext_s+di1+dj1;
993 type2=pair[S2[s][j]][S1[s][i-1]];
994 bx +=bopen+bext+(type2>2?P->TerminalAU:0)+di1;
996 type2=pair[S2[s][j+1]][S1[s][i]];
997 by +=bopen+bext+(type2>2?P->TerminalAU:0)+dj1;
1000 lin[idx][j]=MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
1001 linx[idx][j]=MIN2(inx_x, inx);
1002 liny[idx][j]=MIN2(iny_y, iny);
1003 lby[idx][j]=MIN2(by, by_y);
1004 lbx[idx][j]=MIN2(bx, bx_x);
1005 if (psc<MINPSCORE) continue;
1006 for (s=0; s<n_seq; s++) {
1007 lc[idx][j]+=E_ExtLoop(type[s], S1[s][i-1],S2[s][j+1],P);
1009 for (s=0; s<n_seq; s++) {
1010 type2=pair[S1[s][i-1]][S2[s][j+1]];if (type2==0) type2=7;
1011 c_stack+=E_IntLoop(0,0,type2, rtype[type[s]],S1[s][i], S2[s][j], S1[s][i-1], S2[s][j+1], P)+di1+dj1;
1012 type2=pair[S1[s][i-1]][S2[s][j+2]];if (type2==0) type2=7;
1013 c_01 +=E_IntLoop(0,1,type2, rtype[type[s]],S1[s][i], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+di1+dj2;
1014 type2=pair[S1[s][i-2]][S2[s][j+1]];if (type2==0) type2=7;
1015 c_10 +=E_IntLoop(1,0,type2, rtype[type[s]],S1[s][i-1], S2[s][j], S1[s][i-1], S2[s][j+1], P)+di2+dj1;
1016 type2=pair[S1[s][i-2]][S2[s][j+2]];if (type2==0) type2=7;
1017 c_11 +=E_IntLoop(1,1,type2, rtype[type[s]],S1[s][i-1], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+di2+dj2;
1018 type2=pair[S1[s][i-3]][S2[s][j+3]];if (type2==0) type2=7;
1019 c_22 +=E_IntLoop(2,2,type2, rtype[type[s]],S1[s][i-2], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+di3+dj3;
1020 type2= pair[S1[s][i-3]][S2[s][j+2]];if (type2==0) type2=7;
1021 c_21 +=E_IntLoop(2,1,type2, rtype[type[s]],S1[s][i-2], S2[s][j+1], S1[s][i-1], S2[s][j+1], P)+di3+dj2;
1022 type2= pair[S1[s][i-2]][S2[s][j+3]];if (type2==0) type2=7;
1023 c_12 +=E_IntLoop(1,2,type2, rtype[type[s]],S1[s][i-1], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+di2+dj3;
1024 type2 = pair[S1[s][i-4]][S2[s][j+3]];if (type2==0) type2=7;
1025 c_32 +=E_IntLoop(3,2,type2, rtype[type[s]],S1[s][i-3], S2[s][j+2], S1[s][i-1], S2[s][j+1], P)+di4+dj3;
1026 type2 = pair[S1[s][i-3]][S2[s][j+4]];if (type2==0) type2=7;
1027 c_23 +=E_IntLoop(2,3,type2, rtype[type[s]],S1[s][i-2], S2[s][j+3], S1[s][i-1], S2[s][j+1], P)+dj4+di3;
1028 c_in +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+di3+dj3+2*iext_s;
1029 c_in2x +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_s+2*iext_ass+di4+dj2;
1030 c_in2y +=P->mismatchI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_s+2*iext_ass+di2+dj4;
1031 c_inx +=P->mismatch1nI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_ass+iext_ass+di3+dj1;
1032 c_iny +=P->mismatch1nI[rtype[type[s]]][S1[s][i-1]][S2[s][j+1]]+iext_ass+iext_ass+di1+dj3;
1034 bAU=(type[s]>2?P->TerminalAU:0);
1035 c_bx +=di2+dj1+bext+bAU;
1036 c_by +=di1+dj2+bext+bAU;
1038 lc[idx][j] =MIN2(lc[idx][j],
1072 for (s=0; s<n_seq; s++) {
1073 temp+=E_ExtLoop(rtype[type[s]], S2[s][j-1],S1[s][i+1],P);
1075 if(min_colonne > temp){
1080 if(max>=min_colonne){
1083 max_pos_j=min_j_colonne;
1085 position[i+delta]=min_colonne;min_colonne=INF;
1086 position_j[i+delta]=min_j_colonne;
1089 /* printf("MAX: %d",max); */
1090 for (s=0; s<n_seq; s++) {free(S1[s]);free(S2[s]);}
1093 alifind_max_XS(position, position_j, delta, threshold, alignment_length, s1, s2,access_s1,access_s2, fast);
1095 aliplot_max_XS(max, max_pos, max_pos_j,alignment_length, s1, s2, access_s1, access_s2, fast);
1096 for (i=0; i<=4; i++) {free(lc[i]);free(lin[i]);free(lbx[i]);free(lby[i]);free(linx[i]);free(liny[i]);}
1097 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
1098 free(lc);free(lin);free(lbx);free(lby);free(linx);free(liny);
1108 PRIVATE void alifind_max_XS(const int *position, const int *position_j,
1109 const int delta, const int threshold, const int alignment_length,
1110 const char *s1[], const char *s2[],
1111 const int **access_s1, const int **access_s2, const int fast){
1114 for (n_seq=0; s1[n_seq]!=NULL; n_seq++);
1119 if(position[pos+delta]<(threshold)){
1121 search_range=delta+1;
1122 while(--search_range){
1123 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1124 temp_min=search_range;
1129 max_pos_j=position_j[pos+delta];
1131 max=position[pos+delta];
1132 /* printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100); */
1133 pos=MAX2(10,pos-delta);
1139 while( pos-- > 10 ) {
1140 /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
1142 if(position[pos+delta]<(threshold)){
1144 search_range=delta+1;
1145 while(--search_range){
1146 if(position[pos+delta-search_range]<=position[pos+delta-temp_min]){
1147 temp_min=search_range;
1152 max_pos_j=position_j[pos+delta]; /* position on j */
1153 int begin_t=MAX2(11, pos-alignment_length);
1154 int end_t =MIN2(n1-10, pos+1);
1155 int begin_q=MAX2(11, max_pos_j-1);
1156 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
1159 i_flag = (end_t == pos+1?1:0);
1160 j_flag = (begin_q == max_pos_j-1?1:0);
1162 s3 = (char**) space(sizeof(char*)*(n_seq+1));
1163 s4 = (char**) space(sizeof(char*)*(n_seq+1));
1165 for(i=0; i<n_seq; i++){
1166 s3[i] = (char*) space(sizeof(char)*(end_t-begin_t+2));
1167 s4[i] = (char*) space(sizeof(char)*(end_q-begin_q+2));
1168 strncpy(s3[i], (s1[i]+begin_t), end_t - begin_t +1);
1169 strncpy(s4[i], (s2[i]+begin_q), end_q - begin_q +1);
1170 s3[i][end_t - begin_t +1]='\0';
1171 s4[i][end_q - begin_q +1]='\0';
1174 test = aliduplexfold_XS((const char**) s3, (const char**) s4, access_s1, access_s2,pos, max_pos_j,threshold,i_flag,j_flag);
1175 /* printf("position %d approximation %d test %d threshold %d\n", pos, position[pos+delta], (int)test.energy,(int)(threshold/n_seq)); */
1176 if(test.energy*100 < (int) (threshold/n_seq)){
1177 printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
1178 test.tb,test.te,test.qb,test.qe, test.ddG/n_seq, test.energy/n_seq, test.dG1/n_seq, test.dG2/n_seq);
1179 free(test.structure);
1180 pos=MAX2(10,pos-delta);
1182 for(i=0;i<n_seq;i++){
1183 free(s3[i]);free(s4[i]);
1186 /* free(test.structure); */
1192 PRIVATE void aliplot_max_XS(const int max, const int max_pos, const int max_pos_j,const int alignment_length,
1193 const char *s1[], const char* s2[],const int **access_s1, const int **access_s2,
1197 for (n_seq=0; !(s1[n_seq]==NULL); n_seq++);
1198 n1 = strlen(s1[0]); /* get length of alignment */
1199 n2 = strlen(s2[0]); /* get length of alignme */
1202 printf("target upper bound %d: query lower bound %d (%5.2f)\n",
1203 max_pos-10, max_pos_j-10, (double) ((double)max)/(100*n_seq));
1206 int begin_t=MAX2(11, max_pos-alignment_length); /* only get the position that binds.. */
1207 int end_t =MIN2(n1-10, max_pos+1); /* ..no dangles */
1208 int begin_q=MAX2(11, max_pos_j-1);
1209 int end_q =MIN2(n2-10, max_pos_j+alignment_length-1);
1212 i_flag = (end_t == max_pos+1?1:0);
1213 j_flag = (begin_q == max_pos_j-1?1:0);
1215 s3 = (char**) space(sizeof(char*)*(n_seq+1));
1216 s4 = (char**) space(sizeof(char*)*(n_seq+1));
1218 for(i=0; i<n_seq; i++){
1219 s3[i] = (char*) space(sizeof(char)*(end_t-begin_t+2));
1220 s4[i] = (char*) space(sizeof(char)*(end_q-begin_q+2));
1221 strncpy(s3[i], (s1[i]+begin_t), end_t - begin_t +1);
1222 strncpy(s4[i], (s2[i]+begin_q), end_q - begin_q +1);
1223 s3[i][end_t - begin_t +1]='\0';
1224 s4[i][end_q - begin_q +1]='\0';
1227 test = aliduplexfold_XS((const char**) s3, (const char**) s4, access_s1, access_s2,max_pos, max_pos_j,INF,i_flag,j_flag);
1228 printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
1229 test.tb,test.te,test.qb,test.qe, test.ddG/n_seq, test.energy/n_seq, test.dG1/n_seq, test.dG2/n_seq);
1230 free(test.structure);
1231 for(i=0;i<n_seq;i++){
1232 free(s3[i]);free(s4[i]);
1238 PRIVATE int covscore(const int *types, int n_seq) {
1239 /* calculate co-variance bonus for a pair depending on */
1240 /* compensatory/consistent mutations and incompatible seqs */
1241 /* should be 0 for conserved pairs, >0 for good pairs */
1242 #define NONE -10000 /* score for forbidden pairs */
1243 int k,l,s,score, pscore;
1244 int dm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */
1245 {0,0,2,2,1,2,2} /* CG */,
1246 {0,2,0,1,2,2,2} /* GC */,
1247 {0,2,1,0,2,1,2} /* GU */,
1248 {0,1,2,2,0,2,1} /* UG */,
1249 {0,2,2,1,2,0,2} /* AU */,
1250 {0,2,2,2,1,2,0} /* UA */};
1252 int pfreq[8]={0,0,0,0,0,0,0,0};
1253 for (s=0; s<n_seq; s++)
1256 if (pfreq[0]*2>n_seq) return NONE;
1257 for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */
1258 for (l=k+1; l<=6; l++)
1259 /* scores for replacements between pairtypes */
1260 /* consistent or compensatory mutations score 1 or 2 */
1261 score += pfreq[k]*pfreq[l]*dm[k][l];
1263 /* counter examples score -1, gap-gap scores -0.25 */
1265 ((UNIT*score)/n_seq - nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25));
1270 PRIVATE void update_dfold_params(void)
1273 P = scale_parameters();
1279 PRIVATE short * encode_seq(const char *sequence) {
1282 l = strlen(sequence);
1283 S = (short *) space(sizeof(short)*(l+2));
1286 /* make numerical encoding of sequence */
1287 for (i=1; i<=l; i++)
1288 S[i]= (short) encode_char(toupper(sequence[i-1]));
1290 /* for circular folding add first base at position n+1 */