1 /* Last changed Time-stamp: <2006-03-02 22:32:02 ivo> */
3 minimum free energy consensus
4 RNA secondary structure prediction
5 with maximum distance base pairs
7 c Ivo Hofacker, Stephan Bernhart
19 #include "energy_par.h"
20 #include "fold_vars.h"
26 #include "loop_energies.h"
34 static char rcsid[] UNUSED = "$Id: aliLfold.c,v 1.1 2007/06/23 08:49:57 ivo Exp $";
39 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
40 #define NEW_NINIO 1 /* new asymetry penalty */
41 #define MAXSECTORS 500 /* dimension for a backtrack array */
42 #define LOCALITY 0. /* locality parameter for base-pairs */
44 #define MINPSCORE -2 * UNIT
45 #define NONE -10000 /* score for forbidden pairs */
48 #################################
50 #################################
54 #################################
56 #################################
58 PRIVATE paramT *P = NULL;
59 PRIVATE int **c = NULL; /* energy array, given that i-j pair */
60 PRIVATE int *cc = NULL; /* linear array for calculating canonical structures */
61 PRIVATE int *cc1 = NULL; /* " " */
62 PRIVATE int *f3 = NULL; /* energy of 5' end */
63 PRIVATE int **fML = NULL; /* multi-loop auxiliary energy array */
64 PRIVATE int *Fmi = NULL; /* holds row i of fML (avoids jumps in memory) */
65 PRIVATE int *DMLi = NULL; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
66 PRIVATE int *DMLi1 = NULL; /* MIN(fML[i+1,k]+fML[k+1,j]) */
67 PRIVATE int *DMLi2 = NULL; /* MIN(fML[i+2,k]+fML[k+1,j]) */
68 PRIVATE int **pscore = NULL; /* precomputed array of pair types */
69 PRIVATE unsigned int length;
70 PRIVATE short **S = NULL;
71 PRIVATE short **S5 = NULL; /*S5[s][i] holds next base 5' of i in sequence s*/
72 PRIVATE short **S3 = NULL; /*Sl[s][i] holds next base 3' of i in sequence s*/
73 PRIVATE char **Ss = NULL;
74 PRIVATE unsigned short **a2s = NULL;
75 PRIVATE float **dm = NULL;
76 PRIVATE int olddm[7][7]= {{0,0,0,0,0,0,0}, /* hamming distance between pairs PRIVATE needed??*/
77 {0,0,2,2,1,2,2} /* CG */,
78 {0,2,0,1,2,2,2} /* GC */,
79 {0,2,1,0,2,1,2} /* GU */,
80 {0,1,2,2,0,2,1} /* UG */,
81 {0,2,2,1,2,0,2} /* AU */,
82 {0,2,2,2,1,2,0} /* UA */};
83 PRIVATE int energyout;
84 PRIVATE int energyprev;
88 /* NOTE: all variables are assumed to be uninitialized if they are declared as threadprivate
90 #pragma omp threadprivate(P, c, cc, cc1, f3, fML, Fmi, DMLi, DMLi1, DMLi2, pscore, length, S, dm, S5, S3, Ss, a2s, energyout, energyprev)
95 #################################
96 # PRIVATE FUNCTION DECLARATIONS #
97 #################################
99 PRIVATE void initialize_aliLfold(int length, int maxdist);
100 PRIVATE void free_aliL_arrays(int maxdist);
101 PRIVATE void get_arrays(unsigned int size, int maxdist);
102 PRIVATE short *encode_seq(const char *sequence, short *s5, short *s3, char *ss, unsigned short *as);
103 PRIVATE void make_pscores(const char ** AS, const char *structure,int maxdist, int start);
104 PRIVATE int fill_arrays(const char **strings, int maxdist, char *structure);
105 PRIVATE char *backtrack(const char **strings, int start, int maxdist);
108 #################################
109 # BEGIN OF FUNCTION DEFINITIONS #
110 #################################
113 PRIVATE void initialize_aliLfold(int length, int maxdist){
114 if (length<1) nrerror("initialize_fold: argument must be greater 0");
115 get_arrays((unsigned) length, maxdist);
118 P = scale_parameters();
121 /*--------------------------------------------------------------------------*/
123 PRIVATE void get_arrays(unsigned int size, int maxdist)
126 c = (int **)space(sizeof(int *)*(size+1));
127 fML = (int **)space(sizeof(int *)*(size+1));
128 pscore = (int **)space(sizeof(int *)*(size+1));
129 f3 = (int *) space(sizeof(int)*(size+2)); /* has to be one longer */
130 cc = (int *) space(sizeof(int)*(maxdist+5));
131 cc1 = (int *) space(sizeof(int)*(maxdist+5));
132 Fmi = (int *) space(sizeof(int)*(maxdist+5));
133 DMLi = (int *) space(sizeof(int)*(maxdist+5));
134 DMLi1 = (int *) space(sizeof(int)*(maxdist+5));
135 DMLi2 = (int *) space(sizeof(int)*(maxdist+5));
136 for (i=size; i>(int)size-maxdist-5 && i>=0; i--) {
137 c[i] = (int *) space(sizeof(int) *(maxdist+5));
138 fML[i] = (int *) space(sizeof(int) *(maxdist+5));
139 pscore[i] = (int *) space(sizeof(int )*(maxdist+5));
144 /*--------------------------------------------------------------------------*/
146 PRIVATE void free_aliL_arrays(int maxdist) {
148 for(i=0; i<maxdist+5 && i<=length; i++){
165 /*--------------------------------------------------------------------------*/
166 PUBLIC float aliLfold(const char **strings, char *structure, int maxdist) {
167 int length, energy, s, n_seq, i, j;
168 length = (int) strlen(strings[0]);
169 if (maxdist>length) maxdist = length;
170 initialize_aliLfold(length, maxdist);
172 for (s=0; strings[s]!=NULL; s++);
174 S = (short **) space(n_seq*sizeof(short *));
175 S5 = (short **) space(n_seq*sizeof(short *));
176 S3 = (short **) space(n_seq*sizeof(short *));
177 a2s = (unsigned short **) space(n_seq*sizeof(unsigned short *));
178 Ss = (char **) space(n_seq*sizeof(char *));
180 for (s=0; s<n_seq; s++) {
181 if (strlen(strings[s]) != length) nrerror("uneqal seqence lengths");
182 S5[s] = (short *) space((length+2)*sizeof(short));
183 S3[s] = (short *) space((length+2)*sizeof(short));
184 a2s[s] = (unsigned short *) space((length+2)*sizeof(unsigned short));
185 Ss[s] = (char *) space((length+2)*sizeof(char));
186 S[s] = encode_seq(strings[s], S5[s],S3[s],Ss[s],a2s[s]);
190 if (RibosumFile !=NULL) dm=readribosum(RibosumFile);
191 else dm=get_ribosum(strings, n_seq, S[0][0]);
193 else { /*use usual matrix*/
194 dm=(float **)space(7*sizeof(float*));
196 dm[i]=(float *)space(7*sizeof(float));
198 dm[i][j] = (float) olddm[i][j];
202 for (i=length; i>=(int)length-(int)maxdist-4 && i>0; i--)
203 make_pscores((const char **) strings,structure,maxdist,i);
205 energy = fill_arrays(strings, maxdist, structure);
207 free_aliL_arrays(maxdist);
208 return (float) energy/100.;
211 PRIVATE int fill_arrays(const char **strings, int maxdist, char *structure) {
212 /* fill "c", "fML" and "f3" arrays and return optimal energy */
214 int i, j, k, length, energy;
215 int decomp, new_fML,MLenergy ;
216 int *type, type_2, tt, s, n_seq, no_close, lastf, lastf2, thisj, lastj, lastj2;
218 lastf = lastf2 = INF;
222 length = (int) strlen(strings[0]);
223 for (s=0; strings[s]!=NULL; s++);
225 type = (int *) space(n_seq*sizeof(int));
226 for (j=0; j<maxdist+5; j++)
227 Fmi[j]=DMLi[j]=DMLi1[j]=DMLi2[j]=INF;
228 for (j=length; j>length-maxdist-3; j--) {
229 for (i=(length-maxdist-2>0)?length-maxdist-2:1 ; i<j; i++)
230 c[i][j-i] = fML[i][j-i] = INF;
233 for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
234 for (j = i+1; j<=length && j<=i+TURN; j++) {
235 c[i][j-i]=fML[i][j-i]=INF;
237 for (j = i+TURN+1; j <= length && j <= i+maxdist; j++) {
240 for (s=0; s<n_seq; s++) {
241 type[s] = pair[S[s][i]][S[s][j]];
242 if (type[s]==0) type[s]=7;
245 psc = pscore[i][j-i];
247 if (psc>=cv_fact*MINPSCORE) { /* we have a pair 2 consider */
248 int new_c=0, stackEnergy=INF;
249 /* hairpin ----------------------------------------------*/
250 for (new_c=s=0; s<n_seq; s++){
251 if((a2s[s][j-1] - a2s[s][i]) < 3) new_c += 600;
252 else new_c += E_Hairpin(a2s[s][j-1]-a2s[s][i],type[s],S3[s][i],S5[s][j],Ss[s]+(a2s[s][i-1]), P);
254 /*--------------------------------------------------------
255 check for elementary structures involving more than one
257 --------------------------------------------------------*/
258 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
259 int minq = j-i+p-MAXLOOP-2;
260 if (minq<p+1+TURN) minq = p+1+TURN;
261 for (q = minq; q < j; q++) {
262 if (pscore[p][q-p]<MINPSCORE) continue;
265 for (energy = s=0; s<n_seq; s++) {
266 type_2 = pair[S[s][q]][S[s][p]]; /* q,p not p,q! */
267 if (type_2 == 0) type_2 = 7;
268 energy += E_IntLoop(a2s[s][p-1]-a2s[s][i],
269 a2s[s][j-1]-a2s[s][q],
278 new_c = MIN2(energy+c[p][q-p], new_c);
279 if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
285 /* multi-loop decomposition ------------------------*/
286 decomp = DMLi1[j-1-(i+1)];
288 for (s=0; s<n_seq; s++) {
290 decomp += E_MLstem(tt, S5[s][j], S3[s][i], P);
294 for(s=0; s<n_seq; s++){
296 decomp += E_MLstem(tt, -1, -1, P);
299 MLenergy = decomp + n_seq*P->MLclosing;
300 new_c = MIN2(new_c, MLenergy);
302 new_c = MIN2(new_c, cc1[j-1-(i+1)]+stackEnergy);
303 cc[j-i] = new_c - psc; /* add covariance bonnus/penalty */
305 c[i][j-i] = cc1[j-1-(i+1)]+stackEnergy-psc;
309 } /* end >> if (pair) << */
310 else c[i][j-i] = INF;
313 /* done with c[i,j], now compute fML[i,j] */
314 /* free ends ? -----------------------------------------*/
316 new_fML = fML[i+1][j-i-1]+n_seq*P->MLbase;
317 new_fML = MIN2(fML[i][j-1-i]+n_seq*P->MLbase, new_fML);
318 energy = c[i][j-i]/*+P->MLintern[type]*/;
320 for (s=0; s<n_seq; s++) {
321 energy += E_MLstem(type[s], (i > 1) ? S5[s][i] : -1, (j < length) ? S3[s][j] : -1, P);
325 for (s=0; s<n_seq; s++) {
326 energy += E_MLstem(type[s], -1, -1, P);
329 new_fML = MIN2(energy, new_fML);
331 /* modular decomposition -------------------------------*/
333 for (decomp = INF, k = i+1+TURN; k <= j-2-TURN; k++)
334 decomp = MIN2(decomp, Fmi[k-i]+fML[k+1][j-k-1]);
336 DMLi[j-i] = decomp; /* store for use in ML decompositon */
337 new_fML = MIN2(new_fML,decomp);
341 fML[i][j-i] = Fmi[j-i] = new_fML; /* substring energy */
345 /* calculate energies of 5' and 3' fragments */
347 static int do_backtrack = 0, prev_i=0, prev_j=0;
348 static char * prev=NULL;
354 for (j=i+TURN+1; j<length && j<=i+maxdist; j++) {
356 /* if (c[j+1]<INF) {*/
359 for(s = 0; s < n_seq; s++){
360 tt = pair[S[s][i]][S[s][j]];
362 energy += E_ExtLoop(tt, (i>1) ? S5[s][i] : -1, S3[s][j], P);
366 for(s = 0; s < n_seq; s++){
367 tt = pair[S[s][i]][S[s][j]];
369 energy += E_ExtLoop(tt, -1, -1, P);
372 if (energy/(j-i+1) < thisf){
373 thisf = energy/(j-i+1);
383 if(length <= i+maxdist){
388 for (s=0; s<n_seq; s++) {
389 tt = pair[S[s][i]][S[s][j]];
391 energy += E_ExtLoop(tt, (i>1) ? S5[s][i] : -1, -1, P);
395 for (s=0; s<n_seq; s++) {
396 tt = pair[S[s][i]][S[s][j]];
398 energy += E_ExtLoop(tt, -1, -1, P);
401 /* thisf=MIN2(energy/(j-i+1),thisf); ???*/
402 if (energy/(j-i+1) < thisf){
403 thisf = energy/(j-i+1);
406 f3[i] = MIN2(f3[i], energy);
409 /* backtrack partial structure */
410 /* if (i+maxdist<length) {*/
412 if (f3[i] != f3[i+1]) {
414 backtrack_type = 'F';
416 prev = backtrack(strings, i , MIN2(maxdist,length-i));
425 else if((thisf < lastf) && (thisf < lastf2) && ((thisf/(n_seq*100)) < -0.01)){ /*?????????*/
427 backtrack_type = 'C';
429 else if (do_backtrack){
430 if(do_backtrack == 1){
431 ss = backtrack(strings, i+1 , MIN2(maxdist,length-i)/*+1*/);
432 energyout = f3[i] - f3[i+strlen(ss)-1];/*??*/
435 ss = backtrack(strings, i+1 , lastj-i-2);
436 energyout=c[i+1][lastj-(i+1)];
438 for (s=0; s<n_seq; s++) {
440 type = pair[S[s][i+1]][S[s][lastj-i]]; if (type==0) type=7;
441 energyout += E_ExtLoop(type, (i>1) ? S5[s][i+1] : -1, S3[s][lastj-i], P);
445 for (s=0; s<n_seq; s++) {
447 type = pair[S[s][i+1]][S[s][lastj-i]]; if (type==0) type=7;
448 energyout += E_ExtLoop(type, -1, -1, P);
453 if((prev_i + strlen(prev) > i+1+strlen(ss)) || (do_backtrack==2)){
454 char *outstr = (char *)space(sizeof(char) * (strlen(prev)+1));
455 strncpy(outstr, strings[0]+prev_i-1, strlen(prev));
456 outstr[strlen(prev)] = '\0';
457 if (csv==1) printf("%s , %6.2f, %4d, %4d\n",prev, energyprev/(100.*n_seq), prev_i,prev_i + (int)strlen(prev)-1);
458 /* if(do_backtrack==1)*/
460 printf("%s (%6.2f) %4d - %4d\n",prev, energyprev/(100.*n_seq), prev_i,prev_i + (int)strlen(prev)-1);
466 energyprev = energyout;
482 outstr = (char *)space(sizeof(char) *(strlen(prev) + 1));
483 strncpy(outstr, strings[0]+prev_i-1, strlen(prev));
484 outstr[strlen(prev)] = '\0';
486 printf("%s ,%6.2f, %4d, %4d\n", prev, (energyprev)/(100.*n_seq), prev_i,prev_i + (int)strlen(prev)-1);
488 printf("%s (%6.2f) %4d - %4d\n", prev, (energyprev)/(100.*n_seq), prev_i,prev_i + (int)strlen(prev)-1);
491 if ((f3[prev_i] != f3[1]) || !prev){
492 ss = backtrack(strings, i , maxdist);
493 if(outstr) free(outstr);
494 outstr = (char *)space(sizeof(char) * (strlen(ss) + 1));
495 strncpy(outstr, strings[0], strlen(ss));
496 outstr[strlen(ss)] = '\0';
497 printf("%s \n", outstr);
499 printf("%s ,%6.2f ,%4d ,%4d\n", ss, (f3[1]-f3[1 + strlen(ss)-1])/(100.*n_seq), 1, (int)strlen(ss)-1);
501 printf("%s (%6.2f) %4d - %4d\n", ss, (f3[1]-f3[1 + strlen(ss)-1])/(100.*n_seq), 1, (int)strlen(ss)-1);
506 if(outstr) free(outstr);
510 int ii, *FF; /* rotate the auxilliary arrays */
511 FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
512 FF = cc1; cc1=cc; cc=FF;
513 for (j=0; j< maxdist+5; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
514 if (i<=length-maxdist-4) {
515 c[i-1] = c[i+maxdist+4]; c[i+maxdist+4] = NULL;
516 fML[i-1] = fML[i+maxdist+4]; fML[i+maxdist+4]=NULL;
517 pscore[i-1] = pscore[i+maxdist+4]; pscore[i+maxdist+4] = NULL;
519 make_pscores((const char**) strings, structure, maxdist, i-1);
520 for(ii=0; ii<maxdist+5; ii++) {
521 c[i-1][ii] = fML[i-1][ii] = INF;
530 PRIVATE char * backtrack(const char **strings, int start, int maxdist) {
531 /*------------------------------------------------------------------
532 trace back through the "c", "f3" and "fML" arrays to get the
533 base pairing list. No search for equivalent structures is done.
534 This is fast, since only few structure elements are recalculated.
535 ------------------------------------------------------------------*/
536 sect sector[MAXSECTORS]; /* backtracking sectors */
539 int *type, type_2, tt, n_seq;
543 for (s=0; strings[s]!=NULL; s++);
545 type = (int *) space(n_seq*sizeof(int));
547 length = strlen(strings[0]);
548 sector[++s].i = start;
549 sector[s].j = MIN2(length, start+maxdist+1);
550 sector[s].ml = (backtrack_type=='M') ? 1 : ((backtrack_type=='C')?2:0);
552 structure = (char *) space((MIN2(length-start, maxdist)+3)*sizeof(char));
553 for (i=0; i<=MIN2(length-start, maxdist); i++) structure[i] = '.';
556 int ml, fij, cij, traced, i1, j1, d3, d5, mm, p, q, jj=0;
557 int canonical = 1; /* (i,j) closes a canonical structure */
560 ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
561 occur in the fML- (1) or in the f-array (0) */
563 structure[i-start] = '(';
564 structure[j-start] = ')';
568 if (j < i+TURN+1) continue; /* no more pairs in this interval */
570 fij = (ml)? fML[i][j-i] : f3[i];
572 if (ml == 0) { /* backtrack in f3 */
574 if (fij == f3[i+1]) {
580 /* i is paired. Find pairing partner */
581 for (k=i+TURN+1,traced=0; k<=j; k++) {
587 for (ss=0; ss<n_seq; ss++) {
588 type[ss] = pair[S[ss][i]][S[ss][k]];
589 if (type[ss]==0) type[ss]=7;
590 cc += E_ExtLoop(type[ss], (i>1) ? S5[ss][i] : -1, (k<length) ? S3[ss][k] : -1, P);
594 for (ss=0; ss<n_seq; ss++) {
595 type[ss] = pair[S[ss][i]][S[ss][k]];
596 if (type[ss]==0) type[ss]=7;
597 cc += E_ExtLoop(type[ss], -1, -1, P);
600 if (fij == cc + f3[k+1]) traced=i;
605 if (!traced) nrerror("backtrack failed in f3");
606 if (j==length) { /* backtrack only one component, unless j==length */
612 structure[i-start] = '('; structure[j-start] = ')';
615 else { /* trace back in fML array */
616 if (fML[i][j-1-i]+n_seq*P->MLbase == fij) { /* 3' end is unpaired */
622 if (fML[i+1][j-(i+1)]+n_seq*P->MLbase == fij) { /* 5' end is unpaired */
631 for (ss=0; ss<n_seq; ss++) {
632 tt = pair[S[ss][i]][S[ss][j]];
634 cij += E_MLstem(tt, (i>1) ? S5[ss][i] : -1, (j<length) ? S3[ss][j] : -1, P);
638 for (ss=0; ss<n_seq; ss++) {
639 tt = pair[S[ss][i]][S[ss][j]];
641 cij += E_MLstem(tt, -1, -1, P);
647 structure[i-start] = '('; structure[j-start] = ')';
651 for (k = i+1+TURN; k <= j-2-TURN; k++)
652 if (fij == (fML[i][k-i]+fML[k+1][j-(k+1)]))
662 if (k>j-2-TURN) nrerror("backtrack failed in fML");
668 /*----- begin of "repeat:" -----*/
669 if (canonical) cij = c[i][j-i];
671 for (ss=0; ss<n_seq; ss++) {
672 type[ss] = pair[S[ss][i]][S[ss][j]];
673 if (type[ss]==0) type[ss] = 7;
679 if (cij == c[i][j-i]) {
680 /* (i.j) closes canonical structures, thus
681 (i+1.j-1) must be a pair */
682 for (ss=0; ss<n_seq; ss++) {
683 type_2 = pair[S[ss][j-1]][S[ss][i+1]]; /* j,i not i,j */
684 if (type_2==0) type_2 = 7;
685 cij -= P->stack[type[ss]][type_2];
687 cij += pscore[i][j-i];
688 structure[i+1-start] = '('; structure[j-1-start] = ')';
694 cij += pscore[i][j-i];
698 for (ss=0; ss<n_seq; ss++){
699 if((a2s[ss][j-1] - a2s[ss][i]) < 3) cc += 600;
700 else cc += E_Hairpin(a2s[ss][j-1] - a2s[ss][i], type[ss], S3[ss][i], S5[ss][j], Ss[ss] + a2s[ss][i-1], P);
702 if (cij == cc) /* found hairpin */
706 for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
708 minq = j-i+p-MAXLOOP-2;
709 if (minq<p+1+TURN) minq = p+1+TURN;
710 for (q = j-1; q >= minq; q--) {
711 if (c[p][q-p]>=INF) continue;
712 for (ss=energy=0; ss<n_seq; ss++) {
713 type_2 = pair[S[ss][q]][S[ss][p]]; /* q,p not p,q */
714 if (type_2==0) type_2 = 7;
715 energy += E_IntLoop(a2s[ss][p-1] - a2s[ss][i],
716 a2s[ss][j-1] - a2s[ss][q],
725 traced = (cij == energy+c[p][q-p]);
727 structure[p-start] = '(';
728 structure[q-start] = ')';
735 /* end of repeat: --------------------------------------------------*/
737 /* (i.j) must close a multi-loop */
738 mm = n_seq*P->MLclosing;
740 for (ss=0; ss<n_seq; ss++) {
741 tt = rtype[type[ss]];
742 mm += E_MLstem(tt, S5[ss][j],S3[ss][i], P);
746 for (ss=0; ss<n_seq; ss++) {
747 tt = rtype[type[ss]];
748 mm += E_MLstem(tt, -1, -1, P);
752 sector[s+1].ml = sector[s+2].ml = 1;
754 for (k = i+TURN+2; k < j-TURN-2; k++){
755 if(cij == fML[i+1][k-(i+1)] + fML[k+1][j-1-(k+1)] + mm) break;
757 if (k<=j-3-TURN){ /* found the decomposition */
763 nrerror("backtracking failed in repeat");
767 if (start+maxdist<length) {
768 for (i=strlen(structure); i>0 && structure[i-1] == '.'; i--)
774 /*---------------------------------------------------------------------------*/
775 PRIVATE short *encode_seq(const char *sequence, short *s5, short *s3, char *ss, unsigned short *as){
780 l = strlen(sequence);
781 S = (short *) space(sizeof(short)*(l+2));
785 /* make numerical encoding of sequence */
786 for (i=1; i<=l; i++) {
787 short ctemp = (short)encode_char(toupper(sequence[i-1]));
792 /*use alignment sequences in all energy evaluations*/
794 for (i=1; i<l; i++) {
815 for (i=l; i>0; i--) {
818 if ((c5=='-')||(c5=='_')||(c5=='~')||(c5=='.')) continue;
822 for (i=1; i<=l; i++) {
825 if ((c3=='-')||(c3=='_')||(c3=='~')||(c3=='.')) continue;
829 } else s5[1]=s3[l]=0;
831 for (i=1,p=0; i<=l; i++) {
834 if ((c5=='-')||(c5=='_')||(c5=='~')||(c5=='.'))
837 ss[p++]=sequence[i-1]; /*start at 0!!*/
842 for (i=l; i>=1; i--) {
845 if ((c3=='-')||(c3=='_')||(c3=='~')||(c3=='.'))
855 PRIVATE double cov_score(const char **AS, int i, int j) {
858 int pfreq[8]={0,0,0,0,0,0,0,0};
859 for (n_seq=0; AS[n_seq]!=NULL; n_seq++);
860 for (s=0; s<n_seq; s++) {
862 if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap */
864 if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7;
865 else type = pair[S[s][i]][S[s][j]];
870 if (pfreq[0]*2+pfreq[7]>n_seq)
873 for (k=1,score=0.; k<=6; k++) /* ignore pairtype 7 (gap-gap) */
875 /* scores for replacements between pairtypes */
876 /* consistent or compensatory mutations score 1 or 2 */
877 score += pfreq[k]*pfreq[l]*dm[k][l];
879 /* counter examples score -1, gap-gap scores -0.25 */
880 return cv_fact * ((UNIT*score)/n_seq - nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25));
883 PRIVATE void make_pscores(const char ** AS,
884 const char *structure, int maxd, int i) {
885 /* calculate co-variance bonus for each pair depending on */
886 /* compensatory/consistent mutations and incompatible seqs */
887 /* should be 0 for conserved pairs, >0 for good pairs */
889 n=S[0][0]; /* length of seqs */
891 /*first allocate space:*/
892 pscore[i]=(int *)space((maxd+5)*sizeof(int));
893 /* pscore[start]-=start;*/
894 /*fill pscore[start], too close*/
895 for (j=i+1; (j<i+TURN+1) && (j<=n); j++) {
896 pscore[i][j-i] = NONE;
898 for (j=i+TURN+1; ((j<=n) && (j<=i+maxd)); j++) {
899 pscore[i][j-i] = cov_score(AS, i, j);
902 if (noLonelyPairs) { /* remove unwanted lonely pairs */
903 int type, otype=0, ntype=0;
904 for (j=i+TURN; ((j<n)&&(j<i+maxd)); j++) {
905 if ((i>1) && (j<n)) otype = cov_score(AS, i-1, j+1);
907 if (i<n) ntype=pscore[i+1][j-1-(i+1)];
910 if ((otype<-4*UNIT)&&(ntype<-4*UNIT)) /* worse than 2 counterex */
911 pscore[i][j-i] = NONE; /* i.j can only form isolated pairs */
915 if (fold_constrained&&(structure!=NULL)) {
916 int psij, hx, *stack;
917 stack = (int *) space(sizeof(int)*(n+1));
919 /* for(hx=0, j=i+TURN; ((j<=i+maxd)&&(j<=n)); j++) {*/
920 switch (structure[i-1]) {
921 case 'x': /* can't pair */
922 for (l=i+TURN+1; l<=i+maxd; l++) pscore[i][l-i] = NONE;
927 for (l=i+1; l<=i+maxd; l++) {
928 switch (structure[l-1]) {
931 pscore[i][l-i] = NONE;
935 if (hx!=0) pscore[i][l-i] = NONE;
938 pscore[i][l-i] = NONE;
943 for (l=i+TURN+1; l<=i+maxd; l++) pscore[i][l-i] = NONE;
946 for (l=i+TURN+1; l<=i+maxd; l++) pscore[i][l-i] = NONE;
950 if (!psij) for (l=i+1; l<=i+maxd; l++) { /*no '(' constraint on i*/
951 switch (structure[l-1]) {
953 pscore[i][l-i] = NONE;
956 pscore[i][l-i] = NONE;
959 pscore[i][l-i] = NONE;
962 pscore[i][l-i] = NONE;
967 fprintf(stderr, "%s\n", structure);
968 nrerror("unbalanced brackets in constraint string");