1 /* Bonnie Berger, David Wilson and Theodore Tonchev 1992 */
2 /* Modified by Bonnie Berger and Ethan Wolf 1995. */
3 /* Commented by Ethan Wolf 1995. */
6 /*************************************************************************
7 * scscore.c contains functions to compute the probabilities for genbnk *
8 * and the positives. The probabilities are computed for genbnk *
9 * directly from the frequencies. For the positives, the 0 frequency *
10 * residues (and residue pairs) other than Prolines are given a non-zero*
11 * probability that is no more than 1/5 of the minimum probability of *
12 * a term with non-zero frequency. This accounts for sampling errors. *
14 * In addition, the PairCoil scoring algorithm is implemented here to *
15 * score subsequences obtained by the function subsequence_advance() *
16 * To obtain this score, residue "propensities" are first calculated *
17 * for each residue in every possible offset. Then each window of *
18 * length PAIRWINDOW is given a score which is the maximum of the sum *
19 * of the residue propensities in that window over all possible window *
20 * offsets. (Note that if a PAIRWINDOW does not fit in the subsequence,*
21 * then the max size window is used, as long as it is at least size *
22 * min(WINDOW,MINWINDOW). *
23 * The subsequence is then given the max score over all the windows *
24 * containe d in it and it is given the offset of that window. *
26 * Additionally, code for doing STOCK scoring is contained here, based *
27 * only on single residue probabilities and windows of size STOCKWINDOW.*
28 *************************************************************************/
42 /* #define Two_to_Three_Ratio 15 */ /* Make it an extern double instead. */
43 /* This guesses for Pr[x in C_2]/Pr[x in C_3] */
46 /** WINDOW is the size of the WINDOW that is currently being scored. *
47 * It is normally size PAIRWINDOW, but changing its value allows *
48 * the computation of scores on shorter sequences (like size 28). */
49 /*** int WINDOW = PAIRWINDOW; ***/
50 extern int WINDOW; /* Now WINDOW is defined in interface.c so can read */
51 /* it in in get_defaults(). **/
53 extern double (*pprobs)[POSNUM];
54 extern double (*pprobp)[NUM_RES_TYPE][POSNUM][POSNUM];
55 extern double gprobs[NUM_RES_TYPE], gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM];
60 /****************************************************************************/
61 /*** For now just average, but should really do weighted avg, or something more complex. ***/
62 double combine_dist_scores(double window_scores[POSNUM][MAXSEQLEN], int i,
63 int functnumber, char lib[MAXFUNCTNUM], int end,
69 for (f=0; f<functnumber; f++)
70 if (!end_effects || ((i+lib[f]+1) < end)) {
71 score +=window_scores[lib[f]][i];
76 return(score/number_dist);
78 return(window_scores[lib[0]][i]);
81 /***********************************************************************/
85 /* These are relative frequences, and logs need to be taken of them. */
86 /* They are used when computing stocks scores when STOCKSTOCK is defined */
87 /* in scconst.h. These frequencies correspond to the frequencies used */
88 /* by stock in the program that they distribute. */
89 static double relative[20][7] ={
90 {3.167, 0.297, 0.398, 3.902, 0.585, 0.501, 0.483},
91 {2.597, 0.098, 0.345, 0.894, 0.514, 0.471, 0.431},
92 {1.665, 0.403, 0.386, 0.949, 0.211, 0.342, 0.360},
93 {2.240, 0.307, 0.480, 1.409, 0.541, 0.772, 0.663},
94 {0.531, 0.076, 0.403, 0.662, 0.189, 0.106, 0.013},
95 {1.417, 0.090, 0.122, 1.659, 0.190, 0.130, 0.155},
96 {0.045, 0.275, 0.578, 0.216, 0.211, 0.426, 0.156},
97 {1.297, 1.551, 1.084, 2.612, 0.377, 1.248, 0.877},
98 {1.375, 2.639, 1.763, 0.191, 1.815, 1.961, 2.795},
99 {0.659, 1.163, 1.210, 0.031, 1.358, 1.937, 1.798},
100 {0.347, 0.275, 0.679, 0.395, 0.294, 0.579, 0.213},
101 {0.262, 3.496, 3.108, 0.998, 5.685, 2.494, 3.048},
102 {0.030, 2.352, 2.268, 0.237, 0.663, 1.620, 1.448},
103 {0.179, 2.114, 1.778, 0.631, 2.550, 1.578, 2.526},
104 {0.835, 1.475, 1.534, 0.039, 1.722, 2.456, 2.280},
105 {0.382, 0.583, 1.052, 0.419, 0.525, 0.916, 0.628},
106 {0.169, 0.702, 0.955, 0.654, 0.791, 0.843, 0.647},
107 {0.824, 0.022, 0.308, 0.152, 0.180, 0.156, 0.044},
108 {0.240, 0.000, 0.000, 0.456, 0.019, 0.000, 0.000},
109 {0.000, 0.008, 0.000, 0.013, 0.000, 0.000, 0.000}};
111 /* Takes logs of relative frequencies for singles */
112 initialize_relative ()
115 for (aa=0; aa<AANUM; ++aa)
116 for (off=0; off<POSNUM; ++off)
117 relative[aa][off] = (relative[aa][off]) ? log(relative[aa][off]) : LOG0VAL;
120 /****************************************************************************/
122 /* Advances past prolines and higher numbered residues in a subsequence, */
123 /* in order to return the biggest subsequence from the start position that */
124 /* contains no bad residues (>=AANUM-1). Here P is assumed to have number */
125 /* AANUM-1 and the higher numbered residues correspond to X,Z,B. */
126 /* st is the position of the last bad residue read (i.e. the previous value */
127 /* for en). The subsequence returned to be scored goes from st to en-1. */
128 /* If the end of the sequence is reached en=seqlen (seqlen-1 is the last */
129 /* residue in the sequence). */
130 /* If ProlineFreeWindows are not used, then prolines will NOT terminate */
131 /* the subsequence, so regions which include prolines will be scored. */
132 int subsequence_advance (int *st, int *en, char seq[], int seqlen,
137 if (ProlineFreeWin) {
138 /* Assumes that Proline == AANUM-1, i.e. last residue in list */
139 /* Read past "bad" residues P (X,Z,B are now okay). */
140 /* Used to be seq[start] >=AANUM-1 when X,Z,B weren't okay. */
141 for (start = *en; (start < seqlen) && (seq[start] == Proline); start++);
143 /* Read in next contiguous subsequence of "good" residues to score on. */
144 /* Used to be seq[end] < AANUM -1 */
145 for (end = start; (end < seqlen) && (seq[end] != Proline) &&
146 (seq[end]<Undefined); end++);
149 /* Used to Read past "bad" residues X,Z,B by seq[start]>=AANUM. */
150 for (start = *en; (start < seqlen) && (seq[start] >= Undefined); start++);
154 /* Read in next subsequence of "good" residues (including P). */
155 /* Used to be seq[end] <AANUM */
156 for (end = start; (end < seqlen) && (seq[end] < Undefined); end++);
159 if (start >= seqlen) return 0;
160 /* [st,en-1] now gives a subsequence to be scored. */
166 /****************************************************************************/
168 /* Takes a position and an offset value for position 0 and returns the offset
169 value for that position. */
171 int getoffs(int pos, int offs)
173 return ((pos+offs)%POSNUM);
177 /** The right propensity is defined as a function of the normalized prob. **/
178 /** terms (pprobp[R1][R2][p2-offs][p2]-gprobp[R1][R2][off]) **/
179 /** minus by (pprobs[R1][p2-offs]-gprobs[R1]). **/
180 /** i.e. the log of (normalized pair prob.)/(normalized single prob.). **/
181 double rpropensity_term(int x, int y, int xoffs, int yoffs,
182 char dist, char seq[])
186 /* First the pair probability normalized by genbnk. */
187 lnprob = pprobp[seq[x]][seq[y]][xoffs][yoffs]
188 - gprobp[seq[x]][seq[y]][dist];
190 /* Then the singles probability normalized by genbnk are included.*/
191 lnprob -= pprobs[seq[x]][xoffs] - gprobs[seq[x]];
198 /*********************************************************************/
199 /** This function is used to calculate the differentiator based on **/
200 /** paircoil algorithm, and is called in get_respropl from */
201 /** get_winscores from scseqadj. **/
203 double lprop_diff_term(int x, int y, int xoffs, int yoffs,
204 char dist, char seq[])
207 extern double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM],
208 many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM];
211 /* First the difference in the logs of the probabilities. */
212 lnprob = many_pprobp[0][seq[x]][seq[y]][xoffs][yoffs] -
213 many_pprobp[1][seq[x]][seq[y]][xoffs][yoffs];
214 /* Then the singles probabilities are included.*/
215 lnprob -= many_pprobs[0][seq[y]][yoffs] - many_pprobs[1][seq[y]][yoffs];
220 /***************************************************************************/
223 /** The left propensity terms are similar, but divided by rightmost singles**/
224 /** giving (pprobp[R1][R2][p1][p1+offs]-gprobp[R1][R2][off]) **/
225 /** minus by (pprobs[R2][p1+offs]-gprobs[R2]). **/
226 double lpropensity_term(int x, int y, int xoffs, int yoffs,
227 char dist, char seq[])
231 /*** Note: The weightp and weights stuff is statistical weighting **/
232 /*** based on whether the differences between pos file prob and genbnk **/
233 /*** residues are really significant enough to be useful in the score. **/
234 /*** sigma is defined in stats.c in order to make the weights sharper. **/
237 /* First the pair probability normalized by genbnk. */
238 lnprob = pprobp[seq[x]][seq[y]][xoffs][yoffs]
239 - gprobp[seq[x]][seq[y]][dist];
241 /* Then the singles probability normalized by genbnk are included.*/
242 lnprob -= pprobs[seq[y]][yoffs] - gprobs[seq[y]];
248 /****************************************************************************/
249 /**For the following, SCOREACCUMULATE is defined in options.h to take the */
250 /**min or to take the average of functnum terms. */
251 /****************************************************************************/
253 /****************************************************************************/
254 /** These "r" functions score the sequence from right to left and are */
255 /** comparable to the corresponding "l" functions defined later. */
256 /****************************************************************************/
259 /* Function to compute right residue propensity at the start of subsequence, */
260 /* when there is no residue at distance lib[f]. Uses the pairs that are */
261 /* available and uses singles probabilities if there are no pairs. */
262 /* maxfval is the maximum of the distances lib[f], so any residue within */
263 /* maxfval from the start of the subsequence must have its propensity */
264 /* computed by this function. */
265 getr_sc_en (double respropr[], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int maxfval)
268 int f, i, cnt; /* cnt counts the number of lib[f] distances that stay */
269 /* within the subsequence, and so can be included in */
270 /* the propensity calculation. */
271 int x, y, xoffs, yoffs;
273 for (y = st+maxfval; y >= st; y--) {
274 yoffs = getoffs(y,offs);
275 for (f = 0, cnt=0; f < functnum; f++)
276 if ((x = y-lib[f]-1) >= st) { /* If the pair residue x is in the */
277 cnt++; /* subseq, then can include that pair. */
278 xoffs = getoffs(x,offs);
280 lnprob= rpropensity_term(x, y, xoffs, yoffs, lib[f], seq);
283 respropr[y] += lnprob; /** Will need to divide by cnt. **/
286 if (respropr[y] > lnprob) respropr[y] = lnprob;
290 if (!(cnt)) /* no pairs so do singles */
291 respropr[y] = pprobs[seq[y]][yoffs] - gprobs[seq[y]];
294 respropr[y] /= cnt; /* divide by number of pairs done to average them */
302 /* Computes right residue propensities for each position between st and en, */
303 /* going right to left (respropr), and computing the geometric mean of the */
304 /* paritial propensity terms at distance d of the position when AvgDist */
305 /* defined, and the min of these terms when MinDist is defined. */
307 get_respropr (double respropr[], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM])
310 int f, i, maxfval = 0;
311 int x, y, xoffs, yoffs;
313 for (f = 0; f < functnum; f++)
314 if (maxfval < lib[f]) maxfval = lib[f];
316 for (i = st; i < en; i++)
317 respropr[i] = SCOREIDENTITY; /* 0 for avg; HUGE_VAL for min */
319 /* Store residue propensity of each position in respropr */
320 for (f = 0; f < functnum; f++)
321 for (y = en-1, x = y-lib[f]-1; y >= st+maxfval+1; x--, y--) {
322 /* lib dist's off by 1 */
323 xoffs = getoffs(x,offs);
324 yoffs = getoffs(y,offs);
326 lnprob= rpropensity_term(x, y, xoffs, yoffs, lib[f], seq);
328 /* Incorporate new lnprob by taking function over it and old one */
329 SCOREACCUMULATE(respropr[y],lnprob); /* This averages functnum terms */
330 /* or takes their min depending */
331 /* on if AvgDis or MinDist def'd*/
333 /* If can't do all pairs, do as many pairs as possible, or singles */
334 getr_sc_en(respropr,seq,st,en,offs,lib,maxfval);
338 /****************************************************************************/
339 /****************************************************************************/
340 /****************************************************************************/
341 /*****Left to right versions of propensities for scoring. Currently **/
342 /**** scoring is left to right. **/
343 /****************************************************************************/
345 /* Computes the left residue propensities for the positions towards the end of
346 the subsequence where it is not possible to compute all the pair scores.
347 For each such position, all the pairs that can be done are done, and if
348 no pairs can be done, the singles are done. st and en-1 are the first
349 and last postion in the subsequence considered, offs is the frame, lib
350 is the distance-1 values, and maxfval is the max of the distances lib[f].
351 Any residue within maxfval of the end must have its propensity computed by
354 get_sc_en (double respropl[], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int maxfval, int differentiator)
357 int f, i, cnt; /* cnt counts the number of pair combos possible at a pos.*/
358 int x, y, xoffs, yoffs;
361 for (x = en-maxfval-1; x < en; x++) {
362 xoffs = getoffs(x,offs);
363 for (f = 0, cnt=0; f < functnum; f++)
364 if ((y = x+lib[f]+1) < en) { /* If the pair residue y is in the */
365 cnt++; /* sequence, then can do that pair */
366 yoffs = getoffs(y,offs);
369 lnprob = lprop_diff_term(x,y,xoffs,yoffs,lib[f],seq);
370 else /* The normal paircoil score term. */
371 lnprob= lpropensity_term(x, y, xoffs, yoffs, lib[f], seq);
374 respropl[x] += lnprob; /* Divide by cnt after go all distances.*/
377 if (respropl[x] > lnprob) respropl[x] = lnprob;
383 if (!(cnt)) /* no pairs so do singles */
384 if (differentiator) {
385 extern double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM],
386 many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM];
388 respropl[x] = many_pprobs[0][seq[x]][xoffs] - many_pprobs[1][seq[x]][xoffs];
391 respropl[x] = pprobs[seq[x]][xoffs] - gprobs[seq[x]];
397 respropl[x] /= cnt; /* Average by number of do-able pairs. */
404 /* Computes left residue propensities for each position between st and en,
405 * going left to right. */
406 get_bonnie_respropl (double respropl[], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int differentiator)
409 int f, i, maxfval = 0;
410 int x, y, xoffs, yoffs;
412 for (f = 0; f < functnum; f++)
413 if (maxfval < lib[f]) maxfval = lib[f];
416 for (i = st; i < en; i++) {
417 respropl[i] = SCOREIDENTITY; /* 0 for avg; HUGE_VAL for min */
420 /* Store residue propensity of position x in respropl[x] */
421 for (f = 0; f < functnum; f++) {
422 for (x = st, y = x+lib[f]+1; x < en-maxfval-1; x++, y++) {
423 /* lib dist's off by 1 */
424 xoffs = getoffs(x,offs);
425 yoffs = getoffs(y,offs);
428 lnprob = lprop_diff_term(x,y,xoffs,yoffs,lib[f],seq);
430 else /* The normal paircoil score term. */
431 lnprob= lpropensity_term(x, y, xoffs, yoffs, lib[f], seq);
434 /* Incorporate new lnprob by averaging or taking min of it and the */
435 /* terms corresponding to other distances, depending on options.h. */
436 SCOREACCUMULATE(respropl[x],lnprob);
439 /* If can't do all pairs because of end of subsequence, do all the */
440 /* distances that stay within the subsequence for x. */
441 get_sc_en(respropl,seq,st,en,offs,lib,maxfval, differentiator);
446 get_respropl (double respropl[POSNUM][MAXSEQLEN], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int differentiator)
449 int f, i, maxfval = 0;
450 int x, y, xoffs, yoffs;
452 for (f = 0; f < functnum; f++) {
453 if (maxfval < lib[f]) maxfval = lib[f];
456 /**** Don't need to initialize since not accumulateing over distances***
457 for (i = st; i < en; i++)
458 respropl[lib[f]][i] = SCOREIDENTITY;
460 /* 0 for avg; HUGE_VAL for min */
465 /* Store residue propensity of position x in respropl[x] */
466 for (f = 0; f < functnum; f++) {
467 for (x = st, y = x+lib[f]+1; x < en-lib[f]-1; x++, y++) {
468 /* lib dist's off by 1 */
469 xoffs = getoffs(x,offs);
470 yoffs = getoffs(y,offs);
473 lnprob = lprop_diff_term(x,y,xoffs,yoffs,lib[f],seq);
475 else /* The normal paircoil score term. */
476 lnprob= lpropensity_term(x, y, xoffs, yoffs, lib[f], seq);
480 /* Incorporate new lnprob by averaging or taking min of it and the */
481 /* terms corresponding to other distances, depending on options.h. */
482 respropl[lib[f]][x] = lnprob;
485 while (x<en) { /* Deal with residues too close to end. */
486 /* Note that since we have a propensity */
487 /* for each distance, when we get to end*/
488 /* we don't want to use singles prob. since */
489 /* they were already used as the y value above */
490 /* in lpropensity_term. Setting this to 0 */
491 /* is like assuming it is random from genbnk. */
492 respropl[lib[f]][x] = 0;
499 /* If can't do all pairs because of end of subsequence, do all the */
500 /* distances that stay within the subsequence for x. */
501 /***** If just computing for each dist, then end is dealt with above. ***/
503 get_sc_en(respropl,seq,st,en,offs,lib,maxfval, differentiator);
509 /*****************Ends Propensity computations.***************************/
510 /*************************************************************************/
511 /* Computes all window scores between st and en for a given offset
512 * and returns them in winscores.
513 * A winscore is the sum of the residue propensities of residues in that
514 * window, given that offset.
515 * A left residue propensity is the relative pair frequency minus the
516 * relative single frequency of the rightmost residue.
517 * lib contains the values dist-1.
518 * SCOREIDENTITY returns 0 if taking geometric mean and HUGEVAL if min.
519 * SCOREACCUMULATE(a,b) either averages the logs
520 * (i.e. takes the geometric mean) of the contribution of b into a
521 * or it stores min of a and b in a, depending on if AvgDist or MinDist
522 * is definied in options.h. */
524 /** Note that windows of size PAIRWINDOW are used if possible. If a
525 * subsequence is shorter than PAIRWINDOW, but as long as MINWINDOW,
526 * then it is scored using a window the size of the subsequence
527 * (for more details see the function scseqadj). */
529 get_bonnie_winscores (double winscores[], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int differentiator) /* Differentiator tells it to do the */
530 /* trimer-dimer diff method. */
532 extern double init_class_prob[MAX_CLASS_NUMBER];
534 double scorel/*,scorer*/;
536 /* residue propensities going left to right */
537 double respropl[MAXSEQLEN]/*,respropr[MAXSEQLEN]*/;
540 /* Get residue propensitites going left to right (respropl) */
541 get_bonnie_respropl(respropl,seq,st,en,offs,lib, differentiator);
542 /* get_respropr(respropr,seq,st,en,offs,lib); */
545 /* Compute window scores in linear time */
547 /* Init score to be sum of first WINDOW-1 */
548 for (scorel=/*scorer=*/0, i = st; i < st + WINDOW-1; i++) {
549 scorel += respropl[i];
550 /* scorer += respropr[i]; */
554 /* Fill in winscores */
555 for (i = st, j = st + WINDOW-1; j < en; ++i, ++j) {
556 /* Add the new right side */
557 scorel += respropl[j];
558 /* scorer += respropr[j]; */
561 /* store window score at position i. A few options are given. */
562 /* Currently scoring is left to right, but functions of the scores */
563 /* in both directions can be taken. */
565 if (differentiator) { /** The probability estimate should actually be **/
566 /** the formula in the #else..... **/
568 winscores[i] = scorel; /** Just compute one term in the formula. **/
571 winscores[i] = -getdlog(init_class_prob[0]/init_class_prob[1]
576 winscores[i] = scorel;
580 /**************************/
581 /* winscores[i] = scorer; */
582 /* winscores[i] = (scorel+scorer)/2; */
583 /* winscores[i] = MIN(scorel,scorer);
584 /***************************/
586 /* Take away the left side */
587 scorel -= respropl[i];
588 /* scorer -= respropr[i]; */
592 /************THIS CORRECTS THE BUG ****/
593 for (i= en - (WINDOW-1); i<en; i++) {
594 if (differentiator) {
596 winscores[i] = scorel; /** Just compute one term in the formula. **/
598 winscores[i] = -getdlog(init_class_prob[0]/init_class_prob[1]
603 winscores[i] = scorel;
605 scorel -= respropl[i];
613 /* Computes all window scores between st and en for a given offset
614 * and returns them in winscores.
615 * A winscore is the sum of the residue propensities of residues in that
616 * window, given that offset.
617 * A left residue propensity is the relative pair frequency minus the
618 * relative single frequency of the rightmost residue.
619 * lib contains the values dist-1.
620 * Some things defined in options.h. */
622 /** Note that windows of size PAIRWINDOW are used if possible. If a
623 * subsequence is shorter than PAIRWINDOW, but as long as MINWINDOW,
624 * then it is scored using a window the size of the subsequence
625 * (for more details see the function scseqadj). */
627 get_winscores (double winscores[POSNUM][MAXSEQLEN], char seq[], int st, int en, int offs, char lib[MAXFUNCTNUM], int differentiator) /* Differentiator tells it to do the */
628 /* trimer-dimer diff method. */
630 extern double init_class_prob[MAX_CLASS_NUMBER];
633 double scorel/*,scorer*/;
635 /* residue propensities going left to right */
636 double respropl[POSNUM][MAXSEQLEN]/*,respropr[MAXSEQLEN]*/;
639 /* Get residue propensitites going left to right (respropl) */
640 get_respropl(respropl,seq,st,en,offs,lib, differentiator);
641 /* get_respropr(respropr,seq,st,en,offs,lib); */
644 /* Compute window scores in linear time */
645 for (f = 0; f < functnum; f++) {
647 /* Init score to be sum of first WINDOW-1 */
648 for (scorel=/*scorer=*/0, i = st; i < st + WINDOW-1; i++) {
649 scorel += respropl[lib[f]][i];
650 /* scorer += respropr[i]; */
654 /* Fill in winscores */
655 for (i = st, j = st + WINDOW-1; j < en; ++i, ++j) {
656 /* Add the new right side */
657 scorel += respropl[lib[f]][j];
658 /* scorer += respropr[j]; */
661 /* store window score at position i. A few options are given. */
662 /* Currently scoring is left to right, but functions of the scores */
663 /* in both directions can be taken.
665 if (differentiator) {
667 winscores[lib[f]][i] = scorel; /* Just compute one term in formula. */
669 winscores[lib[f]][i] = -getdlog(init_class_prob[0]/init_class_prob[1]
674 winscores[lib[f]][i] = scorel;
676 /**************************/
677 /* winscores[i] = scorer; */
678 /* winscores[i] = (scorel+scorer)/2; */
679 /* winscores[i] = MIN(scorel,scorer);
680 /***************************/
682 /* Take away the left side */
683 scorel -= respropl[lib[f]][i];
684 /* scorer -= respropr[i]; */
688 /************THIS CORRECTS THE BUG ****/
689 for (i= en - (WINDOW-1); i<en; i++) {
690 if (differentiator) {
692 winscores[lib[f]][i] = scorel; /* Just compute one term in formula.*/
694 winscores[lib[f]][i] = -getdlog(init_class_prob[0]/init_class_prob[1]
699 winscores[lib[f]][i] = scorel;
701 scorel -= respropl[lib[f]][i];
706 double **Array(int size1, int size2) {
709 array=(double**)calloc(size1,sizeof(double*));
711 printf("Failed to allocate array\n");exit(1);
713 for(i=0; i<size1; i++){
714 array[i]=(double*)calloc(size2,sizeof(double));
716 printf("Failed to allocate array\n");exit(1);
722 void free_array(double **array, int size1) {
724 for (i=0; i<size1; i++){
730 /*************************************************************************/
731 /************** The following begins the scoring for PairCoil ****/
732 /*************************************************************************/
735 /* Returns 0 if sequence too short, otherwise returns 1.
737 ** When this is called in calcseq of sc2seq.c
738 * The max window scores for residue i in offset off are stored in
739 * scs[i][off] and the max offset score is in sc2[i].
741 * max_over_windows() is a dynamic program to find the max window score
742 * containing a residue, and is in the file dyn.c.
744 * avg_over_windows() computes the average window score by using prefix
745 * computations to compute the log of the avg of exp(window_scores),
746 * and is also in dyn.c.
748 * differentiator =1 says use the trimer-dimer diff method.
750 ** NOTE that WINDOW is set to PAIRWINDOW in calcseq in sc2seq.c.
751 ** PAIRWINDOW and STOCKWINDOW and MINWINDOW are defined in scconst.h. */
754 int scseqadj(char lib[],
755 char seq[], int seqlen,
756 double scores[MAXSEQLEN][POSNUM],
757 char offsets[MAXSEQLEN],
758 double *maxscore, int mode, int differentiator)
760 { int OLDWINDOW; /* temp var for const default window length */
762 /* double winscores[POSNUM][MAXSEQLEN], scorebuff[POSNUM][MAXSEQLEN]; */
763 double **winscores,**scorebuff;
764 double bonnie_winscores[MAXSEQLEN], bonnie_scorebuff[MAXSEQLEN];
765 double minwinscores[MAXSEQLEN];
768 int ProlineFreeWin = mode & PROLINE_FREE_MODE;
770 winscores=Array(POSNUM,MAXSEQLEN);
771 scorebuff=Array(POSNUM,MAXSEQLEN);
773 if (seqlen <MIN(WINDOW,MINWINDOW)) { /* 0 out score for too short seq. */
774 for (i=0; i<seqlen; i++) {
776 for(j=0; j<POSNUM; j++)
777 scores[i][j]=-HUGE_VAL;
779 *maxscore = -HUGE_VAL;
780 free_array(winscores,POSNUM);
781 free_array(scorebuff,POSNUM);
785 for (j = 0; j < POSNUM; j++)
786 for (i = 0; i < seqlen; scores[i++][j] = -HUGE_VAL);
787 for (i = 0; i < seqlen; offsets[i++] = 0);
791 /* Get next subseq (not containing P's if ProlineFreeWindows defined in */
792 /* options.h. If a window of size WINDOW=PAIRWINDOW fits in the subseq */
793 /* then that window is scored. Otherwise, if a window of size MINWINDOW */
794 /* fits in the subseq, then that window is scored. Otherwise, the subseq */
797 for (en=0; subsequence_advance(&st,&en,seq,seqlen,ProlineFreeWin);) {
798 if (en - st <MIN(WINDOW,MINWINDOW)) continue;
799 /* Don't score short regions. */
803 if (en - st <WINDOW) /* Change window size for subseq length between */
804 WINDOW = en - st; /* MINWINDOW and PAIRWINDOW. */
806 for (offs = 0; offs < POSNUM; offs++) {
807 #ifdef BONNIE_END_EFFECTS
808 get_bonnie_winscores(bonnie_winscores,seq,st,en,offs,lib,
811 get_winscores(winscores,seq,st,en,offs,lib, differentiator);
814 /* Compute residue scores */
815 if ((en-st)>=WINDOW) {
817 if (!differentiator) {
818 #ifdef BONNIE_END_EFFECTS
819 max_over_windows(&(bonnie_scorebuff[st]),&(bonnie_winscores[st]),
821 /** The score of a residue is its max window */
822 /** score over windows containing it for PAIRCOIL */
823 for (i=st; i<en; ++i) {
824 scores[i][offs] = bonnie_scorebuff[i];
830 if (mode & MAX_WINDOW_BEFORE_COMBINE_MODE) {
831 for (f=0; f<functnum; f++)
832 max_over_windows(&(scorebuff[lib[f]][st]),&
833 (winscores[lib[f]][st]),en-st,WINDOW);
834 /** The score of a residue is its max window */
835 /** score over windows containing it for PAIRCOIL */
836 for (i=st; i<en; ++i)
837 scores[i][offs] = combine_dist_scores(scorebuff,i,
840 else { /* Do score like done in PNAS paper. Note that end */
841 /* effects need to be dealt with in combine_dist_scores. */
842 for (i=st; i<en; i++) /* Put the combined score in winscores[0]*/
843 winscores[0][i] = combine_dist_scores(winscores,i,
845 /* Put the maxscore in scorebuff[0] */
846 max_over_windows(&(scorebuff[0][st]),&(winscores[0][st]),
849 for (i=st; i<en; ++i)
850 scores[i][offs] = scorebuff[0][i];
857 else { /* Don't do max over window for differentiator.... */
859 #ifdef BONNIE_END_EFFECTS
860 if (differentiator == 2) { /* get max score **/
861 max_over_windows(&(bonnie_scorebuff[st]),&(bonnie_winscores[st]),
863 for (i=st; i<en; ++i)
864 scores[i][offs] = bonnie_scorebuff[i];
866 else if (differentiator ==3) { /* get min score **/
867 min_over_windows(&(bonnie_scorebuff[st]),&(bonnie_winscores[st]),
869 for (i=st; i<en; ++i)
870 scores[i][offs] = bonnie_scorebuff[i];
872 else if (differentiator ==1) /* Win score for win starting at i */
873 for (i=st; i<en; i++)
874 scores[i][offs] = bonnie_winscores[i];
876 for (i=st; i<en; i++) /* Put the combined score in winscores[0]*/
877 winscores[0][i] = combine_dist_scores(winscores,i,
880 if (differentiator == 2) { /* get max score **/
881 max_over_windows(&(scorebuff[0][st]),&(winscores[0][st]),
883 for (i=st; i<en; ++i)
884 scores[i][offs] = scorebuff[0][i];
886 else if (differentiator ==3){ /* get min score **/
887 min_over_windows(&(scorebuff[0][st]),&(winscores[0][st]),
889 for (i=st; i<en; ++i)
890 scores[i][offs] = scorebuff[0][i];
892 else if (differentiator==1) /* Win score for win starting at i */
893 for (i=st; i<en; i++)
895 combine_dist_scores(winscores,i,functnum,lib,en,0);
903 /* Store the frame which maximizes the score of residue i in offset[i] */
904 for (i=st; i<en; ++i) {
905 double localmax = -HUGE_VAL;
906 for (offs=0; offs<POSNUM; ++offs) {
907 if (localmax < scores[i][offs]) {
909 localmax = scores[i][offs];
913 gmax = localmax; /* maintain max score in subseq */
915 WINDOW = OLDWINDOW; /* restore default value (PAIRWINDOW) to WINDOW */
921 *maxscore = exp(gmax/WINDOW);
922 for (i = 0; i < seqlen; i++)
923 for (offs=0; offs<POSNUM; ++offs)
924 scores[i][offs] = exp(scores[i][offs]/WINDOW);
927 free_array(winscores,POSNUM);
928 free_array(scorebuff,POSNUM);
937 /*************************************************************************/
938 /***** Ends the PairCoil scoring section *****/
939 /*************************************************************************/
944 /*************************************************************************/
945 /*************************************************************************/
946 /*** The following compute the NewCoil stock score. *****/
947 /*************************************************************************/
950 /* Fill in window scores - used by scorestock()*/
951 /**** WINDOW is set to STOCKWINDOW in calcseq in sc2seq.c ***/
952 singles_windows (double winscores[], char seq[], int st, int en, int offs,
958 /* Init score to be sum of first WINDOW-1 */
959 for (score=0, i = st; i < st + WINDOW -1; i++) {
960 xoffs = (i + offs) % POSNUM;
963 score += relative[seq[i]][xoffs]; /** Defined in scscore.c to compute */
964 /* singles prob from our their table */
966 score += pprobs[seq[i]][xoffs] -gprobs[seq[i]];
969 /* Fill in winscores */
971 for (i = st, j = st + WINDOW-1; j < en; ++i, ++j) {
972 /* Add the new right side */
973 xoffs = (j + offs) % POSNUM;
975 score += relative[seq[j]][xoffs]; /** Defined in scscore.c to compute */
976 /* singles prob from our their table */
978 score += pprobs[seq[j]][xoffs] -gprobs[seq[j]]; /* Computes from our */
980 winscores[i] = score;
981 /* Take away the left side */
982 xoffs = (i + offs) % POSNUM;
984 score -= relative[seq[i]][xoffs]; /** Defined in scscore.c to compute */
985 /* singles prob from our their table */
987 score -= pprobs[seq[i]][xoffs] -gprobs[seq[i]]; /* Computes from our */
995 /* Called from calcseq in sc2seq.c. Scores using the Newcoils algorithm. */
996 /*** WINDOW is set to STOCKWINDOW in calcseq ***/
997 int scorestock (char seq[MAXSEQLEN], int seqlen,
998 double scores[MAXSEQLEN][POSNUM],
999 char offsets[MAXSEQLEN],
1000 double *maxscore, int ProlineFreeWin, int stockstock)
1003 double winscores[MAXSEQLEN], scorebuff[MAXSEQLEN];
1007 if (seqlen < WINDOW) return 0;
1009 /* Initialize scores to very negative */
1010 for (j = 0; j < POSNUM; j++)
1011 for (i = 0; i < seqlen; scores[i++][j] = -HUGE_VAL);
1012 for (i = 0; i < seqlen; offsets[i++] = 0);
1015 /* Get first subsequence (en=0), do loop, then get next subseq until get */
1016 /* to end of sequence. */
1017 for (en=0; subsequence_advance(&st,&en,seq,seqlen,ProlineFreeWin);) {
1018 if (en - st < WINDOW) continue; /* Don't score short sequences, just */
1019 /* en loop and get next subseq. */
1021 for (offs = 0; offs < POSNUM; offs++) {
1022 singles_windows(winscores,seq,st,en,offs,stockstock);
1023 /* Get the score from stocks or our table for each */
1024 /* window if offset is offs, where score is */
1025 /* attributed to first residue in the window. */
1027 max_over_windows(&(scorebuff[st]),&(winscores[st]),en-st,WINDOW);
1028 /** The score of a residue is its max score */
1029 /** over windows containing it in offset off. */
1030 /** Temporarily stor this in scorebuff[i]. */
1031 for (i=st; i<en; ++i)
1032 scores[i][offs] = scorebuff[i];
1034 for (i=st; i<en; ++i) { /* Find the best scoring offset in the coil*/
1035 double localmax = -HUGE_VAL;
1036 for (offs=0; offs<POSNUM; ++offs)
1037 if (scores[i][offs]>localmax) {
1039 localmax = scores[i][offs];
1041 if (localmax > gmax)
1048 *maxscore = exp(gmax/WINDOW);
1050 for (i = 0; i < seqlen; i++)
1051 for (offs=0; offs<POSNUM; ++offs)
1052 scores[i][offs] = exp(scores[i][offs]/WINDOW);
1057 /*************************************************************************/
1058 /***** Ends the Stock scoring section *****/
1059 /*************************************************************************/
1065 int score_differentiator_window(char lib[MAXFUNCTNUM],
1066 char *window_residues, int windowlen,
1067 double window_score[POSNUM],
1068 int mode, int differentiator)
1071 double respropl[MAXSEQLEN];
1073 for (off=0; off<POSNUM; off++) {
1074 window_score[off] = 0;
1075 get_bonnie_respropl(respropl,window_residues,0,windowlen,off,lib, differentiator);
1076 for (i=0;i<windowlen; i++) {
1077 window_score[off] += respropl[i];