JPRED-2 Add sources of all binaries (except alscript) to Git
[jpred.git] / sources / multicoil / compute_like.c
1 /* Ethan Wolf 1995   */
2
3 #include <stdio.h>
4 #include <math.h>
5 #include "scio.h"
6 #include "likelihood.h"
7 #include "methods.h"
8 #include "postscript.h"
9 #include "scatter_ps.h"
10 #include "compute_like.h"
11
12
13 /* ax+b is the likelihood line approximation computed in likelihood region */
14 /* from low % to hight %                                                    */
15 void likelihood(long n[2], int mode, int filenum, double ratio, int minplace, 
16                 int maxplace, double *means[2], long *counts[2],
17                 double gauss_mean[2], double sd[2], double down[2], 
18                 FILE *fout, FILE *foutps, FILE *scatter, FILE *scores,
19                 double a[2], double b[2], 
20                 double low_percent, double high_percent, double error_allowed)
21 {
22   int i, f;  
23   double x, hist_height;
24   double  _likelihood[2][201-2*MINSCORE];
25   double  *likelihood[2];
26   double area1=0, area2=0;
27
28   int low[2]={0,0};  /* How many consectutive low % found.  */
29   int high[2]={0,0};  /* How many consecutive high % found. */
30   int low_box[2], high_box[2]; /* These hold the first              */
31         /* and last boxes that are low, high % likely, where these          */
32         /* determined by finding the first box above the gaussian mean such */
33         /* that it has likelihood above low percent and the next two boxes  */
34         /* are also above low-error percent.  Similarly, high% is the first */
35         /* following  box such that it has likelihood < %high and the two   */
36         /* following have likelihood at least high-error */
37   double error=0;
38
39
40   likelihood[0] = &(_likelihood[0][-2*MINSCORE]);
41   likelihood[1] = &(_likelihood[1][-2*MINSCORE]);
42
43
44   if (mode & STOCK_METHOD) {
45     for(i=minplace; i<=maxplace; i++) 
46       likelihood[0][i]= stock_like(ratio, (double)i/2, gauss_mean[0],sd[0],
47                                    down[0],gauss_mean[1], sd[1], down[1]);
48     filenum=1;  /*** For printout purposes.  ***/
49   }
50
51   else
52     for(f=0; f<filenum; f++) {
53       
54       if (mode & RATIO_BOXES_GT_METHOD) {
55         area1= total_area_above_gauss(minplace, maxplace, counts[f], 
56                                       gauss_mean[f],sd[f],down[f], n[f]);
57         area2= total_area(minplace,maxplace, counts[f], n[f]);
58       }
59       if (mode & FRACTION_POS_METHOD) {
60         area1=total_area_above_gauss(minplace, maxplace, counts[f], 
61                                      gauss_mean[f],sd[f],down[f], n[f]);
62         area2=0;
63       }
64       
65
66       for(i=minplace; i<=maxplace; i++) {
67         if (mode & MEAN_MODE) x= means[f][i];
68         else x= (double) i/2;
69
70 /***** We need to convert counts of residue scores into heights that     **/
71 /***** are compatable with the gaussian (where area is assumed to be 1). **/
72 /***** Hence, since each box is .5 wide, we take 2*count/n.              **/
73         hist_height= 2*(double)counts[f][i]/n[f];
74         
75         if (mode & RATIO_BOX_METHOD) 
76           likelihood[f][i]= like_gauss_by_box(x, gauss_mean[f],sd[f],
77                                            down[f], hist_height);
78         if (mode & RATIO_TRAP_METHOD)
79           likelihood[f][i]= like_gauss_by_line(x, gauss_mean[f],sd[f],
80                                             down[f], hist_height);
81         if (mode & RATIO_BOXES_GT_METHOD) 
82           likelihood[f][i]=ratio_PosToNeg_at_least_x(x, gauss_mean[f],
83                       sd[f],down[f], &area1,  &area2, hist_height);
84         if (mode & FRACTION_POS_METHOD) 
85           likelihood[f][i]=fraction_of_positives_lower_than_x(x, gauss_mean[f],
86                       sd[f], down[f], &area2,area1,hist_height);
87
88         if (low[f]<3) { 
89           if ((likelihood[f][i] > low_percent - error) &&
90               ( (double)i/2 > gauss_mean[f]) ) {   /* Guessing low percentage
91                                                       is higher than mean.   */
92             low[f]++;
93             error= error_allowed; /* For 2nd and 3d boxes.*/
94             if (low[f]==3) {
95               low_box[f]=i-2;  /* First of 3 consecutive low-error boxes */
96               error=0;             /* Now find first high percentage.*/
97             }
98           }
99           else {
100             low[f]=0;
101             error=0;             /* Back to finding first low box. */
102           }
103         }
104         else if (high[f]<3)
105           if (likelihood[f][i] >= high_percent - error) {
106             high[f]++;
107             error=error_allowed;
108             if (high[f]==3) 
109               high_box[f]=i-3;  /* Last box < high_percent was three before.*/
110           }
111           else {  
112             high[f]=0;
113             error=0;               /* Back to finding first high box. */
114           }
115       }
116       
117       if (high[f] < 3)  /* didn't find a high_box, so make  it last box. */
118         high_box[f]=maxplace;
119
120       if (low[f]<  3)  /* didn't  find a low_box  so make it the mean. */
121         low_box[f] = (int)(2*gauss_mean[f]);
122
123
124
125       fprintf(fout, "For txt file %d:", f);
126       least_sq_fit(&a[f], &b[f],(double)low_box[f]/2,
127                    (double)high_box[f]/2, means[f],likelihood[f],fout);
128     }
129   
130   for(f=0;f<filenum; f++) {
131     fprintf(fout, "\n\nFor txt file %d:\n", f);
132     for(i=minplace; i<=maxplace; i++) 
133       if (mode & MEAN_MODE)
134         fprintf(fout,"\nBox %lf (mean score %lf) has likelihood %lf",
135                 (double)i/2   , means[f][i], likelihood[f][i]);
136       else
137         fprintf(fout,"\nBox %lf has likelihood %lf",
138                 (double)i/2   , likelihood[f][i]);
139
140
141
142     /***Genetate postscript likelihood plot***/
143     post_script(foutps, minplace, maxplace, likelihood[f], 
144                 (mode & NO_HEADER_MODE), a[f], b[f],
145                 (double)low_box[f]/2,(double)high_box[f]/2 ); 
146
147     /****Generate Scatter Plot****/
148     if (mode & SCATTER_MODE) {
149       scatter_header(scatter,a[f],b[f],10,90);
150       scatter_points(scatter,scores);
151       scatter_trailer(scatter);
152     }
153
154   }
155
156
157 }
158
159
160 /*** Likelihood line is l(x)=ax+b from low_score to high_score.   ***/
161 void post_script(FILE *foutps, int minplace, int maxplace, double *likelihood,
162                  int no_header, double a, double b, double low_score, 
163                  double high_score)
164 {
165   int i;
166
167   if (!no_header)
168     post_script_header(foutps);
169
170
171   for (i=minplace; i<=maxplace; i++)
172     fprintf(foutps,"%lf %lf bar\n",(double)i/2 , likelihood[i]);
173
174   if (!no_header)
175     post_script_like_line(foutps, a, b, low_score, high_score);
176
177   if (!no_header)
178     post_script_trailer(foutps);
179  
180 }
181
182
183
184 /*** In doing the least square fit to line mx+b we get two equations:   */
185 /***    c1m + c2b = a1
186         c2m + c3b = a2
187   where c1 = # of points
188         c2= /sum  x_value of points
189         c3= /sum  x^2 for points
190         a1= /sum y_value of points
191         a2= /sum x*y       for all the points
192 *******/
193 /******
194      Solving for m and b gives:
195      b= a2c2 -a1c3/denom     and     m= a1c2 - a2c1/denom
196      where denom = c2^2 - c1c3
197 *********/
198
199 void least_sq_fit(double *m, double *b, double leftend, double rightend,
200                   double *means, double *likelihood, FILE *fout)
201 {
202   int i;
203   double c1=0,c2=0,c3=0,a1=0,a2=0;
204
205   c1= 2*(rightend - leftend) +1;
206   for(i=2*leftend; i<=2*rightend; i++) {
207     c2 += means[i];
208     c3 += means[i]*means[i];
209     a1 += likelihood[i];
210     a2 += likelihood[i]*means[i];
211   }
212   *b= (a2*c2 - a1*c3)/ (c2*c2 - c1*c3);
213   *m= (a1*c2 - a2*c1)/ (c2*c2 - c1*c3);
214
215   fprintf(fout, "\nThe likelihood curve from %.2lf to %.2lf using means of the scores for x values is approximated by the line:\n", leftend, rightend);
216   fprintf(fout, "            %.4lf * x + %.4lf\n\n",*m,*b);
217 }
218
219
220
221 /***Find an approximation to the score such that there are n1 residues with */
222 /***lower scores and 2*n1/n=(1-percent).  i.e. this mean gives a left side  */
223 /***which contains exactly half of the expected negatives (given by 1-%).   */ 
224 double percent_mean(long *counts, double *means, double percent, 
225                     int  minplace,int maxplace, long number_residues)
226 {         
227   int box;   /* This box will contain the score we want...  */
228   int i;
229   long less_than;  /** Number of scores less_than our current box. */
230   long greater_than;
231   double number_want_less_than, number_short, fraction_of_box;  
232   double left_side, right_side;
233   double score_estimate;
234   double f;   /* The fraction of the box we want.  **/
235
236   number_want_less_than = (double)number_residues * (1-percent)/2;
237
238   i=minplace;
239   less_than=0;
240   while ( (double)less_than < number_want_less_than) {
241     less_than += counts[i];
242     i++;
243   }
244
245   box=i-1;
246   left_side= (double)box/2 - .25;
247   right_side= (double)box/2 +.25;  
248   greater_than = number_residues - less_than;
249   less_than -= counts[box];
250
251   number_short= number_want_less_than - (double) less_than;
252 /*** We want to find the "number_short"th score in box.***/  
253   f= (double) number_short/counts[box];
254
255 /*******OPTION 1*/
256   score_estimate=  left_side +
257                  f* (right_side -left_side);
258 /*********/
259
260 /********OPTION 2: quadratic through (0,left_side) (1,right) (1/2,mean)**
261   score_estimate = f*f*(2*(right_side+left_side) - 4*means[box])
262                  + f*(4*means[box] - 3*left_side-right_side)
263                    + left_side;
264 ***************/
265
266   return(score_estimate);
267 }
268
269
270
271
272 /***** half_box_mean compute the mean of the input file by computing     **/
273 /***** the mean of all points in boxes that have count at least half     **/
274 /***** of the max count                                                  **/
275
276
277 double half_box_mean(int minplace, int maxplace, long *counts, double *means)
278 {
279   int max_count=0, first_half_box, last_half_box, half_count=0, i;
280   double new_gauss_mean=0;
281
282   for (i=minplace; i<=maxplace; ++i)
283     if (counts[i]>max_count) max_count=counts[i];
284   for (first_half_box=minplace; counts[first_half_box] < (double)max_count/2;
285              ++first_half_box);
286   for (last_half_box=maxplace; counts[last_half_box] < (double)max_count/2;
287              --last_half_box);
288
289   for (i=first_half_box; i<=last_half_box; i++) {
290     new_gauss_mean += means[i]* (double)counts[i];
291     half_count += counts[i];
292     }
293   new_gauss_mean /= half_count;
294   
295   fprintf(stderr, "\nGaussian mean based on half_max boxes = %lf\n",
296               new_gauss_mean);
297
298   return(new_gauss_mean);
299 }
300
301