6 #include "likelihood.h"
8 #include "postscript.h"
9 #include "scatter_ps.h"
10 #include "compute_like.h"
13 /* ax+b is the likelihood line approximation computed in likelihood region */
14 /* from low % to hight % */
15 void likelihood(long n[2], int mode, int filenum, double ratio, int minplace,
16 int maxplace, double *means[2], long *counts[2],
17 double gauss_mean[2], double sd[2], double down[2],
18 FILE *fout, FILE *foutps, FILE *scatter, FILE *scores,
19 double a[2], double b[2],
20 double low_percent, double high_percent, double error_allowed)
23 double x, hist_height;
24 double _likelihood[2][201-2*MINSCORE];
25 double *likelihood[2];
26 double area1=0, area2=0;
28 int low[2]={0,0}; /* How many consectutive low % found. */
29 int high[2]={0,0}; /* How many consecutive high % found. */
30 int low_box[2], high_box[2]; /* These hold the first */
31 /* and last boxes that are low, high % likely, where these */
32 /* determined by finding the first box above the gaussian mean such */
33 /* that it has likelihood above low percent and the next two boxes */
34 /* are also above low-error percent. Similarly, high% is the first */
35 /* following box such that it has likelihood < %high and the two */
36 /* following have likelihood at least high-error */
40 likelihood[0] = &(_likelihood[0][-2*MINSCORE]);
41 likelihood[1] = &(_likelihood[1][-2*MINSCORE]);
44 if (mode & STOCK_METHOD) {
45 for(i=minplace; i<=maxplace; i++)
46 likelihood[0][i]= stock_like(ratio, (double)i/2, gauss_mean[0],sd[0],
47 down[0],gauss_mean[1], sd[1], down[1]);
48 filenum=1; /*** For printout purposes. ***/
52 for(f=0; f<filenum; f++) {
54 if (mode & RATIO_BOXES_GT_METHOD) {
55 area1= total_area_above_gauss(minplace, maxplace, counts[f],
56 gauss_mean[f],sd[f],down[f], n[f]);
57 area2= total_area(minplace,maxplace, counts[f], n[f]);
59 if (mode & FRACTION_POS_METHOD) {
60 area1=total_area_above_gauss(minplace, maxplace, counts[f],
61 gauss_mean[f],sd[f],down[f], n[f]);
66 for(i=minplace; i<=maxplace; i++) {
67 if (mode & MEAN_MODE) x= means[f][i];
70 /***** We need to convert counts of residue scores into heights that **/
71 /***** are compatable with the gaussian (where area is assumed to be 1). **/
72 /***** Hence, since each box is .5 wide, we take 2*count/n. **/
73 hist_height= 2*(double)counts[f][i]/n[f];
75 if (mode & RATIO_BOX_METHOD)
76 likelihood[f][i]= like_gauss_by_box(x, gauss_mean[f],sd[f],
77 down[f], hist_height);
78 if (mode & RATIO_TRAP_METHOD)
79 likelihood[f][i]= like_gauss_by_line(x, gauss_mean[f],sd[f],
80 down[f], hist_height);
81 if (mode & RATIO_BOXES_GT_METHOD)
82 likelihood[f][i]=ratio_PosToNeg_at_least_x(x, gauss_mean[f],
83 sd[f],down[f], &area1, &area2, hist_height);
84 if (mode & FRACTION_POS_METHOD)
85 likelihood[f][i]=fraction_of_positives_lower_than_x(x, gauss_mean[f],
86 sd[f], down[f], &area2,area1,hist_height);
89 if ((likelihood[f][i] > low_percent - error) &&
90 ( (double)i/2 > gauss_mean[f]) ) { /* Guessing low percentage
91 is higher than mean. */
93 error= error_allowed; /* For 2nd and 3d boxes.*/
95 low_box[f]=i-2; /* First of 3 consecutive low-error boxes */
96 error=0; /* Now find first high percentage.*/
101 error=0; /* Back to finding first low box. */
105 if (likelihood[f][i] >= high_percent - error) {
109 high_box[f]=i-3; /* Last box < high_percent was three before.*/
113 error=0; /* Back to finding first high box. */
117 if (high[f] < 3) /* didn't find a high_box, so make it last box. */
118 high_box[f]=maxplace;
120 if (low[f]< 3) /* didn't find a low_box so make it the mean. */
121 low_box[f] = (int)(2*gauss_mean[f]);
125 fprintf(fout, "For txt file %d:", f);
126 least_sq_fit(&a[f], &b[f],(double)low_box[f]/2,
127 (double)high_box[f]/2, means[f],likelihood[f],fout);
130 for(f=0;f<filenum; f++) {
131 fprintf(fout, "\n\nFor txt file %d:\n", f);
132 for(i=minplace; i<=maxplace; i++)
133 if (mode & MEAN_MODE)
134 fprintf(fout,"\nBox %lf (mean score %lf) has likelihood %lf",
135 (double)i/2 , means[f][i], likelihood[f][i]);
137 fprintf(fout,"\nBox %lf has likelihood %lf",
138 (double)i/2 , likelihood[f][i]);
142 /***Genetate postscript likelihood plot***/
143 post_script(foutps, minplace, maxplace, likelihood[f],
144 (mode & NO_HEADER_MODE), a[f], b[f],
145 (double)low_box[f]/2,(double)high_box[f]/2 );
147 /****Generate Scatter Plot****/
148 if (mode & SCATTER_MODE) {
149 scatter_header(scatter,a[f],b[f],10,90);
150 scatter_points(scatter,scores);
151 scatter_trailer(scatter);
160 /*** Likelihood line is l(x)=ax+b from low_score to high_score. ***/
161 void post_script(FILE *foutps, int minplace, int maxplace, double *likelihood,
162 int no_header, double a, double b, double low_score,
168 post_script_header(foutps);
171 for (i=minplace; i<=maxplace; i++)
172 fprintf(foutps,"%lf %lf bar\n",(double)i/2 , likelihood[i]);
175 post_script_like_line(foutps, a, b, low_score, high_score);
178 post_script_trailer(foutps);
184 /*** In doing the least square fit to line mx+b we get two equations: */
187 where c1 = # of points
188 c2= /sum x_value of points
189 c3= /sum x^2 for points
190 a1= /sum y_value of points
191 a2= /sum x*y for all the points
194 Solving for m and b gives:
195 b= a2c2 -a1c3/denom and m= a1c2 - a2c1/denom
196 where denom = c2^2 - c1c3
199 void least_sq_fit(double *m, double *b, double leftend, double rightend,
200 double *means, double *likelihood, FILE *fout)
203 double c1=0,c2=0,c3=0,a1=0,a2=0;
205 c1= 2*(rightend - leftend) +1;
206 for(i=2*leftend; i<=2*rightend; i++) {
208 c3 += means[i]*means[i];
210 a2 += likelihood[i]*means[i];
212 *b= (a2*c2 - a1*c3)/ (c2*c2 - c1*c3);
213 *m= (a1*c2 - a2*c1)/ (c2*c2 - c1*c3);
215 fprintf(fout, "\nThe likelihood curve from %.2lf to %.2lf using means of the scores for x values is approximated by the line:\n", leftend, rightend);
216 fprintf(fout, " %.4lf * x + %.4lf\n\n",*m,*b);
221 /***Find an approximation to the score such that there are n1 residues with */
222 /***lower scores and 2*n1/n=(1-percent). i.e. this mean gives a left side */
223 /***which contains exactly half of the expected negatives (given by 1-%). */
224 double percent_mean(long *counts, double *means, double percent,
225 int minplace,int maxplace, long number_residues)
227 int box; /* This box will contain the score we want... */
229 long less_than; /** Number of scores less_than our current box. */
231 double number_want_less_than, number_short, fraction_of_box;
232 double left_side, right_side;
233 double score_estimate;
234 double f; /* The fraction of the box we want. **/
236 number_want_less_than = (double)number_residues * (1-percent)/2;
240 while ( (double)less_than < number_want_less_than) {
241 less_than += counts[i];
246 left_side= (double)box/2 - .25;
247 right_side= (double)box/2 +.25;
248 greater_than = number_residues - less_than;
249 less_than -= counts[box];
251 number_short= number_want_less_than - (double) less_than;
252 /*** We want to find the "number_short"th score in box.***/
253 f= (double) number_short/counts[box];
256 score_estimate= left_side +
257 f* (right_side -left_side);
260 /********OPTION 2: quadratic through (0,left_side) (1,right) (1/2,mean)**
261 score_estimate = f*f*(2*(right_side+left_side) - 4*means[box])
262 + f*(4*means[box] - 3*left_side-right_side)
266 return(score_estimate);
272 /***** half_box_mean compute the mean of the input file by computing **/
273 /***** the mean of all points in boxes that have count at least half **/
274 /***** of the max count **/
277 double half_box_mean(int minplace, int maxplace, long *counts, double *means)
279 int max_count=0, first_half_box, last_half_box, half_count=0, i;
280 double new_gauss_mean=0;
282 for (i=minplace; i<=maxplace; ++i)
283 if (counts[i]>max_count) max_count=counts[i];
284 for (first_half_box=minplace; counts[first_half_box] < (double)max_count/2;
286 for (last_half_box=maxplace; counts[last_half_box] < (double)max_count/2;
289 for (i=first_half_box; i<=last_half_box; i++) {
290 new_gauss_mean += means[i]* (double)counts[i];
291 half_count += counts[i];
293 new_gauss_mean /= half_count;
295 fprintf(stderr, "\nGaussian mean based on half_max boxes = %lf\n",
298 return(new_gauss_mean);