/* Ethan Wolf 1995 */ #include #include #include "methods.h" /*** Note that to get the gaussian on the same scale as the histogram of **/ /*** residue counts, we need to take counts[f][i]/number_residues_in_hist. **/ double gauss_value(double x, double mean, double sd, double down) { double gauss_x; /*** Note: 2*pi=2.506... **/ gauss_x = down * exp( - (x-mean)/sd*(x-mean)/sd /2 ) / (sd*2.5066282746310002); return(gauss_x); } /** The following approximates the gaussian by the line from gauss1 to */ /** gauss2. Let y= hist(x). y1=min{gauss1,gauss2}, y2=max{gauss1,gauss2} */ /** Case1: y>y2 Then area of hist over gauss is: y-y2 + .5*(y2-y1) Case2: y=y2) return( (hist_height-y2 + .5*(y2-y1))/hist_height); else if (hist_height<=y1) return(0); else { over_gauss = .5* ( (hist_height-y1)*(hist_height-y1)/(y2-y1) - - (y2- hist_height)*(y2- hist_height)/(y2-y1)); if (over_gauss>0) return(over_gauss/hist_height); else return(0); } } /*** The following considers the area under the gauss to be gauss(x) so the height of the gaussian at x (x could be the midpoint of the box, or the mean of the scores in the box, etc. ****/ double like_gauss_by_box(double x, double mean, double sd, double down, double hist_height) { double gauss; gauss = gauss_value(x,mean,sd,down); if (hist_height >= gauss) return( (hist_height - gauss)/hist_height); else return(0); } /*** The following function takes two gaussians (pdb- and the positives) **/ /*** and a parameter (estimate of the ratio of negatives to positives in **/ /*** all proteins) and computes likelihood(x) as: gauss_pos(x)/[gauss_pos(x) + ratio*gauss_neg(x)] **/ /* This is the method that STOCK used for likelihoods. **/ double stock_like(double ratio, double x, double mean_neg, double sd_neg, double down_neg, double mean_pos, double sd_pos, double down_pos) { double gauss_neg, gauss_pos; gauss_neg= gauss_value(x,mean_neg,sd_neg,down_neg); gauss_pos= gauss_value(x,mean_pos,sd_pos,down_neg); return(gauss_pos/(gauss_pos + ratio*gauss_neg)); } /**** The following function computes likelihood(x) as: fraction of residues that score AT LEAST x that are "positives" i.e area_pos_score_at_least_x/area_total_score_at_least_x Use total_area_above_gauss() and total_area() to compute initial values for area_pos_score_at_least_x and area_all_score_at_least_x. ****/ double ratio_PosToNeg_at_least_x(double x, double mean, double sd, double down, double *area_pos_score_at_least_x, double *area_all_score_at_least_x, double hist_height) { double gauss, like; like = (*area_pos_score_at_least_x)/(*area_all_score_at_least_x); gauss = gauss_value(x,mean,sd,down); if (hist_height > gauss) *area_pos_score_at_least_x -= hist_height -gauss; *area_all_score_at_least_x -= hist_height; return(like); } /*** This function is not so much a likelihood, as the fraction of "positives" (things above the gaussian) that score x or less. ***/ /*** To compute this we need to first run through and compute the total **/ /*** area above the gaussian. We also keep a running total of the area **/ /*** above the gaussian up to x. **/ double fraction_of_positives_lower_than_x(double x, double mean, double sd, double down, double *area_to_x, double total_area, double hist_height) { double gauss; gauss = gauss_value(x,mean,sd,down); if (hist_height > gauss) *area_to_x += hist_height - gauss; return(*area_to_x/total_area); } /*** hist_height[i] corresponds to the x value i/2 ***/ double total_area_above_gauss(double min_x, double max_x, long *counts, double mean, double sd, double down, long number_residues) { double hist_height; double gauss, area_above=0; int i; for(i=2*min_x; i<= 2*max_x; i++) { gauss = gauss_value((double)i/2,mean,sd,down); /** Need to convert counts to be on same scale as gauss. **/ hist_height = 2*(double)counts[i]/number_residues; if (hist_height > gauss) area_above += hist_height-gauss; } return(area_above); } double total_area(double min_x, double max_x, long *counts, long number_residues) { double hist_height; double area=0; int i; for(i=2*min_x; i<= 2*max_x; i++) { /** Need to convert counts to be on same scale as gauss. **/ hist_height = 2*(double)counts[i]/number_residues; area += hist_height; } return(area); }