+/* Ethan Wolf 1995 */
+
+#include <stdio.h>
+#include <math.h>
+#include "scio.h"
+#include "likelihood.h"
+#include "methods.h"
+#include "postscript.h"
+#include "scatter_ps.h"
+#include "compute_like.h"
+
+
+/* ax+b is the likelihood line approximation computed in likelihood region */
+/* from low % to hight % */
+void likelihood(long n[2], int mode, int filenum, double ratio, int minplace,
+ int maxplace, double *means[2], long *counts[2],
+ double gauss_mean[2], double sd[2], double down[2],
+ FILE *fout, FILE *foutps, FILE *scatter, FILE *scores,
+ double a[2], double b[2],
+ double low_percent, double high_percent, double error_allowed)
+{
+ int i, f;
+ double x, hist_height;
+ double _likelihood[2][201-2*MINSCORE];
+ double *likelihood[2];
+ double area1=0, area2=0;
+
+ int low[2]={0,0}; /* How many consectutive low % found. */
+ int high[2]={0,0}; /* How many consecutive high % found. */
+ int low_box[2], high_box[2]; /* These hold the first */
+ /* and last boxes that are low, high % likely, where these */
+ /* determined by finding the first box above the gaussian mean such */
+ /* that it has likelihood above low percent and the next two boxes */
+ /* are also above low-error percent. Similarly, high% is the first */
+ /* following box such that it has likelihood < %high and the two */
+ /* following have likelihood at least high-error */
+ double error=0;
+
+
+ likelihood[0] = &(_likelihood[0][-2*MINSCORE]);
+ likelihood[1] = &(_likelihood[1][-2*MINSCORE]);
+
+
+ if (mode & STOCK_METHOD) {
+ for(i=minplace; i<=maxplace; i++)
+ likelihood[0][i]= stock_like(ratio, (double)i/2, gauss_mean[0],sd[0],
+ down[0],gauss_mean[1], sd[1], down[1]);
+ filenum=1; /*** For printout purposes. ***/
+ }
+
+ else
+ for(f=0; f<filenum; f++) {
+
+ if (mode & RATIO_BOXES_GT_METHOD) {
+ area1= total_area_above_gauss(minplace, maxplace, counts[f],
+ gauss_mean[f],sd[f],down[f], n[f]);
+ area2= total_area(minplace,maxplace, counts[f], n[f]);
+ }
+ if (mode & FRACTION_POS_METHOD) {
+ area1=total_area_above_gauss(minplace, maxplace, counts[f],
+ gauss_mean[f],sd[f],down[f], n[f]);
+ area2=0;
+ }
+
+
+ for(i=minplace; i<=maxplace; i++) {
+ if (mode & MEAN_MODE) x= means[f][i];
+ else x= (double) i/2;
+
+/***** We need to convert counts of residue scores into heights that **/
+/***** are compatable with the gaussian (where area is assumed to be 1). **/
+/***** Hence, since each box is .5 wide, we take 2*count/n. **/
+ hist_height= 2*(double)counts[f][i]/n[f];
+
+ if (mode & RATIO_BOX_METHOD)
+ likelihood[f][i]= like_gauss_by_box(x, gauss_mean[f],sd[f],
+ down[f], hist_height);
+ if (mode & RATIO_TRAP_METHOD)
+ likelihood[f][i]= like_gauss_by_line(x, gauss_mean[f],sd[f],
+ down[f], hist_height);
+ if (mode & RATIO_BOXES_GT_METHOD)
+ likelihood[f][i]=ratio_PosToNeg_at_least_x(x, gauss_mean[f],
+ sd[f],down[f], &area1, &area2, hist_height);
+ if (mode & FRACTION_POS_METHOD)
+ likelihood[f][i]=fraction_of_positives_lower_than_x(x, gauss_mean[f],
+ sd[f], down[f], &area2,area1,hist_height);
+
+ if (low[f]<3) {
+ if ((likelihood[f][i] > low_percent - error) &&
+ ( (double)i/2 > gauss_mean[f]) ) { /* Guessing low percentage
+ is higher than mean. */
+ low[f]++;
+ error= error_allowed; /* For 2nd and 3d boxes.*/
+ if (low[f]==3) {
+ low_box[f]=i-2; /* First of 3 consecutive low-error boxes */
+ error=0; /* Now find first high percentage.*/
+ }
+ }
+ else {
+ low[f]=0;
+ error=0; /* Back to finding first low box. */
+ }
+ }
+ else if (high[f]<3)
+ if (likelihood[f][i] >= high_percent - error) {
+ high[f]++;
+ error=error_allowed;
+ if (high[f]==3)
+ high_box[f]=i-3; /* Last box < high_percent was three before.*/
+ }
+ else {
+ high[f]=0;
+ error=0; /* Back to finding first high box. */
+ }
+ }
+
+ if (high[f] < 3) /* didn't find a high_box, so make it last box. */
+ high_box[f]=maxplace;
+
+ if (low[f]< 3) /* didn't find a low_box so make it the mean. */
+ low_box[f] = (int)(2*gauss_mean[f]);
+
+
+
+ fprintf(fout, "For txt file %d:", f);
+ least_sq_fit(&a[f], &b[f],(double)low_box[f]/2,
+ (double)high_box[f]/2, means[f],likelihood[f],fout);
+ }
+
+ for(f=0;f<filenum; f++) {
+ fprintf(fout, "\n\nFor txt file %d:\n", f);
+ for(i=minplace; i<=maxplace; i++)
+ if (mode & MEAN_MODE)
+ fprintf(fout,"\nBox %lf (mean score %lf) has likelihood %lf",
+ (double)i/2 , means[f][i], likelihood[f][i]);
+ else
+ fprintf(fout,"\nBox %lf has likelihood %lf",
+ (double)i/2 , likelihood[f][i]);
+
+
+
+ /***Genetate postscript likelihood plot***/
+ post_script(foutps, minplace, maxplace, likelihood[f],
+ (mode & NO_HEADER_MODE), a[f], b[f],
+ (double)low_box[f]/2,(double)high_box[f]/2 );
+
+ /****Generate Scatter Plot****/
+ if (mode & SCATTER_MODE) {
+ scatter_header(scatter,a[f],b[f],10,90);
+ scatter_points(scatter,scores);
+ scatter_trailer(scatter);
+ }
+
+ }
+
+
+}
+
+
+/*** Likelihood line is l(x)=ax+b from low_score to high_score. ***/
+void post_script(FILE *foutps, int minplace, int maxplace, double *likelihood,
+ int no_header, double a, double b, double low_score,
+ double high_score)
+{
+ int i;
+
+ if (!no_header)
+ post_script_header(foutps);
+
+
+ for (i=minplace; i<=maxplace; i++)
+ fprintf(foutps,"%lf %lf bar\n",(double)i/2 , likelihood[i]);
+
+ if (!no_header)
+ post_script_like_line(foutps, a, b, low_score, high_score);
+
+ if (!no_header)
+ post_script_trailer(foutps);
+
+}
+
+
+
+/*** In doing the least square fit to line mx+b we get two equations: */
+/*** c1m + c2b = a1
+ c2m + c3b = a2
+ where c1 = # of points
+ c2= /sum x_value of points
+ c3= /sum x^2 for points
+ a1= /sum y_value of points
+ a2= /sum x*y for all the points
+*******/
+/******
+ Solving for m and b gives:
+ b= a2c2 -a1c3/denom and m= a1c2 - a2c1/denom
+ where denom = c2^2 - c1c3
+*********/
+
+void least_sq_fit(double *m, double *b, double leftend, double rightend,
+ double *means, double *likelihood, FILE *fout)
+{
+ int i;
+ double c1=0,c2=0,c3=0,a1=0,a2=0;
+
+ c1= 2*(rightend - leftend) +1;
+ for(i=2*leftend; i<=2*rightend; i++) {
+ c2 += means[i];
+ c3 += means[i]*means[i];
+ a1 += likelihood[i];
+ a2 += likelihood[i]*means[i];
+ }
+ *b= (a2*c2 - a1*c3)/ (c2*c2 - c1*c3);
+ *m= (a1*c2 - a2*c1)/ (c2*c2 - c1*c3);
+
+ fprintf(fout, "\nThe likelihood curve from %.2lf to %.2lf using means of the scores for x values is approximated by the line:\n", leftend, rightend);
+ fprintf(fout, " %.4lf * x + %.4lf\n\n",*m,*b);
+}
+
+
+
+/***Find an approximation to the score such that there are n1 residues with */
+/***lower scores and 2*n1/n=(1-percent). i.e. this mean gives a left side */
+/***which contains exactly half of the expected negatives (given by 1-%). */
+double percent_mean(long *counts, double *means, double percent,
+ int minplace,int maxplace, long number_residues)
+{
+ int box; /* This box will contain the score we want... */
+ int i;
+ long less_than; /** Number of scores less_than our current box. */
+ long greater_than;
+ double number_want_less_than, number_short, fraction_of_box;
+ double left_side, right_side;
+ double score_estimate;
+ double f; /* The fraction of the box we want. **/
+
+ number_want_less_than = (double)number_residues * (1-percent)/2;
+
+ i=minplace;
+ less_than=0;
+ while ( (double)less_than < number_want_less_than) {
+ less_than += counts[i];
+ i++;
+ }
+
+ box=i-1;
+ left_side= (double)box/2 - .25;
+ right_side= (double)box/2 +.25;
+ greater_than = number_residues - less_than;
+ less_than -= counts[box];
+
+ number_short= number_want_less_than - (double) less_than;
+/*** We want to find the "number_short"th score in box.***/
+ f= (double) number_short/counts[box];
+
+/*******OPTION 1*/
+ score_estimate= left_side +
+ f* (right_side -left_side);
+/*********/
+
+/********OPTION 2: quadratic through (0,left_side) (1,right) (1/2,mean)**
+ score_estimate = f*f*(2*(right_side+left_side) - 4*means[box])
+ + f*(4*means[box] - 3*left_side-right_side)
+ + left_side;
+***************/
+
+ return(score_estimate);
+}
+
+
+
+
+/***** half_box_mean compute the mean of the input file by computing **/
+/***** the mean of all points in boxes that have count at least half **/
+/***** of the max count **/
+
+
+double half_box_mean(int minplace, int maxplace, long *counts, double *means)
+{
+ int max_count=0, first_half_box, last_half_box, half_count=0, i;
+ double new_gauss_mean=0;
+
+ for (i=minplace; i<=maxplace; ++i)
+ if (counts[i]>max_count) max_count=counts[i];
+ for (first_half_box=minplace; counts[first_half_box] < (double)max_count/2;
+ ++first_half_box);
+ for (last_half_box=maxplace; counts[last_half_box] < (double)max_count/2;
+ --last_half_box);
+
+ for (i=first_half_box; i<=last_half_box; i++) {
+ new_gauss_mean += means[i]* (double)counts[i];
+ half_count += counts[i];
+ }
+ new_gauss_mean /= half_count;
+
+ fprintf(stderr, "\nGaussian mean based on half_max boxes = %lf\n",
+ new_gauss_mean);
+
+ return(new_gauss_mean);
+}
+
+