/* Ethan Wolf 1995 */

#include <stdio.h>
#include <math.h>
#include "methods.h"


/*** Note that to get the gaussian on the same scale as the histogram of   **/
/*** residue counts, we need to take counts[f][i]/number_residues_in_hist. **/
double gauss_value(double x, double mean, double sd, double down)
{
  double gauss_x;

   /*** Note: 2*pi=2.506... **/

  gauss_x = down * exp( - (x-mean)/sd*(x-mean)/sd /2 ) / 
                  (sd*2.5066282746310002);
  return(gauss_x);
}

/** The following approximates the gaussian by the line from gauss1 to */
/** gauss2.  Let y= hist(x). y1=min{gauss1,gauss2}, y2=max{gauss1,gauss2}    */
/** Case1:
       y>y2
           Then area of hist over gauss is:
	   y-y2 + .5*(y2-y1)
    Case2:
       y<y1
           THen area of hist over gauss is 0.
    Case3:  
       y1<y<y2
           Then fraction width fraction (y-y1)/(y2-y1) is over  gauss.
             so that contributes  .5 * (y-y1)/(y2-y1) *  (y-y1)  to area.
**********************/


double like_gauss_by_line(double x, double mean, double sd, double down, 
		     double hist_height)
{
  double gauss1, gauss2, y1, y2;
  double over_gauss;

  /*** Just put this in on Jan 15, 1996 cuz I don't think we want to use */
  /*** the down value, since the histogram was just fit to the left */
  /*** side of scores below the mean.  *************/

/*  down=1; */
 /*******************/  

  gauss1 = gauss_value(x-.25,mean,sd,down);
  gauss2= gauss_value(x+.25,mean,sd,down);

  if (gauss1<gauss2) {
    y1=gauss1;  y2=gauss2; }
  else {
    y2=gauss1; y1=gauss2;  }
  
  if (hist_height>=y2)
    return( (hist_height-y2 + .5*(y2-y1))/hist_height);
  else if (hist_height<=y1)
    return(0);
  else {
    over_gauss = .5* ( (hist_height-y1)*(hist_height-y1)/(y2-y1)  -
		      - (y2- hist_height)*(y2- hist_height)/(y2-y1));
    if (over_gauss>0)
      return(over_gauss/hist_height);
    else
      return(0);
  }
}


/*** The following considers the area under the gauss to be
     gauss(x) so the height of the gaussian at x (x could be
     the midpoint of the box, or the mean of the scores
     in the box, etc.              
****/

double like_gauss_by_box(double x, double mean, double sd, double down,
			 double hist_height)
{
  double gauss;
  

  gauss = gauss_value(x,mean,sd,down);
  if (hist_height >= gauss) 
    return( (hist_height - gauss)/hist_height);
  else
    return(0);
}


/*** The following function takes two gaussians (pdb- and the positives) **/
/*** and a parameter (estimate of the ratio of negatives to positives in **/
/*** all proteins) and computes likelihood(x) as:
          gauss_pos(x)/[gauss_pos(x) + ratio*gauss_neg(x)]
**/
/*  This is the method that STOCK used for likelihoods.                 **/

double stock_like(double ratio, double x, double mean_neg, double sd_neg, 
	     double down_neg, double mean_pos, double sd_pos, double down_pos)
{
  double gauss_neg, gauss_pos;

  gauss_neg=  gauss_value(x,mean_neg,sd_neg,down_neg);
  gauss_pos=  gauss_value(x,mean_pos,sd_pos,down_neg);

  return(gauss_pos/(gauss_pos + ratio*gauss_neg));
}


/**** The following function computes likelihood(x) as:
  fraction of residues that score AT LEAST x that are "positives"
 i.e area_pos_score_at_least_x/area_total_score_at_least_x

 Use total_area_above_gauss() and total_area() to compute initial
 values for area_pos_score_at_least_x and area_all_score_at_least_x.
****/

double ratio_PosToNeg_at_least_x(double x, double mean, double sd, double down,
			       double *area_pos_score_at_least_x,
			       double *area_all_score_at_least_x,
			       double hist_height)
{
  double gauss, like;
  
  like = (*area_pos_score_at_least_x)/(*area_all_score_at_least_x);

  gauss = gauss_value(x,mean,sd,down);
  if (hist_height > gauss) 
    *area_pos_score_at_least_x -= hist_height -gauss;

  *area_all_score_at_least_x -= hist_height;

  return(like);
}


/*** This function is not so much a likelihood, as the fraction of
     "positives" (things above the gaussian) that score x or less. ***/
/***  To compute this we need to first run through and compute the total **/
/***  area above the gaussian.  We also keep a running total of the area **/
/***  above the gaussian up to x.   **/

double fraction_of_positives_lower_than_x(double x, double mean, double sd,
		  double down, double *area_to_x, double total_area, 
		  double hist_height)
{
  double gauss;

  gauss = gauss_value(x,mean,sd,down);
  if (hist_height > gauss) 
    *area_to_x += hist_height - gauss;
  
  return(*area_to_x/total_area);
}


/*** hist_height[i] corresponds to the x value i/2 ***/
double total_area_above_gauss(double min_x, double max_x, long *counts,
			      double mean, double sd, double down,
			      long number_residues)
{
  double hist_height;
  double gauss, area_above=0;
  int i;

  for(i=2*min_x; i<= 2*max_x; i++) {
    gauss = gauss_value((double)i/2,mean,sd,down);
    /** Need to convert counts to be on same scale as gauss.   **/
    hist_height = 2*(double)counts[i]/number_residues;
    
    if (hist_height > gauss)
      area_above += hist_height-gauss;
  }
  
  return(area_above);
}

double total_area(double min_x, double max_x, long *counts, 
		  long number_residues)
{
  double hist_height;
  double area=0;
  int i;

  for(i=2*min_x; i<= 2*max_x; i++) {
    /** Need to convert counts to be on same scale as gauss.   **/
    hist_height = 2*(double)counts[i]/number_residues;
    area += hist_height;
  }
  return(area);
}