JPRED-2 Add sources of all binaries (except alscript) to Git
[jpred.git] / sources / multicoil / compute_like.c
diff --git a/sources/multicoil/compute_like.c b/sources/multicoil/compute_like.c
new file mode 100644 (file)
index 0000000..8e0853f
--- /dev/null
@@ -0,0 +1,301 @@
+/* Ethan Wolf 1995   */
+
+#include <stdio.h>
+#include <math.h>
+#include "scio.h"
+#include "likelihood.h"
+#include "methods.h"
+#include "postscript.h"
+#include "scatter_ps.h"
+#include "compute_like.h"
+
+
+/* ax+b is the likelihood line approximation computed in likelihood region */
+/* from low % to hight %                                                    */
+void likelihood(long n[2], int mode, int filenum, double ratio, int minplace, 
+               int maxplace, double *means[2], long *counts[2],
+               double gauss_mean[2], double sd[2], double down[2], 
+               FILE *fout, FILE *foutps, FILE *scatter, FILE *scores,
+               double a[2], double b[2], 
+               double low_percent, double high_percent, double error_allowed)
+{
+  int i, f;  
+  double x, hist_height;
+  double  _likelihood[2][201-2*MINSCORE];
+  double  *likelihood[2];
+  double area1=0, area2=0;
+
+  int low[2]={0,0};  /* How many consectutive low % found.  */
+  int high[2]={0,0};  /* How many consecutive high % found. */
+  int low_box[2], high_box[2]; /* These hold the first              */
+       /* and last boxes that are low, high % likely, where these          */
+        /* determined by finding the first box above the gaussian mean such */
+        /* that it has likelihood above low percent and the next two boxes  */
+        /* are also above low-error percent.  Similarly, high% is the first */
+        /* following  box such that it has likelihood < %high and the two   */
+        /* following have likelihood at least high-error */
+  double error=0;
+
+
+  likelihood[0] = &(_likelihood[0][-2*MINSCORE]);
+  likelihood[1] = &(_likelihood[1][-2*MINSCORE]);
+
+
+  if (mode & STOCK_METHOD) {
+    for(i=minplace; i<=maxplace; i++) 
+      likelihood[0][i]= stock_like(ratio, (double)i/2, gauss_mean[0],sd[0],
+                                  down[0],gauss_mean[1], sd[1], down[1]);
+    filenum=1;  /*** For printout purposes.  ***/
+  }
+
+  else
+    for(f=0; f<filenum; f++) {
+      
+      if (mode & RATIO_BOXES_GT_METHOD) {
+       area1= total_area_above_gauss(minplace, maxplace, counts[f], 
+                                     gauss_mean[f],sd[f],down[f], n[f]);
+       area2= total_area(minplace,maxplace, counts[f], n[f]);
+      }
+      if (mode & FRACTION_POS_METHOD) {
+       area1=total_area_above_gauss(minplace, maxplace, counts[f], 
+                                    gauss_mean[f],sd[f],down[f], n[f]);
+       area2=0;
+      }
+      
+
+      for(i=minplace; i<=maxplace; i++) {
+       if (mode & MEAN_MODE) x= means[f][i];
+       else x= (double) i/2;
+
+/***** We need to convert counts of residue scores into heights that     **/
+/***** are compatable with the gaussian (where area is assumed to be 1). **/
+/***** Hence, since each box is .5 wide, we take 2*count/n.              **/
+       hist_height= 2*(double)counts[f][i]/n[f];
+       
+       if (mode & RATIO_BOX_METHOD) 
+         likelihood[f][i]= like_gauss_by_box(x, gauss_mean[f],sd[f],
+                                          down[f], hist_height);
+       if (mode & RATIO_TRAP_METHOD)
+         likelihood[f][i]= like_gauss_by_line(x, gauss_mean[f],sd[f],
+                                           down[f], hist_height);
+       if (mode & RATIO_BOXES_GT_METHOD) 
+         likelihood[f][i]=ratio_PosToNeg_at_least_x(x, gauss_mean[f],
+                     sd[f],down[f], &area1,  &area2, hist_height);
+       if (mode & FRACTION_POS_METHOD) 
+         likelihood[f][i]=fraction_of_positives_lower_than_x(x, gauss_mean[f],
+                     sd[f], down[f], &area2,area1,hist_height);
+
+       if (low[f]<3) { 
+         if ((likelihood[f][i] > low_percent - error) &&
+             ( (double)i/2 > gauss_mean[f]) ) {   /* Guessing low percentage
+                                                     is higher than mean.   */
+           low[f]++;
+           error= error_allowed; /* For 2nd and 3d boxes.*/
+           if (low[f]==3) {
+             low_box[f]=i-2;  /* First of 3 consecutive low-error boxes */
+             error=0;             /* Now find first high percentage.*/
+           }
+         }
+         else {
+           low[f]=0;
+           error=0;             /* Back to finding first low box. */
+         }
+       }
+       else if (high[f]<3)
+         if (likelihood[f][i] >= high_percent - error) {
+           high[f]++;
+           error=error_allowed;
+           if (high[f]==3) 
+             high_box[f]=i-3;  /* Last box < high_percent was three before.*/
+         }
+         else {  
+           high[f]=0;
+           error=0;               /* Back to finding first high box. */
+         }
+      }
+      
+      if (high[f] < 3)  /* didn't find a high_box, so make  it last box. */
+       high_box[f]=maxplace;
+
+      if (low[f]<  3)  /* didn't  find a low_box  so make it the mean. */
+       low_box[f] = (int)(2*gauss_mean[f]);
+
+
+
+      fprintf(fout, "For txt file %d:", f);
+      least_sq_fit(&a[f], &b[f],(double)low_box[f]/2,
+                  (double)high_box[f]/2, means[f],likelihood[f],fout);
+    }
+  
+  for(f=0;f<filenum; f++) {
+    fprintf(fout, "\n\nFor txt file %d:\n", f);
+    for(i=minplace; i<=maxplace; i++) 
+      if (mode & MEAN_MODE)
+       fprintf(fout,"\nBox %lf (mean score %lf) has likelihood %lf",
+               (double)i/2   , means[f][i], likelihood[f][i]);
+      else
+       fprintf(fout,"\nBox %lf has likelihood %lf",
+               (double)i/2   , likelihood[f][i]);
+
+
+
+    /***Genetate postscript likelihood plot***/
+    post_script(foutps, minplace, maxplace, likelihood[f], 
+               (mode & NO_HEADER_MODE), a[f], b[f],
+               (double)low_box[f]/2,(double)high_box[f]/2 ); 
+
+    /****Generate Scatter Plot****/
+    if (mode & SCATTER_MODE) {
+      scatter_header(scatter,a[f],b[f],10,90);
+      scatter_points(scatter,scores);
+      scatter_trailer(scatter);
+    }
+
+  }
+
+
+}
+
+
+/*** Likelihood line is l(x)=ax+b from low_score to high_score.   ***/
+void post_script(FILE *foutps, int minplace, int maxplace, double *likelihood,
+                int no_header, double a, double b, double low_score, 
+                 double high_score)
+{
+  int i;
+
+  if (!no_header)
+    post_script_header(foutps);
+
+
+  for (i=minplace; i<=maxplace; i++)
+    fprintf(foutps,"%lf %lf bar\n",(double)i/2 , likelihood[i]);
+
+  if (!no_header)
+    post_script_like_line(foutps, a, b, low_score, high_score);
+
+  if (!no_header)
+    post_script_trailer(foutps);
+}
+
+
+
+/*** In doing the least square fit to line mx+b we get two equations:   */
+/***    c1m + c2b = a1
+        c2m + c3b = a2
+  where c1 = # of points
+        c2= /sum  x_value of points
+       c3= /sum  x^2 for points
+        a1= /sum y_value of points
+        a2= /sum x*y       for all the points
+*******/
+/******
+     Solving for m and b gives:
+     b= a2c2 -a1c3/denom     and     m= a1c2 - a2c1/denom
+     where denom = c2^2 - c1c3
+*********/
+
+void least_sq_fit(double *m, double *b, double leftend, double rightend,
+                 double *means, double *likelihood, FILE *fout)
+{
+  int i;
+  double c1=0,c2=0,c3=0,a1=0,a2=0;
+
+  c1= 2*(rightend - leftend) +1;
+  for(i=2*leftend; i<=2*rightend; i++) {
+    c2 += means[i];
+    c3 += means[i]*means[i];
+    a1 += likelihood[i];
+    a2 += likelihood[i]*means[i];
+  }
+  *b= (a2*c2 - a1*c3)/ (c2*c2 - c1*c3);
+  *m= (a1*c2 - a2*c1)/ (c2*c2 - c1*c3);
+
+  fprintf(fout, "\nThe likelihood curve from %.2lf to %.2lf using means of the scores for x values is approximated by the line:\n", leftend, rightend);
+  fprintf(fout, "            %.4lf * x + %.4lf\n\n",*m,*b);
+}
+
+
+
+/***Find an approximation to the score such that there are n1 residues with */
+/***lower scores and 2*n1/n=(1-percent).  i.e. this mean gives a left side  */
+/***which contains exactly half of the expected negatives (given by 1-%).   */ 
+double percent_mean(long *counts, double *means, double percent, 
+                   int  minplace,int maxplace, long number_residues)
+{        
+  int box;   /* This box will contain the score we want...  */
+  int i;
+  long less_than;  /** Number of scores less_than our current box. */
+  long greater_than;
+  double number_want_less_than, number_short, fraction_of_box;  
+  double left_side, right_side;
+  double score_estimate;
+  double f;   /* The fraction of the box we want.  **/
+
+  number_want_less_than = (double)number_residues * (1-percent)/2;
+
+  i=minplace;
+  less_than=0;
+  while ( (double)less_than < number_want_less_than) {
+    less_than += counts[i];
+    i++;
+  }
+
+  box=i-1;
+  left_side= (double)box/2 - .25;
+  right_side= (double)box/2 +.25;  
+  greater_than = number_residues - less_than;
+  less_than -= counts[box];
+
+  number_short= number_want_less_than - (double) less_than;
+/*** We want to find the "number_short"th score in box.***/  
+  f= (double) number_short/counts[box];
+
+/*******OPTION 1*/
+  score_estimate=  left_side +
+                 f* (right_side -left_side);
+/*********/
+
+/********OPTION 2: quadratic through (0,left_side) (1,right) (1/2,mean)**
+  score_estimate = f*f*(2*(right_side+left_side) - 4*means[box])
+                 + f*(4*means[box] - 3*left_side-right_side)
+                  + left_side;
+***************/
+
+  return(score_estimate);
+}
+
+
+
+
+/***** half_box_mean compute the mean of the input file by computing     **/
+/***** the mean of all points in boxes that have count at least half     **/
+/***** of the max count                                                  **/
+
+
+double half_box_mean(int minplace, int maxplace, long *counts, double *means)
+{
+  int max_count=0, first_half_box, last_half_box, half_count=0, i;
+  double new_gauss_mean=0;
+
+  for (i=minplace; i<=maxplace; ++i)
+    if (counts[i]>max_count) max_count=counts[i];
+  for (first_half_box=minplace; counts[first_half_box] < (double)max_count/2;
+             ++first_half_box);
+  for (last_half_box=maxplace; counts[last_half_box] < (double)max_count/2;
+             --last_half_box);
+
+  for (i=first_half_box; i<=last_half_box; i++) {
+    new_gauss_mean += means[i]* (double)counts[i];
+    half_count += counts[i];
+    }
+  new_gauss_mean /= half_count;
+  
+  fprintf(stderr, "\nGaussian mean based on half_max boxes = %lf\n",
+             new_gauss_mean);
+
+  return(new_gauss_mean);
+}
+
+