website/archive/binaries/mac/src/clustalo/src/squid/sre_math.c

   1 /*****************************************************************
   2  * SQUID - a library of functions for biological sequence analysis
   3  * Copyright (C) 1992-2002 Washington University School of Medicine
   4  *
   5  *     This source code is freely distributed under the terms of the
   6  *     GNU General Public License. See the files COPYRIGHT and LICENSE
   7  *     for details.
   8  *****************************************************************/
   9
  10 /* sre_math.c
  11  *
  12  * Portability for and extensions to C math library.
  13  * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp)
  14  */
  15
  16 #include <stdio.h>
  17 #include <stdlib.h>
  18 #include <math.h>
  19 #include "squid.h"
  20
  21
  22 /* Function: Linefit()
  23  *
  24  * Purpose:  Given points x[0..N-1] and y[0..N-1], fit to
  25  *           a straight line y = a + bx.
  26  *           a, b, and the linear correlation coefficient r
  27  *           are filled in for return.
  28  *
  29  * Args:     x     - x values of data
  30  *           y     - y values of data
  31  *           N     - number of data points
  32  *           ret_a - RETURN: intercept
  33  *           ret_b - RETURN: slope
  34  *           ret_r - RETURN: correlation coefficient
  35  *
  36  * Return:   1 on success, 0 on failure.
  37  */
  38 int
  39 Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r)
  40 {
  41   float xavg, yavg;
  42   float sxx, syy, sxy;
  43   int   i;
  44
  45   /* Calculate averages, xavg and yavg
  46    */
  47   xavg = yavg = 0.0;
  48   for (i = 0; i < N; i++)
  49     {
  50       xavg += x[i];
  51       yavg += y[i];
  52     }
  53   xavg /= (float) N;
  54   yavg /= (float) N;
  55
  56   sxx = syy = sxy = 0.0;
  57   for (i = 0; i < N; i++)
  58     {
  59       sxx    += (x[i] - xavg) * (x[i] - xavg);
  60       syy    += (y[i] - yavg) * (y[i] - xavg);
  61       sxy    += (x[i] - xavg) * (y[i] - yavg);
  62     }
  63   *ret_b = sxy / sxx;
  64   *ret_a = yavg - xavg*(*ret_b);
  65   *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
  66   return 1;
  67 }
  68
  69
  70 /* Function: WeightedLinefit()
  71  *
  72  * Purpose:  Given points x[0..N-1] and y[0..N-1] with
  73  *           variances (measurement errors) var[0..N-1],
  74  *           fit to a straight line y = mx + b.
  75  *
  76  * Method:   Algorithm from Numerical Recipes in C, [Press88].
  77  *
  78  * Return:   (void)
  79  *           ret_m contains slope; ret_b contains intercept
  80  */
  81 void
  82 WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b)
  83 {
  84   int    i;
  85   double s;
  86   double sx, sy;
  87   double sxx, sxy;
  88   double delta;
  89   double m, b;
  90
  91   s = sx = sy = sxx = sxy = 0.;
  92   for (i = 0; i < N; i++)
  93     {
  94       s   += 1./var[i];
  95       sx  += x[i] / var[i];
  96       sy  += y[i] / var[i];
  97       sxx += x[i] * x[i] / var[i];
  98       sxy += x[i] * y[i] / var[i];
  99     }
 100
 101   delta = s * sxx - (sx * sx);
 102   b = (sxx * sy - sx * sxy) / delta;
 103   m = (s * sxy - sx * sy) / delta;
 104
 105   *ret_m = m;
 106   *ret_b = b;
 107 }
 108
 109
 110 /* Function: Gammln()
 111  *
 112  * Returns the natural log of the gamma function of x.
 113  * x is > 0.0.
 114  *
 115  * Adapted from a public domain implementation in the
 116  * NCBI core math library. Thanks to John Spouge and
 117  * the NCBI. (According to the NCBI, that's Dr. John
 118  * "Gammas Galore" Spouge to you, pal.)
 119  */
 120 double
 121 Gammln(double x)
 122 {
 123   int i;
 124   double xx, tx;
 125   double tmp, value;
 126   static double cof[11] = {
 127     4.694580336184385e+04,
 128     -1.560605207784446e+05,
 129     2.065049568014106e+05,
 130     -1.388934775095388e+05,
 131     5.031796415085709e+04,
 132     -9.601592329182778e+03,
 133     8.785855930895250e+02,
 134     -3.155153906098611e+01,
 135     2.908143421162229e-01,
 136     -2.319827630494973e-04,
 137     1.251639670050933e-10
 138   };
 139
 140   /* Protect against x=0. We see this in Dirichlet code,
 141    * for terms alpha = 0. This is a severe hack but it is effective
 142    * and (we think?) safe. (due to GJM)
 143    */
 144   if (x <= 0.0) return 999999.;
 145
 146   xx       = x - 1.0;
 147   tx = tmp = xx + 11.0;
 148   value    = 1.0;
 149   for (i = 10; i >= 0; i--)     /* sum least significant terms first */
 150     {
 151       value += cof[i] / tmp;
 152       tmp   -= 1.0;
 153     }
 154   value  = log(value);
 155   tx    += 0.5;
 156   value += 0.918938533 + (xx+0.5)*log(tx) - tx;
 157   return value;
 158 }
 159
 160
 161 /* 2D matrix operations
 162  */
 163 float **
 164 FMX2Alloc(int rows, int cols)
 165 {
 166   float **mx;
 167   int     r;
 168
 169   mx    = (float **) MallocOrDie(sizeof(float *) * rows);
 170   mx[0] = (float *)  MallocOrDie(sizeof(float) * rows * cols);
 171   for (r = 1; r < rows; r++)
 172     mx[r] = mx[0] + r*cols;
 173   return mx;
 174 }
 175 void
 176 FMX2Free(float **mx)
 177 {
 178   free(mx[0]);
 179   free(mx);
 180 }
 181 double **
 182 DMX2Alloc(int rows, int cols)
 183 {
 184   double **mx;
 185   int      r;
 186
 187   mx    = (double **) MallocOrDie(sizeof(double *) * rows);
 188   mx[0] = (double *)  MallocOrDie(sizeof(double) * rows * cols);
 189   for (r = 1; r < rows; r++)
 190     mx[r] = mx[0] + r*cols;
 191   return mx;
 192 }
 193 void
 194 DMX2Free(double **mx)
 195 {
 196   free(mx[0]);
 197   free(mx);
 198 }
 199 /* Function: FMX2Multiply()
 200  *
 201  * Purpose:  Matrix multiplication.
 202  *           Multiply an m x p matrix A by a p x n matrix B,
 203  *           giving an m x n matrix C.
 204  *           Matrix C must be a preallocated matrix of the right
 205  *           size.
 206  */
 207 void
 208 FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
 209 {
 210   int i, j, k;
 211
 212   for (i = 0; i < m; i++)
 213     for (j = 0; j < n; j++)
 214       {
 215         C[i][j] = 0.;
 216         for (k = 0; k < p; k++)
 217           C[i][j] += A[i][p] * B[p][j];
 218       }
 219 }
 220
 221
 222 /* Function: IncompleteGamma()
 223  *
 224  * Purpose:  Returns 1 - P(a,x) where:
 225  *           P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
 226  *                  = \frac{\gamma(a,x)}{\Gamma(a)}
 227  *                  = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
 228  *
 229  *           Used in a chi-squared test: for a X^2 statistic x
 230  *           with v degrees of freedom, call:
 231  *                  p = IncompleteGamma(v/2., x/2.)
 232  *           to get the probability p that a chi-squared value
 233  *           greater than x could be obtained by chance even for
 234  *           a correct model. (i.e. p should be large, say
 235  *           0.95 or more).
 236  *
 237  * Method:   Based on ideas from Numerical Recipes in C, Press et al.,
 238  *           Cambridge University Press, 1988.
 239  *
 240  * Args:     a  - for instance, degrees of freedom / 2     [a > 0]
 241  *           x  - for instance, chi-squared statistic / 2  [x >= 0]
 242  *
 243  * Return:   1 - P(a,x).
 244  */
 245 double
 246 IncompleteGamma(double a, double x)
 247 {
 248   int iter;                     /* iteration counter */
 249
 250   if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
 251   if (x <  0.) Die("IncompleteGamma(): x must be >= 0");
 252
 253   /* For x > a + 1 the following gives rapid convergence;
 254    * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
 255    *     use a continued fraction development for \Gamma(a,x).
 256    */
 257   if (x > a+1)
 258     {
 259       double oldp;              /* previous value of p    */
 260       double nu0, nu1;          /* numerators for continued fraction calc   */
 261       double de0, de1;          /* denominators for continued fraction calc */
 262
 263       nu0 = 0.;                 /* A_0 = 0       */
 264       de0 = 1.;                 /* B_0 = 1       */
 265       nu1 = 1.;                 /* A_1 = 1       */
 266       de1 = x;                  /* B_1 = x       */
 267
 268       oldp = nu1;
 269       for (iter = 1; iter < 100; iter++)
 270         {
 271           /* Continued fraction development:
 272            * set A_j = b_j A_j-1 + a_j A_j-2
 273            *     B_j = b_j B_j-1 + a_j B_j-2
 274            * We start with A_2, B_2.
 275            */
 276                                 /* j = even: a_j = iter-a, b_j = 1 */
 277                                 /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
 278           nu0 = nu1 + ((double)iter - a) * nu0;
 279           de0 = de1 + ((double)iter - a) * de0;
 280
 281                                 /* j = odd: a_j = iter, b_j = x */
 282                                 /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
 283           nu1 = x * nu0 + (double) iter * nu1;
 284           de1 = x * de0 + (double) iter * de1;
 285
 286                                 /* rescale */
 287           if (de1 != 0.)
 288             {
 289               nu0 /= de1;
 290               de0 /= de1;
 291               nu1 /= de1;
 292               de1 =  1.;
 293             }
 294                                 /* check for convergence */
 295           if (fabs((nu1-oldp)/nu1) < 1.e-7)
 296             return nu1 * exp(a * log(x) - x - Gammln(a));
 297
 298           oldp = nu1;
 299         }
 300       Die("IncompleteGamma(): failed to converge using continued fraction approx");
 301     }
 302   else /* x <= a+1 */
 303     {
 304       double p;                 /* current sum               */
 305       double val;               /* current value used in sum */
 306
 307       /* For x <= a+1 we use a convergent series instead:
 308        *   P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
 309        * where
 310        *   \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
 311        * which looks appalling but the sum is in fact rearrangeable to
 312        * a simple series without the \Gamma functions:
 313        *   = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
 314        * and it's obvious that this should converge nicely for x <= a+1.
 315        */
 316
 317       p = val = 1. / a;
 318       for (iter = 1; iter < 10000; iter++)
 319         {
 320           val *= x / (a+(double)iter);
 321           p   += val;
 322
 323           if (fabs(val/p) < 1.e-7)
 324             return 1. - p * exp(a * log(x) - x - Gammln(a));
 325         }
 326       Die("IncompleteGamma(): failed to converge using series approx");
 327     }
 328   /*NOTREACHED*/
 329   return 0.;
 330 }
 331