/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 *
 * This source code is freely distributed under the terms of the
 * GNU General Public License. See the files COPYRIGHT and LICENSE
 * for details.
 *****************************************************************/
/* sre_math.c
 *
 * Portability for and extensions to C math library.
 * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp)
 */
22 /* Function: Linefit()
24 * Purpose: Given points x[0..N-1] and y[0..N-1], fit to
25 * a straight line y = a + bx.
26 * a, b, and the linear correlation coefficient r
27 * are filled in for return.
29 * Args: x - x values of data
30 * y - y values of data
31 * N - number of data points
32 * ret_a - RETURN: intercept
33 * ret_b - RETURN: slope
34 * ret_r - RETURN: correlation coefficient
36 * Return: 1 on success, 0 on failure.
39 Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r)
45 /* Calculate averages, xavg and yavg
48 for (i = 0; i < N; i++)
56 sxx = syy = sxy = 0.0;
57 for (i = 0; i < N; i++)
59 sxx += (x[i] - xavg) * (x[i] - xavg);
60 syy += (y[i] - yavg) * (y[i] - xavg);
61 sxy += (x[i] - xavg) * (y[i] - yavg);
64 *ret_a = yavg - xavg*(*ret_b);
65 *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
/* Function: WeightedLinefit()
 *
 * Purpose:  Given points x[0..N-1] and y[0..N-1] with
 *           variances (measurement errors) var[0..N-1],
 *           fit to a straight line y = mx + b.
 *
 * Method:   Algorithm from Numerical Recipes in C, [Press88].
 *           Each point is weighted by 1/var[i]; the slope and
 *           intercept are the closed-form weighted least squares
 *           solution built from the weighted sums below.
 *
 * Args:     x     - x values of data
 *           y     - y values of data
 *           var   - variance of each data point; must be nonzero
 *           N     - number of data points
 *           ret_m - RETURN: slope
 *           ret_b - RETURN: intercept
 *
 * Return:   (void)
 *           ret_m contains slope; ret_b contains intercept
 *
 * Note:     No check is made for a degenerate fit (fewer than two
 *           distinct x values makes delta zero); the results are
 *           then whatever floating point division by zero yields.
 */
void
WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b)
{
  int    i;
  double s;			/* sum of weights 1/var               */
  double sx, sy;		/* weighted sums of x and of y        */
  double sxx, sxy;		/* weighted sums of x*x and of x*y    */
  double delta;			/* determinant of the normal equations */
  double m, b;

  s = sx = sy = sxx = sxy = 0.;
  for (i = 0; i < N; i++)
    {
      s   += 1./var[i];
      sx  += x[i] / var[i];
      sy  += y[i] / var[i];
      sxx += x[i] * x[i] / var[i];
      sxy += x[i] * y[i] / var[i];
    }

  delta = s * sxx - (sx * sx);
  b = (sxx * sy - sx * sxy) / delta;
  m = (s * sxy - sx * sy) / delta;

  *ret_m = m;
  *ret_b = b;
}
110 /* Function: Gammln()
112 * Returns the natural log of the gamma function of x.
115 * Adapted from a public domain implementation in the
116 * NCBI core math library. Thanks to John Spouge and
117 * the NCBI. (According to the NCBI, that's Dr. John
118 * "Gammas Galore" Spouge to you, pal.)
126 static double cof[11] = {
127 4.694580336184385e+04,
128 -1.560605207784446e+05,
129 2.065049568014106e+05,
130 -1.388934775095388e+05,
131 5.031796415085709e+04,
132 -9.601592329182778e+03,
133 8.785855930895250e+02,
134 -3.155153906098611e+01,
135 2.908143421162229e-01,
136 -2.319827630494973e-04,
137 1.251639670050933e-10
140 /* Protect against x=0. We see this in Dirichlet code,
141 * for terms alpha = 0. This is a severe hack but it is effective
142 * and (we think?) safe. (due to GJM)
144 if (x <= 0.0) return 999999.;
147 tx = tmp = xx + 11.0;
149 for (i = 10; i >= 0; i--) /* sum least significant terms first */
151 value += cof[i] / tmp;
156 value += 0.918938533 + (xx+0.5)*log(tx) - tx;
/* 2D matrix operations
 */

/* Function: FMX2Alloc()
 *
 * Purpose:  Allocate a rows x cols matrix of floats, addressable
 *           as mx[r][c].
 *
 *           Two allocations are made: an array of row pointers,
 *           and a single contiguous block of rows*cols floats that
 *           mx[0] points to. The remaining row pointers are set to
 *           offsets inside that block.
 *
 * Args:     rows - number of rows; expected to be >= 1, since
 *                  mx[0] is always written
 *           cols - number of columns
 *
 * Return:   the matrix. Both allocations go through MallocOrDie();
 *           presumably that does not return on failure (confirm
 *           against its definition).
 */
float **
FMX2Alloc(int rows, int cols)
{
  float **mx;
  int     r;

  mx    = (float **) MallocOrDie(sizeof(float *) * rows);
  mx[0] = (float *)  MallocOrDie(sizeof(float) * rows * cols);
  for (r = 1; r < rows; r++)
    mx[r] = mx[0] + (size_t) r * cols;	/* size_t: no int overflow in r*cols */
  return mx;
}
/* Function: DMX2Alloc()
 *
 * Purpose:  Allocate a rows x cols matrix of doubles, addressable
 *           as mx[r][c].
 *
 *           Two allocations are made: an array of row pointers,
 *           and a single contiguous block of rows*cols doubles that
 *           mx[0] points to. The remaining row pointers are set to
 *           offsets inside that block.
 *
 * Args:     rows - number of rows; expected to be >= 1, since
 *                  mx[0] is always written
 *           cols - number of columns
 *
 * Return:   the matrix. Both allocations go through MallocOrDie();
 *           presumably that does not return on failure (confirm
 *           against its definition).
 */
double **
DMX2Alloc(int rows, int cols)
{
  double **mx;
  int      r;

  mx    = (double **) MallocOrDie(sizeof(double *) * rows);
  mx[0] = (double *)  MallocOrDie(sizeof(double) * rows * cols);
  for (r = 1; r < rows; r++)
    mx[r] = mx[0] + (size_t) r * cols;	/* size_t: no int overflow in r*cols */
  return mx;
}
/* Function: DMX2Free()
 *
 * Purpose:  Release a matrix laid out the way DMX2Alloc() builds it:
 *           mx[0] is the single block holding all the elements, and
 *           mx is the array of row pointers. Both are freed.
 *           A NULL mx is accepted and ignored.
 *
 * Args:     mx - matrix to free, or NULL
 */
void
DMX2Free(double **mx)
{
  if (mx == NULL) return;
  free(mx[0]);			/* element storage   */
  free(mx);			/* row pointer array */
}
/* Function: FMX2Multiply()
 *
 * Purpose:  Matrix multiplication.
 *           Multiply an m x p matrix A by a p x n matrix B,
 *           giving an m x n matrix C.
 *           Matrix C must be a preallocated matrix of the right
 *           size. Its previous contents are overwritten.
 *
 * Args:     A - left operand,  m rows by p columns
 *           B - right operand, p rows by n columns
 *           C - RETURN: product, m rows by n columns
 *           m - rows in A and C
 *           p - columns in A, rows in B
 *           n - columns in B and C
 */
void
FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
{
  int i, j, k;

  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      {
	C[i][j] = 0.;
	/* The summation index is k, running over the shared
	 * dimension; p itself is one past the last valid index.
	 */
	for (k = 0; k < p; k++)
	  C[i][j] += A[i][k] * B[k][j];
      }
}
/* Function: IncompleteGamma()
 *
 * Purpose:  Returns 1 - P(a,x) where:
 *           P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
 *                  = \frac{\gamma(a,x)}{\Gamma(a)}
 *                  = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
 *
 *           Used in a chi-squared test: for a X^2 statistic x
 *           with v degrees of freedom, call:
 *                  p = IncompleteGamma(v/2., x/2.)
 *           to get the probability p that a chi-squared value
 *           greater than x could be obtained by chance even for
 *           a correct model. (i.e. p should be large, say
 *           0.95 or more).
 *
 * Method:   Based on ideas from Numerical Recipes in C, Press et al.,
 *           Cambridge University Press, 1988.
 *
 * Args:     a  - for instance, degrees of freedom / 2     [a > 0]
 *           x  - for instance, chi-squared statistic / 2  [x >= 0]
 *
 * Return:   1 - P(a,x).
 *           Die() is called on out-of-range arguments and when
 *           neither expansion converges within its iteration limit.
 */
double
IncompleteGamma(double a, double x)
{
  int iter;			/* iteration counter */

  if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
  if (x <  0.) Die("IncompleteGamma(): x must be >= 0");

  /* For x > a + 1 the following gives rapid convergence;
   * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
   * use a continued fraction development for \Gamma(a,x).
   */
  if (x > a+1)
    {
      double oldp;		/* previous value of p    */
      double nu0, nu1;		/* numerators for continued fraction calc   */
      double de0, de1;		/* denominators for continued fraction calc */

      nu0 = 0.;			/* A_0 = 0       */
      de0 = 1.;			/* B_0 = 1       */
      nu1 = 1.;			/* A_1 = 1       */
      de1 = x;			/* B_1 = x       */

      oldp = nu1;
      for (iter = 1; iter < 100; iter++)
	{
	  /* Continued fraction development:
	   * set A_j = b_j A_j-1 + a_j A_j-2
	   *     B_j = b_j B_j-1 + a_j B_j-2
	   * We start with A_2, B_2.
	   */
				/* j = even: a_j = iter-a, b_j = 1 */
				/* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
	  nu0 = nu1 + ((double)iter - a) * nu0;
	  de0 = de1 + ((double)iter - a) * de0;

				/* j = odd: a_j = iter, b_j = x */
				/* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
	  nu1 = x * nu0 + (double) iter * nu1;
	  de1 = x * de0 + (double) iter * de1;

				/* rescale so that B_j = 1: nu1 is then the
				 * value of the fraction itself, which is what
				 * the convergence test and the return use */
	  if (de1 != 0.)
	    {
	      nu0 /= de1;
	      de0 /= de1;
	      nu1 /= de1;
	      de1 =  1.;
	    }
				/* check for convergence */
	  if (fabs((nu1-oldp)/nu1) < 1.e-7)
	    return nu1 * exp(a * log(x) - x - Gammln(a));

	  oldp = nu1;
	}
      Die("IncompleteGamma(): failed to converge using continued fraction approx");
    }
  else /* x <= a+1 */
    {
      double p;			/* current sum               */
      double val;		/* current value used in sum */

      /* For x <= a+1 we use a convergent series instead:
       *   P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
       * where
       *   \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
       * which looks appalling but the sum is in fact rearrangeable to
       * a simple series without the \Gamma functions:
       *   = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
       * and it's obvious that this should converge nicely for x <= a+1.
       */

      p = val = 1. / a;		/* first term of the series */
      for (iter = 1; iter < 10000; iter++)
	{
	  val *= x / (a+(double)iter);
	  p   += val;

	  if (fabs(val/p) < 1.e-7)
	    return 1. - p * exp(a * log(x) - x - Gammln(a));
	}
      Die("IncompleteGamma(): failed to converge using series approx");
    }
  /*NOTREACHED*/
  return 0.;
}