/***************************************************************** * SQUID - a library of functions for biological sequence analysis * Copyright (C) 1992-2002 Washington University School of Medicine * * This source code is freely distributed under the terms of the * GNU General Public License. See the files COPYRIGHT and LICENSE * for details. *****************************************************************/ /* sre_math.c * * Portability for and extensions to C math library. * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp) */ #include #include #include #include "squid.h" /* Function: Linefit() * * Purpose: Given points x[0..N-1] and y[0..N-1], fit to * a straight line y = a + bx. * a, b, and the linear correlation coefficient r * are filled in for return. * * Args: x - x values of data * y - y values of data * N - number of data points * ret_a - RETURN: intercept * ret_b - RETURN: slope * ret_r - RETURN: correlation coefficient * * Return: 1 on success, 0 on failure. */ int Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r) { float xavg, yavg; float sxx, syy, sxy; int i; /* Calculate averages, xavg and yavg */ xavg = yavg = 0.0; for (i = 0; i < N; i++) { xavg += x[i]; yavg += y[i]; } xavg /= (float) N; yavg /= (float) N; sxx = syy = sxy = 0.0; for (i = 0; i < N; i++) { sxx += (x[i] - xavg) * (x[i] - xavg); syy += (y[i] - yavg) * (y[i] - xavg); sxy += (x[i] - xavg) * (y[i] - yavg); } *ret_b = sxy / sxx; *ret_a = yavg - xavg*(*ret_b); *ret_r = sxy / (sqrt(sxx) * sqrt(syy)); return 1; } /* Function: WeightedLinefit() * * Purpose: Given points x[0..N-1] and y[0..N-1] with * variances (measurement errors) var[0..N-1], * fit to a straight line y = mx + b. * * Method: Algorithm from Numerical Recipes in C, [Press88]. 
* * Return: (void) * ret_m contains slope; ret_b contains intercept */ void WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b) { int i; double s; double sx, sy; double sxx, sxy; double delta; double m, b; s = sx = sy = sxx = sxy = 0.; for (i = 0; i < N; i++) { s += 1./var[i]; sx += x[i] / var[i]; sy += y[i] / var[i]; sxx += x[i] * x[i] / var[i]; sxy += x[i] * y[i] / var[i]; } delta = s * sxx - (sx * sx); b = (sxx * sy - sx * sxy) / delta; m = (s * sxy - sx * sy) / delta; *ret_m = m; *ret_b = b; } /* Function: Gammln() * * Returns the natural log of the gamma function of x. * x is > 0.0. * * Adapted from a public domain implementation in the * NCBI core math library. Thanks to John Spouge and * the NCBI. (According to the NCBI, that's Dr. John * "Gammas Galore" Spouge to you, pal.) */ double Gammln(double x) { int i; double xx, tx; double tmp, value; static double cof[11] = { 4.694580336184385e+04, -1.560605207784446e+05, 2.065049568014106e+05, -1.388934775095388e+05, 5.031796415085709e+04, -9.601592329182778e+03, 8.785855930895250e+02, -3.155153906098611e+01, 2.908143421162229e-01, -2.319827630494973e-04, 1.251639670050933e-10 }; /* Protect against x=0. We see this in Dirichlet code, * for terms alpha = 0. This is a severe hack but it is effective * and (we think?) safe. 
(due to GJM) */ if (x <= 0.0) return 999999.; xx = x - 1.0; tx = tmp = xx + 11.0; value = 1.0; for (i = 10; i >= 0; i--) /* sum least significant terms first */ { value += cof[i] / tmp; tmp -= 1.0; } value = log(value); tx += 0.5; value += 0.918938533 + (xx+0.5)*log(tx) - tx; return value; } /* 2D matrix operations */ float ** FMX2Alloc(int rows, int cols) { float **mx; int r; mx = (float **) MallocOrDie(sizeof(float *) * rows); mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols); for (r = 1; r < rows; r++) mx[r] = mx[0] + r*cols; return mx; } void FMX2Free(float **mx) { free(mx[0]); free(mx); } double ** DMX2Alloc(int rows, int cols) { double **mx; int r; mx = (double **) MallocOrDie(sizeof(double *) * rows); mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols); for (r = 1; r < rows; r++) mx[r] = mx[0] + r*cols; return mx; } void DMX2Free(double **mx) { free(mx[0]); free(mx); } /* Function: FMX2Multiply() * * Purpose: Matrix multiplication. * Multiply an m x p matrix A by a p x n matrix B, * giving an m x n matrix C. * Matrix C must be a preallocated matrix of the right * size. */ void FMX2Multiply(float **A, float **B, float **C, int m, int p, int n) { int i, j, k; for (i = 0; i < m; i++) for (j = 0; j < n; j++) { C[i][j] = 0.; for (k = 0; k < p; k++) C[i][j] += A[i][p] * B[p][j]; } } /* Function: IncompleteGamma() * * Purpose: Returns 1 - P(a,x) where: * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt * = \frac{\gamma(a,x)}{\Gamma(a)} * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)} * * Used in a chi-squared test: for a X^2 statistic x * with v degrees of freedom, call: * p = IncompleteGamma(v/2., x/2.) * to get the probability p that a chi-squared value * greater than x could be obtained by chance even for * a correct model. (i.e. p should be large, say * 0.95 or more). * * Method: Based on ideas from Numerical Recipes in C, Press et al., * Cambridge University Press, 1988. 
 *
 * Args:     a  - for instance, degrees of freedom / 2     [a > 0]
 *           x  - for instance, chi-squared statistic / 2  [x >= 0]
 *
 * Return:   1 - P(a,x).
 */
double
IncompleteGamma(double a, double x)
{
  int iter;			/* iteration counter */

  if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
  if (x <  0.) Die("IncompleteGamma(): x must be >= 0");

  /* For x > a + 1 the following gives rapid convergence;
   * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
   *     use a continued fraction development for \Gamma(a,x).
   */
  if (x > a+1)
    {
      double oldp;		/* previous value of p                      */
      double nu0, nu1;		/* numerators for continued fraction calc   */
      double de0, de1;		/* denominators for continued fraction calc */

      nu0 = 0.;			/* A_0 = 0 */
      de0 = 1.;			/* B_0 = 1 */
      nu1 = 1.;			/* A_1 = 1 */
      de1 = x;			/* B_1 = x */
      oldp = nu1;
      for (iter = 1; iter < 100; iter++)
	{
	  /* Continued fraction development:
	   * set A_j = b_j A_j-1 + a_j A_j-2
	   *     B_j = b_j B_j-1 + a_j B_j-2
	   * We start with A_2, B_2.
	   */
	  /* j = even: a_j = iter-a, b_j = 1 */
	  /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
	  nu0 = nu1 + ((double)iter - a) * nu0;
	  de0 = de1 + ((double)iter - a) * de0;

	  /* j = odd: a_j = iter, b_j = x */
	  /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
	  nu1 = x * nu0 + (double) iter * nu1;
	  de1 = x * de0 + (double) iter * de1;

	  /* rescale: normalize by B_j so the terms don't overflow
	   * as the recurrence grows them */
	  if (de1 != 0.)
	    {
	      nu0 /= de1;
	      de0 /= de1;
	      nu1 /= de1;
	      de1 = 1.;
	    }
	  /* check for convergence (relative change below 1e-7) */
	  if (fabs((nu1-oldp)/nu1) < 1.e-7)
	    return nu1 * exp(a * log(x) - x - Gammln(a));

	  oldp = nu1;
	}
      Die("IncompleteGamma(): failed to converge using continued fraction approx");
    }
  else /* x <= a+1 */
    {
      double p;			/* current sum               */
      double val;		/* current value used in sum */

      /* For x <= a+1 we use a convergent series instead:
       *   P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
       * where
       *   \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
       * which looks appalling but the sum is in fact rearrangeable to
       * a simple series without the \Gamma functions:
       *   = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
       * and it's obvious that this should converge nicely for x <= a+1.
       */
      p = val = 1. / a;
      for (iter = 1; iter < 10000; iter++)
	{
	  val *= x / (a+(double)iter);
	  p   += val;

	  if (fabs(val/p) < 1.e-7)
	    return 1. - p * exp(a * log(x) - x - Gammln(a));
	}
      Die("IncompleteGamma(): failed to converge using series approx");
    }
  /*NOTREACHED*/
  return 0.;
}