+++ /dev/null
-/*****************************************************************
- * SQUID - a library of functions for biological sequence analysis
- * Copyright (C) 1992-2002 Washington University School of Medicine
- *
- * This source code is freely distributed under the terms of the
- * GNU General Public License. See the files COPYRIGHT and LICENSE
- * for details.
- *****************************************************************/
-
-/* sre_math.c
- *
- * Portability for and extensions to C math library.
- * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp)
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "squid.h"
-
-
-/* Function: Linefit()
- *
- * Purpose: Given points x[0..N-1] and y[0..N-1], fit to
- * a straight line y = a + bx.
- * a, b, and the linear correlation coefficient r
- * are filled in for return.
- *
- * Args: x - x values of data
- * y - y values of data
- * N - number of data points
- * ret_a - RETURN: intercept
- * ret_b - RETURN: slope
- * ret_r - RETURN: correlation coefficient
- *
- * Return: 1 on success, 0 on failure.
- */
-int
-Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r)
-{
- float xavg, yavg;
- float sxx, syy, sxy;
- int i;
-
- /* Calculate averages, xavg and yavg
- */
- xavg = yavg = 0.0;
- for (i = 0; i < N; i++)
- {
- xavg += x[i];
- yavg += y[i];
- }
- xavg /= (float) N;
- yavg /= (float) N;
-
- sxx = syy = sxy = 0.0;
- for (i = 0; i < N; i++)
- {
- sxx += (x[i] - xavg) * (x[i] - xavg);
- syy += (y[i] - yavg) * (y[i] - xavg);
- sxy += (x[i] - xavg) * (y[i] - yavg);
- }
- *ret_b = sxy / sxx;
- *ret_a = yavg - xavg*(*ret_b);
- *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
- return 1;
-}
-
-
-/* Function: WeightedLinefit()
- *
- * Purpose: Given points x[0..N-1] and y[0..N-1] with
- * variances (measurement errors) var[0..N-1],
- * fit to a straight line y = mx + b.
- *
- * Method: Algorithm from Numerical Recipes in C, [Press88].
- *
- * Return: (void)
- * ret_m contains slope; ret_b contains intercept
- */
-void
-WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b)
-{
- int i;
- double s;
- double sx, sy;
- double sxx, sxy;
- double delta;
- double m, b;
-
- s = sx = sy = sxx = sxy = 0.;
- for (i = 0; i < N; i++)
- {
- s += 1./var[i];
- sx += x[i] / var[i];
- sy += y[i] / var[i];
- sxx += x[i] * x[i] / var[i];
- sxy += x[i] * y[i] / var[i];
- }
-
- delta = s * sxx - (sx * sx);
- b = (sxx * sy - sx * sxy) / delta;
- m = (s * sxy - sx * sy) / delta;
-
- *ret_m = m;
- *ret_b = b;
-}
-
-
-/* Function: Gammln()
- *
- * Returns the natural log of the gamma function of x.
- * x is > 0.0.
- *
- * Adapted from a public domain implementation in the
- * NCBI core math library. Thanks to John Spouge and
- * the NCBI. (According to the NCBI, that's Dr. John
- * "Gammas Galore" Spouge to you, pal.)
- */
-double
-Gammln(double x)
-{
- int i;
- double xx, tx;
- double tmp, value;
- static double cof[11] = {
- 4.694580336184385e+04,
- -1.560605207784446e+05,
- 2.065049568014106e+05,
- -1.388934775095388e+05,
- 5.031796415085709e+04,
- -9.601592329182778e+03,
- 8.785855930895250e+02,
- -3.155153906098611e+01,
- 2.908143421162229e-01,
- -2.319827630494973e-04,
- 1.251639670050933e-10
- };
-
- /* Protect against x=0. We see this in Dirichlet code,
- * for terms alpha = 0. This is a severe hack but it is effective
- * and (we think?) safe. (due to GJM)
- */
- if (x <= 0.0) return 999999.;
-
- xx = x - 1.0;
- tx = tmp = xx + 11.0;
- value = 1.0;
- for (i = 10; i >= 0; i--) /* sum least significant terms first */
- {
- value += cof[i] / tmp;
- tmp -= 1.0;
- }
- value = log(value);
- tx += 0.5;
- value += 0.918938533 + (xx+0.5)*log(tx) - tx;
- return value;
-}
-
-
-/* 2D matrix operations
- */
-float **
-FMX2Alloc(int rows, int cols)
-{
- float **mx;
- int r;
-
- mx = (float **) MallocOrDie(sizeof(float *) * rows);
- mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols);
- for (r = 1; r < rows; r++)
- mx[r] = mx[0] + r*cols;
- return mx;
-}
-void
-FMX2Free(float **mx)
-{
- free(mx[0]);
- free(mx);
-}
-double **
-DMX2Alloc(int rows, int cols)
-{
- double **mx;
- int r;
-
- mx = (double **) MallocOrDie(sizeof(double *) * rows);
- mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols);
- for (r = 1; r < rows; r++)
- mx[r] = mx[0] + r*cols;
- return mx;
-}
-void
-DMX2Free(double **mx)
-{
- free(mx[0]);
- free(mx);
-}
-/* Function: FMX2Multiply()
- *
- * Purpose: Matrix multiplication.
- * Multiply an m x p matrix A by a p x n matrix B,
- * giving an m x n matrix C.
- * Matrix C must be a preallocated matrix of the right
- * size.
- */
-void
-FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
-{
- int i, j, k;
-
- for (i = 0; i < m; i++)
- for (j = 0; j < n; j++)
- {
- C[i][j] = 0.;
- for (k = 0; k < p; k++)
- C[i][j] += A[i][p] * B[p][j];
- }
-}
-
-
-/* Function: IncompleteGamma()
- *
- * Purpose: Returns 1 - P(a,x) where:
- * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
- * = \frac{\gamma(a,x)}{\Gamma(a)}
- * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
- *
- * Used in a chi-squared test: for a X^2 statistic x
- * with v degrees of freedom, call:
- * p = IncompleteGamma(v/2., x/2.)
- * to get the probability p that a chi-squared value
- * greater than x could be obtained by chance even for
- * a correct model. (i.e. p should be large, say
- * 0.95 or more).
- *
- * Method: Based on ideas from Numerical Recipes in C, Press et al.,
- * Cambridge University Press, 1988.
- *
- * Args: a - for instance, degrees of freedom / 2 [a > 0]
- * x - for instance, chi-squared statistic / 2 [x >= 0]
- *
- * Return: 1 - P(a,x).
- */
-double
-IncompleteGamma(double a, double x)
-{
- int iter; /* iteration counter */
-
- if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
- if (x < 0.) Die("IncompleteGamma(): x must be >= 0");
-
- /* For x > a + 1 the following gives rapid convergence;
- * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
- * use a continued fraction development for \Gamma(a,x).
- */
- if (x > a+1)
- {
- double oldp; /* previous value of p */
- double nu0, nu1; /* numerators for continued fraction calc */
- double de0, de1; /* denominators for continued fraction calc */
-
- nu0 = 0.; /* A_0 = 0 */
- de0 = 1.; /* B_0 = 1 */
- nu1 = 1.; /* A_1 = 1 */
- de1 = x; /* B_1 = x */
-
- oldp = nu1;
- for (iter = 1; iter < 100; iter++)
- {
- /* Continued fraction development:
- * set A_j = b_j A_j-1 + a_j A_j-2
- * B_j = b_j B_j-1 + a_j B_j-2
- * We start with A_2, B_2.
- */
- /* j = even: a_j = iter-a, b_j = 1 */
- /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
- nu0 = nu1 + ((double)iter - a) * nu0;
- de0 = de1 + ((double)iter - a) * de0;
-
- /* j = odd: a_j = iter, b_j = x */
- /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
- nu1 = x * nu0 + (double) iter * nu1;
- de1 = x * de0 + (double) iter * de1;
-
- /* rescale */
- if (de1 != 0.)
- {
- nu0 /= de1;
- de0 /= de1;
- nu1 /= de1;
- de1 = 1.;
- }
- /* check for convergence */
- if (fabs((nu1-oldp)/nu1) < 1.e-7)
- return nu1 * exp(a * log(x) - x - Gammln(a));
-
- oldp = nu1;
- }
- Die("IncompleteGamma(): failed to converge using continued fraction approx");
- }
- else /* x <= a+1 */
- {
- double p; /* current sum */
- double val; /* current value used in sum */
-
- /* For x <= a+1 we use a convergent series instead:
- * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
- * where
- * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
- * which looks appalling but the sum is in fact rearrangeable to
- * a simple series without the \Gamma functions:
- * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
- * and it's obvious that this should converge nicely for x <= a+1.
- */
-
- p = val = 1. / a;
- for (iter = 1; iter < 10000; iter++)
- {
- val *= x / (a+(double)iter);
- p += val;
-
- if (fabs(val/p) < 1.e-7)
- return 1. - p * exp(a * log(x) - x - Gammln(a));
- }
- Die("IncompleteGamma(): failed to converge using series approx");
- }
- /*NOTREACHED*/
- return 0.;
-}
-