forester/archive/RIO/others/puzzle_dqo/src/gamma.c

   1 /*
   2  * gamma.c
   3  *
   4  *
   5  * Part of TREE-PUZZLE 5.0 (June 2000)
   6  *
   7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
   8  *                  M. Vingron, and Arndt von Haeseler
   9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
  10  *
  11  * All parts of the source except where indicated are distributed under
  12  * the GNU public licence.  See http://www.opensource.org for details.
  13  */
  14
  15 #include <math.h>
  16 #include "util.h"
  17 #include "gamma.h"
  18
  19 /* private prototypes */
  20 static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha);
  21 static double PointNormal (double prob);
  22 static double PointChi2 (double prob, double v);
  23
  24 /* Gamma density function */
  25 double densityGamma (double x, double shape)
  26 {
  27         return pow (shape, shape) * pow (x, shape-1) /
  28                 exp (shape*x + LnGamma(shape));
  29 }
  30
  31 /* Gamma cdf */
  32 double cdfGamma (double x, double shape)
  33 {
  34         double result;
  35
  36         result = IncompleteGamma (shape*x, shape, LnGamma(shape));
  37
  38         return result;
  39 }
  40
  41 /* Gamma inverse cdf */
  42 double icdfGamma (double y, double shape)
  43 {
  44         double result;
  45
  46         result = PointChi2 (y, 2.0*shape)/(2.0*shape);
  47
  48         /* to avoid -1.0 */
  49         if (result < 0.0)
  50         {
  51                 result = 0.0;
  52         }
  53
  54         return result;
  55 }
  56
  57 /* Gamma n-th moment */
  58 double momentGamma (int n, double shape)
  59 {
  60         int i;
  61         double tmp = 1.0;
  62
  63         for (i = 1; i < n; i++)
  64         {
  65                 tmp *= (shape + i)/shape;
  66         }
  67
  68         return tmp;
  69 }
  70
  71 /* The following code comes from tools.c in Yang's PAML package */
  72
  73 double LnGamma (double alpha)
  74 {
  75 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
  76    Stirling's formula is used for the central polynomial part of the procedure.
  77    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
  78    Communications of the Association for Computing Machinery, 9:684
  79 */
  80    double x=alpha, f=0, z;
  81
  82    if (x<7) {
  83       f=1;  z=x-1;
  84       while (++z<7)  f*=z;
  85       x=z;   f=-log(f);
  86    }
  87    z = 1/(x*x);
  88    return  f + (x-0.5)*log(x) - x + .918938533204673
  89           + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
  90                +.083333333333333)/x;
  91 }
  92
  93 static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha)
  94 {
  95 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
  96            limit of the integration and alpha is the shape parameter.
  97    returns (-1) if in error
  98    (1) series expansion     if (alpha>x || x<=1)
  99    (2) continued fraction   otherwise
 100    RATNEST FORTRAN by
 101    Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
 102    19: 285-287 (AS32)
 103 */
 104    int i;
 105    double p=alpha, g=ln_gamma_alpha;
 106    double accurate=1e-8, overflow=1e30;
 107    double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
 108
 109    if (x==0) return (0);
 110    if (x<0 || p<=0) return (-1);
 111
 112    factor=exp(p*log(x)-x-g);
 113    if (x>1 && x>=p) goto l30;
 114    /* (1) series expansion */
 115    gin=1;  term=1;  rn=p;
 116  l20:
 117    rn++;
 118    term*=x/rn;   gin+=term;
 119
 120    if (term > accurate) goto l20;
 121    gin*=factor/p;
 122    goto l50;
 123  l30:
 124    /* (2) continued fraction */
 125    a=1-p;   b=a+x+1;  term=0;
 126    pn[0]=1;  pn[1]=x;  pn[2]=x+1;  pn[3]=x*b;
 127    gin=pn[2]/pn[3];
 128  l32:
 129    a++;  b+=2;  term++;   an=a*term;
 130    for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
 131    if (pn[5] == 0) goto l35;
 132    rn=pn[4]/pn[5];   dif=fabs(gin-rn);
 133    if (dif>accurate) goto l34;
 134    if (dif<=accurate*rn) goto l42;
 135  l34:
 136    gin=rn;
 137  l35:
 138    for (i=0; i<4; i++) pn[i]=pn[i+2];
 139    if (fabs(pn[4]) < overflow) goto l32;
 140    for (i=0; i<4; i++) pn[i]/=overflow;
 141    goto l32;
 142  l42:
 143    gin=1-factor*gin;
 144
 145  l50:
 146    return (gin);
 147 }
 148
 149
 150 /* functions concerning the CDF and percentage points of the gamma and
 151    Chi2 distribution
 152 */
 153 static double PointNormal (double prob)
 154 {
 155 /* returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
 156    returns (-9999) if in error
 157    Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
 158    Applied Statistics 22: 96-97 (AS70)
 159
 160    Newer methods:
 161      Wichura MJ (1988) Algorithm AS 241: the percentage points of the
 162        normal distribution.  37: 477-484.
 163      Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage
 164        points of the normal distribution.  26: 118-121.
 165
 166 */
 167    double a0=-.322232431088, a1=-1, a2=-.342242088547, a3=-.0204231210245;
 168    double a4=-.453642210148e-4, b0=.0993484626060, b1=.588581570495;
 169    double b2=.531103462366, b3=.103537752850, b4=.0038560700634;
 170    double y, z=0, p=prob, p1;
 171
 172    p1 = (p<0.5 ? p : 1-p);
 173    if (p1<1e-20) return (-9999);
 174
 175    y = sqrt (log(1/(p1*p1)));
 176    z = y + ((((y*a4+a3)*y+a2)*y+a1)*y+a0) / ((((y*b4+b3)*y+b2)*y+b1)*y+b0);
 177    return (p<0.5 ? -z : z);
 178 }
 179
 180
 181 static double PointChi2 (double prob, double v)
 182 {
 183 /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
 184    returns -1 if in error.   0.000002<prob<0.999998
 185    RATNEST FORTRAN by
 186        Best DJ & Roberts DE (1975) The percentage points of the
 187        Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)
 188    Converted into C by Ziheng Yang, Oct. 1993.
 189 */
 190    double e=.5e-6, aa=.6931471805, p=prob, g;
 191    double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;
 192
 193    if (p<.000002 || p>.999998 || v<=0) return (-1);
 194
 195    g = LnGamma (v/2);
 196    xx=v/2;   c=xx-1;
 197    if (v >= -1.24*log(p)) goto l1;
 198
 199    ch=pow((p*xx*exp(g+xx*aa)), 1/xx);
 200    if (ch-e<0) return (ch);
 201    goto l4;
 202 l1:
 203    if (v>.32) goto l3;
 204    ch=0.4;   a=log(1-p);
 205 l2:
 206    q=ch;  p1=1+ch*(4.67+ch);  p2=ch*(6.73+ch*(6.66+ch));
 207    t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;
 208    ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;
 209    if (fabs(q/ch-1)-.01 <= 0) goto l4;
 210    else                       goto l2;
 211
 212 l3:
 213    x=PointNormal (p);
 214    p1=0.222222/v;   ch=v*pow((x*sqrt(p1)+1-p1), 3.0);
 215    if (ch>2.2*v+6)  ch=-2*(log(1-p)-c*log(.5*ch)+g);
 216 l4:
 217
 218    do
 219    {
 220    q=ch;   p1=.5*ch;
 221    if ((t=IncompleteGamma (p1, xx, g))<0) {
 222       return (-1);
 223    }
 224    p2=p-t;
 225    t=p2*exp(xx*aa+g+p1-c*log(ch));
 226    b=t/ch;  a=0.5*t-b*c;
 227
 228    s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;
 229    s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;
 230    s3=(210+a*(462+a*(707+932*a)))/2520;
 231    s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;
 232    s5=(84+264*a+c*(175+606*a))/2520;
 233    s6=(120+c*(346+127*c))/5040;
 234    ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));
 235    }
 236    while (fabs(q/ch-1) > e);
 237
 238    return (ch);
 239 }
 240
 241
 242 /* Incomplete Gamma function Q(a,x)
 243    - this is a cleanroom implementation of NRs gammq(a,x)
 244 */
 245 double IncompleteGammaQ (double a, double x)
 246 {
 247         return 1.0-IncompleteGamma (x, a, LnGamma(a));
 248 }
 249
 250
 251 /* probability that the observed chi-square
 252    exceeds chi2 even if model is correct */
 253 double chi2prob (int deg, double chi2)
 254 {
 255         return IncompleteGammaQ (0.5*deg, 0.5*chi2);
 256 }
 257
 258
 259
 260 /* chi square test
 261    ef      expected frequencies (sum up to 1 !!)
 262    of      observed frequencies (sum up to the number of samples)
 263    numcat  number of categories
 264    returns critical significance level */
 265 double chi2test(double *ef, int *of, int numcat, int *chi2fail)
 266 {
 267         double chi2, criticals, efn;
 268         int i, below1, below5, reducedcat;
 269         int samples;
 270
 271         *chi2fail = FALSE;
 272         reducedcat = numcat;
 273         below1 = 0;
 274         below5 = 0;
 275
 276         /* compute number of samples */
 277         samples = 0;
 278         for (i = 0; i < numcat; i++)
 279                 samples = samples + of[i];
 280
 281         /* compute chi square */
 282         chi2 = 0;
 283         for (i = 0; i < numcat; i++) {
 284                 efn = ef[i]*((double) samples);
 285                 if (efn < 1.0) below1++;
 286                 if (efn < 5.0) below5++;
 287                 if (efn == 0.0) {
 288                         reducedcat--;
 289                         fprintf(stdout, "FPE error: samples=%d, ef[%d]=%f, of[%d]=%d, efn=%f, nc=%d, rc=%d\n",
 290                                         samples, i, ef[i], i, of[i], efn, numcat, reducedcat);
 291                         fprintf(stdout, "PLEASE REPORT THIS ERROR TO DEVELOPERS !!!\n");
 292                         fflush(stdout);
 293                 } else chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn;
 294         }
 295
 296         /* compute significance */
 297         criticals = chi2prob (numcat-1, chi2);
 298
 299         /* no expected frequency category (sum up to # samples) below 1.0 */
 300         if (below1 > 0) *chi2fail = TRUE;
 301         /* no more than 1/5 of the frequency categories below 5.0 */
 302         if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE;
 303
 304         return criticals;
 305 }
 306
 307
 308 /* chi square test
 309    ef      expected frequencies (sum up to 1 !!)
 310    of      observed frequencies (sum up to the number of samples)
 311    numcat  number of categories
 312    returns critical significance level */
 313 double altchi2test(double *ef, int *of, int numcat, int *chi2fail)
 314 {
 315         double chi2, criticals, efn;
 316         int i, below1, below5;
 317         int samples;
 318
 319         *chi2fail = FALSE;
 320         below1 = 0;
 321         below5 = 0;
 322
 323         /* compute number of samples */
 324         samples = 0;
 325         for (i = 0; i < numcat; i++)
 326                 samples = samples + of[i];
 327
 328         /* compute chi square */
 329         chi2 = 0;
 330         for (i = 0; i < numcat; i++) {
 331                 efn = ef[i]*((double) samples);
 332                 if (efn < 1.0) below1++;
 333                 if (efn < 5.0) below5++;
 334                 chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn;
 335         }
 336
 337         /* compute significance */
 338         criticals = chi2prob (numcat-1, chi2);
 339
 340         /* no expected frequency category (sum up to # samples) below 1.0 */
 341         if (below1 > 0) *chi2fail = TRUE;
 342         /* no more than 1/5 of the frequency categories below 5.0 */
 343         if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE;
 344
 345         return criticals;
 346 }