5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
19 /* private prototypes */
20 static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha);
21 static double PointNormal (double prob);
22 static double PointChi2 (double prob, double v);
24 /* Gamma density function */
25 double densityGamma (double x, double shape)
27 return pow (shape, shape) * pow (x, shape-1) /
28 exp (shape*x + LnGamma(shape));
32 double cdfGamma (double x, double shape)
36 result = IncompleteGamma (shape*x, shape, LnGamma(shape));
41 /* Gamma inverse cdf */
42 double icdfGamma (double y, double shape)
46 result = PointChi2 (y, 2.0*shape)/(2.0*shape);
57 /* Gamma n-th moment */
58 double momentGamma (int n, double shape)
63 for (i = 1; i < n; i++)
65 tmp *= (shape + i)/shape;
71 /* The following code comes from tools.c in Yang's PAML package */
73 double LnGamma (double alpha)
75 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
76 Stirling's formula is used for the central polynomial part of the procedure.
77 Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
78 Communications of the Association for Computing Machinery, 9:684
80 double x=alpha, f=0, z;
88 return f + (x-0.5)*log(x) - x + .918938533204673
89 + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z
93 static double IncompleteGamma (double x, double alpha, double ln_gamma_alpha)
95 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper
96 limit of the integration and alpha is the shape parameter.
97 returns (-1) if in error
98 (1) series expansion if (alpha>x || x<=1)
99 (2) continued fraction otherwise
101 Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics,
105 double p=alpha, g=ln_gamma_alpha;
106 double accurate=1e-8, overflow=1e30;
107 double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
109 if (x==0) return (0);
110 if (x<0 || p<=0) return (-1);
112 factor=exp(p*log(x)-x-g);
113 if (x>1 && x>=p) goto l30;
114 /* (1) series expansion */
118 term*=x/rn; gin+=term;
120 if (term > accurate) goto l20;
124 /* (2) continued fraction */
125 a=1-p; b=a+x+1; term=0;
126 pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b;
129 a++; b+=2; term++; an=a*term;
130 for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];
131 if (pn[5] == 0) goto l35;
132 rn=pn[4]/pn[5]; dif=fabs(gin-rn);
133 if (dif>accurate) goto l34;
134 if (dif<=accurate*rn) goto l42;
138 for (i=0; i<4; i++) pn[i]=pn[i+2];
139 if (fabs(pn[4]) < overflow) goto l32;
140 for (i=0; i<4; i++) pn[i]/=overflow;
150 /* functions concerning the CDF and percentage points of the gamma and
153 static double PointNormal (double prob)
155 /* returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
156 returns (-9999) if in error
157 Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
158 Applied Statistics 22: 96-97 (AS70)
161 Wichura MJ (1988) Algorithm AS 241: the percentage points of the
162 normal distribution. 37: 477-484.
163 Beasley JD & Springer SG (1977). Algorithm AS 111: the percentage
164 points of the normal distribution. 26: 118-121.
167 double a0=-.322232431088, a1=-1, a2=-.342242088547, a3=-.0204231210245;
168 double a4=-.453642210148e-4, b0=.0993484626060, b1=.588581570495;
169 double b2=.531103462366, b3=.103537752850, b4=.0038560700634;
170 double y, z=0, p=prob, p1;
172 p1 = (p<0.5 ? p : 1-p);
173 if (p1<1e-20) return (-9999);
175 y = sqrt (log(1/(p1*p1)));
176 z = y + ((((y*a4+a3)*y+a2)*y+a1)*y+a0) / ((((y*b4+b3)*y+b2)*y+b1)*y+b0);
177 return (p<0.5 ? -z : z);
181 static double PointChi2 (double prob, double v)
183 /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
184 returns -1 if in error. 0.000002<prob<0.999998
186 Best DJ & Roberts DE (1975) The percentage points of the
187 Chi2 distribution. Applied Statistics 24: 385-388. (AS91)
188 Converted into C by Ziheng Yang, Oct. 1993.
190 double e=.5e-6, aa=.6931471805, p=prob, g;
191 double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;
193 if (p<.000002 || p>.999998 || v<=0) return (-1);
197 if (v >= -1.24*log(p)) goto l1;
199 ch=pow((p*xx*exp(g+xx*aa)), 1/xx);
200 if (ch-e<0) return (ch);
206 q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch));
207 t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;
208 ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;
209 if (fabs(q/ch-1)-.01 <= 0) goto l4;
214 p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0);
215 if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g);
221 if ((t=IncompleteGamma (p1, xx, g))<0) {
225 t=p2*exp(xx*aa+g+p1-c*log(ch));
228 s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;
229 s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;
230 s3=(210+a*(462+a*(707+932*a)))/2520;
231 s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;
232 s5=(84+264*a+c*(175+606*a))/2520;
233 s6=(120+c*(346+127*c))/5040;
234 ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));
236 while (fabs(q/ch-1) > e);
242 /* Incomplete Gamma function Q(a,x)
243 - this is a cleanroom implementation of NRs gammq(a,x)
245 double IncompleteGammaQ (double a, double x)
247 return 1.0-IncompleteGamma (x, a, LnGamma(a));
251 /* probability that the observed chi-square
252 exceeds chi2 even if model is correct */
253 double chi2prob (int deg, double chi2)
255 return IncompleteGammaQ (0.5*deg, 0.5*chi2);
261 ef expected frequencies (sum up to 1 !!)
262 of observed frequencies (sum up to the number of samples)
263 numcat number of categories
264 returns critical significance level */
265 double chi2test(double *ef, int *of, int numcat, int *chi2fail)
267 double chi2, criticals, efn;
268 int i, below1, below5, reducedcat;
276 /* compute number of samples */
278 for (i = 0; i < numcat; i++)
279 samples = samples + of[i];
281 /* compute chi square */
283 for (i = 0; i < numcat; i++) {
284 efn = ef[i]*((double) samples);
285 if (efn < 1.0) below1++;
286 if (efn < 5.0) below5++;
289 fprintf(stdout, "FPE error: samples=%d, ef[%d]=%f, of[%d]=%d, efn=%f, nc=%d, rc=%d\n",
290 samples, i, ef[i], i, of[i], efn, numcat, reducedcat);
291 fprintf(stdout, "PLEASE REPORT THIS ERROR TO DEVELOPERS !!!\n");
293 } else chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn;
296 /* compute significance */
297 criticals = chi2prob (numcat-1, chi2);
299 /* no expected frequency category (sum up to # samples) below 1.0 */
300 if (below1 > 0) *chi2fail = TRUE;
301 /* no more than 1/5 of the frequency categories below 5.0 */
302 if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE;
309 ef expected frequencies (sum up to 1 !!)
310 of observed frequencies (sum up to the number of samples)
311 numcat number of categories
312 returns critical significance level */
313 double altchi2test(double *ef, int *of, int numcat, int *chi2fail)
315 double chi2, criticals, efn;
316 int i, below1, below5;
323 /* compute number of samples */
325 for (i = 0; i < numcat; i++)
326 samples = samples + of[i];
328 /* compute chi square */
330 for (i = 0; i < numcat; i++) {
331 efn = ef[i]*((double) samples);
332 if (efn < 1.0) below1++;
333 if (efn < 5.0) below5++;
334 chi2 = chi2 + ((double) of[i]-efn)*((double) of[i]-efn)/efn;
337 /* compute significance */
338 criticals = chi2prob (numcat-1, chi2);
340 /* no expected frequency category (sum up to # samples) below 1.0 */
341 if (below1 > 0) *chi2fail = TRUE;
342 /* no more than 1/5 of the frequency categories below 5.0 */
343 if (below5 > (int) floor(samples/5.0)) *chi2fail = TRUE;