5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
29 /******************************************************************************/
30 /* discrete Gamma-distribution and related stuff */
31 /******************************************************************************/
33 /* compare general base frequencies with frequencies of taxon i with chi square */
34 double homogentest(int taxon)
36 return chi2test(Freqtpm, Basecomp[taxon], gettpmradix(), &chi2fail);
40 /* discrete Gamma according to Yang 1994 (JME 39:306-314) */
41 void YangDiscreteGamma (double shape, int c, dvector x)
48 for (i = 0; i < c; i++)
50 /* corresponding rates */
51 x[i] = icdfGamma ( (2.0*i+1.0)/twoc, shape);
56 /* rescale for avarage rate of 1.0 */
57 for (i = 0; i < c; i++)
63 /* compute rates of each category when rates are Gamma-distributed */
76 for (i = 0; i < numcats; i++)
80 alpha = (1.0 - Geta)/Geta;
82 YangDiscreteGamma (alpha, numcats, Rates);
84 /* if invariable sites are present */
85 for (i = 0; i < numcats; i++)
86 Rates[i] = Rates[i]/(1.0-fracinv);
88 /* check for very small rates */
89 for (i = 0; i < numcats; i++)
90 if (Rates[i] < 0.000001) Rates[i] = 0.000001;
95 /******************************************************************************/
96 /* parameter estimation */
97 /******************************************************************************/
99 /* compute sample mean and standard deviation of sample mean */
100 void computestat(double *data, int n, double *mean, double *err)
106 for (i = 0; i < n; i++) sum += data[i];
107 (*mean) = sum/(double) n;
110 for (i = 0; i < n; i++) sum += (data[i] - (*mean))*(data[i] - (*mean));
112 (*err) = sqrt(sum)/sqrt((double)(n-1)*n); /* unbiased estimator */
114 (*err) = 0.0; /* if n == 1 */
117 /* compute ML value of quartet (a,b,c,d) */
118 double quartetml(int a, int b, int c, int d)
122 /* compute ML for all topologies */
123 if (approxp_optn) { /* approximate parameter mode */
124 d1 = quartet_alklhd(a,b,c,d); /* (a,b)-(c,d) */
125 d2 = quartet_alklhd(a,c,b,d); /* (a,c)-(b,d) */
126 d3 = quartet_alklhd(a,d,b,c); /* (a,d)-(b,c) */
128 d1 = quartet_lklhd(a,b,c,d); /* (a,b)-(c,d) */
129 d2 = quartet_lklhd(a,c,b,d); /* (a,c)-(b,d) */
130 d3 = quartet_lklhd(a,d,b,c); /* (a,d)-(b,c) */
133 /* looking for max(d1, d2, d3) */
134 if (d1 < d2) { /* d2 > d1 */
135 if (d2 < d3) { /* d3 > d2 > d1 */
138 } else { /* d2 >= d3 > d1 */
142 } else { /* d1 >= d2 */
143 if (d1 < d3) { /* d3 > d1 >= d2 */
146 } else { /* d1 >= d2 && d1 >= d3 */
153 /* optimization function TSparam - quartets */
154 double opttsq(double x)
156 if (x < MINTS) TSparam = MINTS;
157 else if (x > MAXTS) TSparam = MAXTS;
160 distupdate(qca, qcb, qcc, qcd);
161 return (-quartetml(qca, qcb, qcc, qcd));
164 /* optimization function YRparam - quartets */
165 double optyrq(double x)
167 if (x < MINYR) YRparam = MINYR;
168 else if (x > MAXYR) YRparam = MAXYR;
171 distupdate(qca, qcb, qcc, qcd);
172 return (-quartetml(qca, qcb, qcc, qcd));
175 /* estimate substitution process parameters - random quartets */
176 void optimseqevolparamsq()
178 double tsmeanold, yrmeanold;
179 dvector tslist, yrlist;
182 uli minqts, maxqts, n;
185 taxon = new_ivector(4);
187 /* number of quartets to be investigated */
188 minqts = (uli) floor(0.25 * MINPERTAXUM * Maxspc) + 1;
189 maxqts = (uli) floor(0.25 * MAXPERTAXUM * Maxspc) + 1;
195 tslist = new_dvector(maxqts);
196 yrlist = new_dvector(maxqts);
198 /* initialize averages */
204 /* investigate maxqts random quartets */
205 for (n = 0; n < maxqts; n++) {
207 /* choose random quartet */
208 chooser(Maxspc, 4, taxon);
211 * optimize parameters on this quartet
219 /* initialize start values with average value */
220 if ((SH_optn || nuc_optn) && optim_optn && (data_optn == 0)) TSparam = tsmean;
221 if ((nuc_optn && TN_optn) && optim_optn && (data_optn == 0)) YRparam = yrmean;
225 (SH_optn || nuc_optn) && optim_optn && (data_optn == 0),
226 MINTS, &TSparam, MAXTS, opttsq, &tserr,
227 (nuc_optn && TN_optn) && optim_optn && (data_optn == 0),
228 MINYR, &YRparam, MAXYR, optyrq, &yrerr);
235 computestat(tslist, n+1 , &tsmean, &tserr);
236 computestat(yrlist, n+1 , &yrmean, &yrerr);
238 /* check whether the means are converging */
240 if ((fabs(tsmean-tsmeanold) < TSDIFF) &&
241 (fabs(yrmean-yrmeanold) < YRDIFF))
245 /* investigate at least minqts quartets */
246 if (n > minqts-2 && (fin || n > maxqts-2)) break;
249 /* round estimated numbers to 2 digits after the decimal point */
250 if (tserr != 0.0) tsmean = floor(100.0*tsmean+0.5)/100.0;
251 if (yrerr != 0.0) yrmean = floor(100.0*yrmean+0.5)/100.0;
253 /* update ML engine */
261 /* optimization function TSparam - tree */
262 double opttst(double x)
266 if (x < MINTS) TSparam = MINTS;
267 else if (x > MAXTS) TSparam = MAXTS;
271 if (approxp_optn) result = usertree_alklhd();
272 else result = usertree_lklhd();
277 /* optimization function YRparam - tree */
278 double optyrt(double x)
282 if (x < MINYR) YRparam = MINYR;
283 else if (x > MAXYR) YRparam = MAXYR;
287 if (approxp_optn) result = usertree_alklhd();
288 else result = usertree_lklhd();
294 /* optimize substitution process parameters - tree */
295 void optimseqevolparamst()
298 (SH_optn || nuc_optn) && optim_optn && (data_optn == 0),
299 MINTS, &TSparam, MAXTS, opttst, &tserr,
300 (nuc_optn && TN_optn) && optim_optn && (data_optn == 0),
301 MINYR, &YRparam, MAXYR, optyrt, &yrerr);
305 /* optimization function fracinv */
306 double optfi(double x)
310 if (x < MINFI) fracinv = MINFI;
311 else if (x > MAXFI) fracinv = MAXFI;
315 if (approxp_optn) result = usertree_alklhd();
316 else result = usertree_lklhd();
322 /* optimization function Geta */
323 double optge(double x)
327 if (x < MINGE) Geta = MINGE;
328 else if (x > MAXGE) Geta = MAXGE;
334 if (approxp_optn) result = usertree_alklhd();
335 else result = usertree_lklhd();
341 /* optimize rate heterogeneity parameters */
342 void optimrateparams()
346 MINFI, &fracinv, fracconst, optfi, &fierr,
348 MINGE, &Geta, MAXGE, optge, &geerr);