5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
16 /******************************************************************************/
17 /* definitions and prototypes */
18 /******************************************************************************/
31 #ifndef PARALLEL /* because printf() runs significantly faster */
32 /* than fprintf(stdout) on an Apple McIntosh */
34 # define FPRINTF printf
37 # define FPRINTF fprintf
38 # define STDOUTFILE STDOUT,
42 /******************************************************************************/
43 /* compacting sequence data information */
44 /******************************************************************************/
47 /***************************** internal functions *****************************/
50 /* make all frequencies a little different */
51 void convfreq(dvector freqemp)
54 double freq, maxfreq, sum;
59 for (i = 0; i < tpmradix; i++) {
61 if (freq < MINFREQ) freqemp[i] = MINFREQ;
68 freqemp[maxi] += 1.0 - sum;
70 for (i = 0; i < tpmradix - 1; i++) {
71 for (j = i + 1; j < tpmradix; j++) {
72 if (freqemp[i] == freqemp[j]) {
73 freqemp[i] += MINFDIFF/2.0;
74 freqemp[j] -= MINFDIFF/2.0;
80 /* sort site patters of original input data */
81 void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
84 int i, j, k, l, n, pass;
89 awork = new_ivector(maxsite);
90 count = new_ivector(tpmradix+1);
91 for (i = 0; i < maxsite; i++)
93 for (pass = maxspc - 1; pass >= 0; pass--) {
94 for (j = 0; j < tpmradix+1; j++)
96 for (i = 0; i < maxsite; i++)
97 count[(int) seqchar[pass][ali[i]]]++;
98 for (j = 1; j < tpmradix+1; j++)
99 count[j] += count[j-1];
100 for (i = maxsite-1; i >= 0; i--)
101 awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
102 for (i = 0; i < maxsite; i++)
108 for (j = 1; j < maxsite; j++) {
111 for (i = 0; i < maxspc; i++) {
112 if (seqchar[i][l] != seqchar[i][k]) {
122 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
123 ivector weight, int maxspc, int maxsite, int numptrn)
126 int agree_flag; /* boolean */
131 for (i = 0; i < maxspc; i++) {
132 seqconint[i][n] = seqchar[i][k];
136 for (j = 1; j < maxsite; j++) {
139 for (i = 0; i < maxspc; i++) {
140 if (seqconint[i][n] != seqchar[i][k]) {
145 if (agree_flag == FALSE) {
147 for (i = 0; i < maxspc; i++) {
148 seqconint[i][n] = seqchar[i][k];
159 /* Problem in condenceseq */
160 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
165 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
166 int *numconst, int *numconstpat)
168 int character, s, i, constflag;
172 for (s = 0; s < numptrn; s++) { /* check all patterns */
175 character = seqpat[0][s];
176 for (i = 1; i < maxspc; i++) {
177 if (seqpat[i][s] != character) {
182 if (character != tpmradix && constflag) {
183 (*numconst) = (*numconst) + weight[s];
190 /***************************** exported functions *****************************/
197 convfreq(Freqtpm); /* make all frequencies slightly different */
198 ali = new_ivector(Maxsite);
199 radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
200 Seqpat = new_cmatrix(Maxspc, Numptrn);
201 constpat = new_ivector(Numptrn);
202 Weight = new_ivector(Numptrn);
203 condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
205 countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
206 fracconstpat = (double) Numconstpat / (double) Numptrn;
207 fracconst = (double) Numconst / (double) Maxsite;
211 /******************************************************************************/
212 /* computation of Pij(t) */
213 /******************************************************************************/
216 /***************************** internal functions *****************************/
219 void elmhes(dmatrix a, ivector ordr, int n)
225 for (i = 0; i < n; i++)
227 for (m = 2; m < n; m++) {
230 for (j = m; j <= n; j++) {
231 if (fabs(a[j - 1][m - 2]) > fabs(x)) {
236 ordr[m - 1] = i; /* vector */
238 for (j = m - 2; j < n; j++) {
240 a[i - 1][j] = a[m - 1][j];
243 for (j = 0; j < n; j++) {
245 a[j][i - 1] = a[j][m - 1];
250 for (i = m; i < n; i++) {
255 for (j = m - 1; j < n; j++)
256 a[i][j] -= y * a[m - 1][j];
257 for (j = 0; j < n; j++)
258 a[j][m - 1] += y * a[j][i];
266 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
271 for (i = 0; i < n; i++) {
272 for (j = i + 1; j < n; j++) {
280 for (m = n - 1; m >= 2; m--) {
281 for (i = m; i < n; i++)
282 zz[i][m - 1] = a[i][m - 2];
285 for (j = m - 1; j < n; j++) {
286 zz[m - 1][j] = zz[i - 1][j];
289 zz[i - 1][m - 1] = 1.0;
295 void mcdiv(double ar, double ai, double br, double bi,
296 double *cr, double *ci)
298 double s, ars, ais, brs, bis;
301 s = fabs(br) + fabs(bi);
306 s = brs * brs + bis * bis;
307 *cr = (ars * brs + ais * bis) / s;
308 *ci = (ais * brs - ars * bis) / s;
312 void hqr2(int n, int low, int hgh, dmatrix h,
313 dmatrix zz, dvector wr, dvector wi)
315 int i, j, k, l=0, m, en, na, itn, its;
316 double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
317 int notlas; /* boolean */
322 /* store isolated roots and compute matrix norm */
323 for (i = 0; i < n; i++) {
324 for (j = k - 1; j < n; j++)
325 norm += fabs(h[i][j]);
327 if (i + 1 < low || i + 1 > hgh) {
335 while (en >= low) { /* search for next eigenvalues */
339 /* look for single small sub-diagonal element */
340 for (l = en; l > low; l--) {
341 s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
345 tst2 = tst1 + fabs(h[l - 1][l - 2]);
351 x = h[en - 1][en - 1]; /* form shift */
352 if (l == en || l == na)
355 /* all eigenvalues have not converged */
356 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
359 y = h[na - 1][na - 1];
360 w = h[en - 1][na - 1] * h[na - 1][en - 1];
361 /* form exceptional shift */
362 if (its == 10 || its == 20) {
364 for (i = low - 1; i < en; i++)
366 s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
373 /* look for two consecutive small sub-diagonal elements */
374 for (m = en - 2; m >= l; m--) {
378 p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
379 q = h[m][m] - z - r - s;
381 s = fabs(p) + fabs(q) + fabs(r);
388 (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
389 tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
393 for (i = m + 2; i <= en; i++) {
394 h[i - 1][i - 3] = 0.0;
396 h[i - 1][i - 4] = 0.0;
398 for (k = m; k <= na; k++) {
406 x = fabs(p) + fabs(q) + fabs(r);
414 if (p < 0.0) /* sign */
415 s = - sqrt(p * p + q * q + r * r);
417 s = sqrt(p * p + q * q + r * r);
419 h[k - 1][k - 2] = -s * x;
422 h[k - 1][k - 2] = -h[k - 1][k - 2];
431 for (j = k - 1; j < n; j++) { /* row modification */
432 p = h[k - 1][j] + q * h[k][j];
433 h[k - 1][j] -= p * x;
436 j = (en < (k + 3)) ? en : (k + 3); /* min */
437 for (i = 0; i < j; i++) { /* column modification */
438 p = x * h[i][k - 1] + y * h[i][k];
442 /* accumulate transformations */
443 for (i = low - 1; i < hgh; i++) {
444 p = x * zz[i][k - 1] + y * zz[i][k];
449 for (j = k - 1; j < n; j++) { /* row modification */
450 p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
451 h[k - 1][j] -= p * x;
453 h[k + 1][j] -= p * z;
455 j = (en < (k + 3)) ? en : (k + 3); /* min */
456 for (i = 0; i < j; i++) { /* column modification */
457 p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
460 h[i][k + 1] -= p * r;
462 /* accumulate transformations */
463 for (i = low - 1; i < hgh; i++) {
464 p = x * zz[i][k - 1] + y * zz[i][k] +
468 zz[i][k + 1] -= p * r;
473 } /* while infinite loop */
474 if (l == en) { /* one root found */
475 h[en - 1][en - 1] = x + t;
476 wr[en - 1] = h[en - 1][en - 1];
481 y = h[na - 1][na - 1];
482 w = h[en - 1][na - 1] * h[na - 1][en - 1];
486 h[en - 1][en - 1] = x + t;
487 x = h[en - 1][en - 1];
488 h[na - 1][na - 1] = y + t;
489 if (q >= 0.0) { /* real pair */
490 if (p < 0.0) /* sign */
495 wr[en - 1] = wr[na - 1];
497 wr[en - 1] = x - w / z;
500 x = h[en - 1][na - 1];
501 s = fabs(x) + fabs(z);
504 r = sqrt(p * p + q * q);
507 for (j = na - 1; j < n; j++) { /* row modification */
509 h[na - 1][j] = q * z + p * h[en - 1][j];
510 h[en - 1][j] = q * h[en - 1][j] - p * z;
512 for (i = 0; i < en; i++) { /* column modification */
514 h[i][na - 1] = q * z + p * h[i][en - 1];
515 h[i][en - 1] = q * h[i][en - 1] - p * z;
517 /* accumulate transformations */
518 for (i = low - 1; i < hgh; i++) {
520 zz[i][na - 1] = q * z + p * zz[i][en - 1];
521 zz[i][en - 1] = q * zz[i][en - 1] - p * z;
523 } else { /* complex pair */
530 } /* while en >= low */
531 /* backsubstitute to find vectors of upper triangular form */
533 for (en = n; en >= 1; en--) {
537 if (q == 0.0) {/* real vector */
539 h[en - 1][en - 1] = 1.0;
541 for (i = en - 2; i >= 0; i--) {
544 for (j = m - 1; j < en; j++)
545 r += h[i][j] * h[j][en - 1];
559 } while (tst2 > tst1);
561 h[i][en - 1] = -(r / t);
562 } else { /* solve real equations */
565 q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
566 t = (x * s - z * r) / q;
568 if (fabs(x) > fabs(z))
569 h[i + 1][en - 1] = (-r - w * t) / x;
571 h[i + 1][en - 1] = (-s - y * t) / z;
573 /* overflow control */
574 t = fabs(h[i][en - 1]);
577 tst2 = tst1 + 1.0 / tst1;
579 for (j = i; j < en; j++)
586 } else if (q > 0.0) {
588 if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
589 h[na - 1][na - 1] = q / h[en - 1][na - 1];
590 h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
593 mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
594 &h[na - 1][na - 1], &h[na - 1][en - 1]);
595 h[en - 1][na - 1] = 0.0;
596 h[en - 1][en - 1] = 1.0;
598 for (i = en - 3; i >= 0; i--) {
602 for (j = m - 1; j < en; j++) {
603 ra += h[i][j] * h[j][na - 1];
604 sa += h[i][j] * h[j][en - 1];
613 mcdiv(-ra, -sa, w, q, &h[i][na - 1],
615 else { /* solve complex equations */
618 vr = (wr[i] - p) * (wr[i] - p);
619 vr = vr + wi[i] * wi[i] - q * q;
620 vi = (wr[i] - p) * 2.0 * q;
621 if (vr == 0.0 && vi == 0.0) {
622 tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
628 } while (tst2 > tst1);
630 mcdiv(x * r - z * ra + q * sa,
631 x * s - z * sa - q * ra, vr, vi,
632 &h[i][na - 1], &h[i][en - 1]);
633 if (fabs(x) > fabs(z) + fabs(q)) {
635 [na - 1] = (q * h[i][en - 1] -
636 w * h[i][na - 1] - ra) / x;
637 h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
638 q * h[i][na - 1]) / x;
640 mcdiv(-r - y * h[i][na - 1],
641 -s - y * h[i][en - 1], z, q,
642 &h[i + 1][na - 1], &h[i + 1][en - 1]);
644 /* overflow control */
645 t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
646 fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
649 tst2 = tst1 + 1.0 / tst1;
651 for (j = i; j < en; j++) {
662 /* end back substitution. vectors of isolated roots */
663 for (i = 0; i < n; i++) {
664 if (i + 1 < low || i + 1 > hgh) {
665 for (j = i; j < n; j++)
669 /* multiply by transformation matrix to give vectors of
670 * original full matrix. */
671 for (j = n - 1; j >= low - 1; j--) {
672 m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
673 for (i = low - 1; i < hgh; i++) {
675 for (k = low - 1; k < m; k++)
676 z += zz[i][k] * h[k][j];
685 /* make rate matrix with 0.01 expected substitutions per unit time */
686 void onepamratematrix(dmatrix a)
689 double delta, temp, sum;
692 for (i = 0; i < tpmradix; i++)
694 for (j = 0; j < tpmradix; j++)
696 a[i][j] = Freqtpm[j]*a[i][j];
700 m = new_dvector(tpmradix);
701 for (i = 0, sum = 0.0; i < tpmradix; i++)
703 for (j = 0, temp = 0.0; j < tpmradix; j++)
705 m[i] = temp; /* row sum */
706 sum += temp*Freqtpm[i]; /* exp. rate */
708 delta = 0.01 / sum; /* 0.01 subst. per unit time */
709 for (i = 0; i < tpmradix; i++) {
710 for (j = 0; j < tpmradix; j++) {
712 a[i][j] = delta * a[i][j];
714 a[i][j] = delta * (-m[i]);
721 void eigensystem(dvector eval, dmatrix evec)
730 ordr = new_ivector(tpmradix);
731 evali = new_dvector(tpmradix);
732 forg = new_dvector(tpmradix);
733 a = new_dmatrix(tpmradix,tpmradix);
734 b = new_dmatrix(tpmradix,tpmradix);
736 rtfdata(a, forg); /* get relative transition matrix and frequencies */
738 onepamratematrix(a); /* make 1 PAM rate matrix */
741 for (i = 0; i < tpmradix; i++)
742 for (j = 0; j < tpmradix; j++)
745 elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
746 eltran(a, evec, ordr, tpmradix);
747 hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
749 /* check eigenvalue equation */
751 for (j = 0; j < tpmradix; j++) {
752 for (i = 0, zero = 0.0; i < tpmradix; i++) {
753 for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
754 zero -= eval[j] * evec[i][j];
755 if (fabs(zero) > 1.0e-5)
760 FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
770 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
772 double eps = 1.0e-20; /* ! */
773 int i, j, k, l, maxi=0, idx, ix, jx;
774 double sum, tmp, maxb, aw;
780 index = new_ivector(tpmradix);
781 omtrx = new_dmatrix(tpmradix,tpmradix);
783 /* copy inmat to omtrx */
784 for (i = 0; i < tpmradix; i++)
785 for (j = 0; j < tpmradix; j++)
786 omtrx[i][j] = inmat[i][j];
788 wk = (double *) malloc((unsigned)size * sizeof(double));
790 for (i = 0; i < size; i++) {
792 for (j = 0; j < size; j++) {
793 if (fabs(omtrx[i][j]) > maxb)
794 maxb = fabs(omtrx[i][j]);
797 /* Singular matrix */
798 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
803 for (j = 0; j < size; j++) {
804 for (i = 0; i < j; i++) {
806 for (k = 0; k < i; k++)
807 sum -= omtrx[i][k] * omtrx[k][j];
811 for (i = j; i < size; i++) {
813 for (k = 0; k < j; k++)
814 sum -= omtrx[i][k] * omtrx[k][j];
816 tmp = wk[i] * fabs(sum);
823 for (k = 0; k < size; k++) {
824 tmp = omtrx[maxi][k];
825 omtrx[maxi][k] = omtrx[j][k];
832 if (omtrx[j][j] == 0.0)
835 tmp = 1.0 / omtrx[j][j];
836 for (i = j + 1; i < size; i++)
840 for (jx = 0; jx < size; jx++) {
841 for (ix = 0; ix < size; ix++)
845 for (i = 0; i < size; i++) {
850 for (j = l; j < i; j++)
851 sum -= omtrx[i][j] * wk[j];
852 } else if (sum != 0.0)
856 for (i = size - 1; i >= 0; i--) {
858 for (j = i + 1; j < size; j++)
859 sum -= omtrx[i][j] * wk[j];
860 wk[i] = sum / omtrx[i][i];
862 for (ix = 0; ix < size; ix++)
863 imtrx[ix][jx] = wk[ix];
872 void checkevector(dmatrix evec, dmatrix ivec, int nn)
874 int i, j, ia, ib, ic, error;
879 matx = new_dmatrix(nn, nn);
880 /* multiply matrix of eigenvectors and its inverse */
881 for (ia = 0; ia < nn; ia++) {
882 for (ic = 0; ic < nn; ic++) {
884 for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
888 /* check whether the unitary matrix is obtained */
890 for (i = 0; i < nn; i++) {
891 for (j = 0; j < nn; j++) {
893 if (fabs(matx[i][j] - 1.0) > 1.0e-5)
896 if (fabs(matx[i][j]) > 1.0e-5)
902 FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
908 /***************************** exported functions *****************************/
911 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
914 eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
915 luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
916 checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
921 void tprobmtrx(double arc, dmatrix tpr)
923 register int i, j, k;
924 register double temp;
927 for (k = 0; k < tpmradix; k++) {
928 temp = exp(arc * Eval[k]);
929 for (j = 0; j < tpmradix; j++)
930 iexp[k][j] = Ievc[k][j] * temp;
932 for (i = 0; i < tpmradix; i++) {
933 for (j = 0; j < tpmradix; j++) {
935 for (k = 0; k < tpmradix; k++)
936 temp += Evec[i][k] * iexp[k][j];
937 tpr[i][j] = fabs(temp);
943 /******************************************************************************/
944 /* estimation of maximum likelihood distances */
945 /******************************************************************************/
947 /* compute total log-likelihood
948 input: likelihoods for each site and non-zero rate
949 output: total log-likelihood (incl. zero rate category) */
950 double comptotloglkl(dmatrix cdl)
953 double loglkl, fv, fv2, sitelkl;
957 fv2 = (1.0-fracinv)/(double) numcats;
961 for (k = 0; k < Numptrn; k++) {
963 /* compute likelihood for pattern k */
964 sitelkl = cdl[0][k]*fv;
965 if (constpat[k] == TRUE)
966 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
968 /* total log-likelihood */
969 loglkl += log(sitelkl)*Weight[k];
975 for (k = 0; k < Numptrn; k++) {
977 /* this general routine works always but it's better
978 to run it only when it's really necessary */
980 /* compute likelihood for pattern k */
982 for (r = 0; r < numcats; r++)
983 sitelkl += cdl[r][k];
984 sitelkl = fv2*sitelkl;
985 if (constpat[k] == TRUE)
986 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
988 /* total log-likelihood */
989 loglkl += log(sitelkl)*Weight[k];
999 /* computes the site log-likelihoods
1000 input: likelihoods for each site and non-zero rate
1001 output: log-likelihood for each site */
1002 void allsitelkl(dmatrix cdl, dvector aslkl)
1005 double fv, fv2, sitelkl;
1008 fv2 = (1.0-fracinv)/(double) numcats;
1012 for (k = 0; k < Numptrn; k++) {
1014 /* compute likelihood for pattern k */
1015 sitelkl = cdl[0][k]*fv;
1016 if (constpat[k] == TRUE)
1017 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1019 /* site log-likelihood */
1020 aslkl[k] = log(sitelkl);
1025 for (k = 0; k < Numptrn; k++) {
1027 /* this general routine works always but it's better
1028 to run it only when it's really necessary */
1030 /* compute likelihood for pattern k */
1032 for (r = 0; r < numcats; r++)
1033 sitelkl += cdl[r][k];
1034 sitelkl = fv2*sitelkl;
1035 if (constpat[k] == TRUE)
1036 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1038 /* total log-likelihood */
1039 aslkl[k] = log(sitelkl);
1046 /***************************** internal functions *****************************/
1048 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049 double pairlkl(double arc)
1052 double loglkl, fv, sitelkl;
1056 for (r = 0; r < numcats; r++)
1057 /* compute tpm for rate category r */
1058 tprobmtrx(arc*Rates[r], ltprobr[r]);
1065 for (k = 0; k < Numptrn; k++) {
1067 /* compute likelihood for site k */
1070 if (ci != tpmradix && cj != tpmradix)
1071 sitelkl = ltprobr[0][ci][cj]*fv;
1074 if (ci == cj && ci != tpmradix)
1075 sitelkl += fracinv*Freqtpm[ci];
1077 /* total log-likelihood */
1078 loglkl += log(sitelkl)*Weight[k];
1084 for (k = 0; k < Numptrn; k++) {
1086 /* this general routine works always but it's better
1087 to run it only when it's really necessary */
1089 /* compute likelihood for site k */
1092 if (ci != tpmradix && cj != tpmradix) {
1094 for (r = 0; r < numcats; r++)
1095 sitelkl += ltprobr[r][ci][cj];
1096 sitelkl = fv*sitelkl/(double) numcats;
1099 if (ci == cj && ci != tpmradix)
1100 sitelkl += fracinv*Freqtpm[ci];
1102 /* total log-likelihood */
1103 loglkl += log(sitelkl)*Weight[k];
1109 /* return negative log-likelihood as we use a minimizing procedure */
1114 /***************************** exported functions *****************************/
1118 /******************************************************************************/
1120 /* maximum likelihood distance between sequence i and j */
1121 /* CZ changed 05/16/01 */
1122 double mldistance( int i ) {
1123 double dist, fx, f2x;
1125 /* use old distance as start value */
1126 dist = Distanmat[ i ];
1128 if ( dist == 0.0 ) {
1132 seqchi = Seqpat[ Maxspc - 1 ];
1133 seqchj = Seqpat[ i ];
1135 if (dist <= MINARC) dist = MINARC+1.0;
1136 if (dist >= MAXARC) dist = MAXARC-1.0;
1138 dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1145 /* initialize distance matrix */
1146 /* CZ changed 05/16/01 */
1148 int i, k, diff, x, y;
1151 for (i = 0; i < Maxspc - 1 ; i++) {
1154 seqchj = Seqpat[Maxspc - 1];
1156 /* count observed differences */
1158 for (k = 0; k < Numptrn; k++) {
1169 /* use generalized JC correction to get first estimate
1170 (for the SH model the observed distance is used) */
1171 /* observed distance */
1172 obs = (double) diff / (double) Maxsite;
1173 temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1174 if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1175 /* use JC corrected distance */
1176 Distanmat[i] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1178 /* use observed distance */
1179 Distanmat[i] = obs * 100.0;
1180 if (Distanmat[i] < MINARC) Distanmat[i] = MINARC;
1181 if (Distanmat[i] > MAXARC) Distanmat[i] = MAXARC;
1190 /* compute distance matrix */
1191 /* CZ changed 05/16/01 */
1192 void computedistan() {
1195 for ( i = 0; i < Maxspc - 1; i++ ) {
1196 Distanmat[ i ] = mldistance( i );
1201 /******************************************************************************/
1207 /******************************************************************************/
1208 /* computation of maximum likelihood edge lengths for a given tree */
1209 /******************************************************************************/
1212 /***************************** internal functions *****************************/
1215 /* multiply partial likelihoods */
1216 void productpartials(Node *op)
1224 while (cp->isop->isop != op) {
1227 for (r = 0; r < numcats; r++)
1228 for (i = 0; i < Numptrn; i++)
1229 for (j = 0; j < tpmradix; j++)
1230 opc[r][i][j] *= cpc[r][i][j];
1235 /* compute internal partial likelihoods */
1236 void partialsinternal(Node *op)
1242 if (clockmode == 1) { /* clocklike branch lengths */
1243 for (r = 0; r < numcats; r++) {
1244 tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1246 } else { /* non-clocklike branch lengths */
1247 for (r = 0; r < numcats; r++) {
1248 tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1252 oprob = op->partials;
1253 cprob = op->kinp->isop->partials;
1254 for (r = 0; r < numcats; r++) {
1255 for (k = 0; k < Numptrn; k++) {
1256 for (i = 0; i < tpmradix; i++) {
1258 for (j = 0; j < tpmradix; j++)
1259 sum += ltprobr[r][i][j] * cprob[r][k][j];
1260 oprob[r][k][i] = sum;
1267 /* compute external partial likelihoods */
1268 void partialsexternal(Node *op)
1274 if (clockmode == 1) { /* clocklike branch lengths */
1275 for (r = 0; r < numcats; r++) {
1276 tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1278 } else { /* nonclocklike branch lengths */
1279 for (r = 0; r < numcats; r++) {
1280 tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1284 oprob = op->partials;
1285 dseqi = op->kinp->eprob;
1286 for (r = 0; r < numcats; r++) {
1287 for (k = 0; k < Numptrn; k++) {
1288 if ((j = dseqi[k]) == tpmradix) {
1289 for (i = 0; i < tpmradix; i++)
1290 oprob[r][k][i] = 1.0;
1292 for (i = 0; i < tpmradix; i++)
1293 oprob[r][k][i] = ltprobr[r][i][j];
1300 /* compute all partial likelihoods */
1301 void initpartials(Tree *tr)
1305 cp = rp = tr->rootp;
1307 cp = cp->isop->kinp;
1308 if (cp->isop == NULL) { /* external node */
1309 cp = cp->kinp; /* not descen */
1310 partialsexternal(cp);
1311 } else { /* internal node */
1313 productpartials(cp->kinp->isop);
1314 partialsinternal(cp);
1321 /* compute log-likelihood given internal branch with length arc
1322 between partials partiali and partials partialj */
1323 double intlkl(double arc)
1329 cdl = Ctree->condlkl;
1330 for (r = 0; r < numcats; r++) {
1331 tprobmtrx(arc*Rates[r], ltprobr[r]);
1333 for (r = 0; r < numcats; r++) {
1334 for (s = 0; s < Numptrn; s++) {
1336 for (i = 0; i < tpmradix; i++) {
1338 for (j = 0; j < tpmradix; j++)
1339 slk += partialj[r][s][j] * ltprobr[r][i][j];
1340 sumlk += Freqtpm[i] * partiali[r][s][i] * slk;
1346 /* compute total log-likelihood for current tree */
1347 Ctree->lklhd = comptotloglkl(cdl);
1349 return -(Ctree->lklhd); /* we use a minimizing procedure */
1353 /* optimize internal branch */
1354 void optinternalbranch(Node *op)
1356 double arc, fx, f2x;
1358 partiali = op->isop->partials;
1359 partialj = op->kinp->isop->partials;
1360 arc = op->length; /* nonclocklike branch lengths */
1361 if (arc <= MINARC) arc = MINARC+1.0;
1362 if (arc >= MAXARC) arc = MAXARC-1.0;
1363 arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1364 op->kinp->length = arc;
1367 /* variance of branch length */
1369 if (1.0/(MAXARC*MAXARC) < f2x)
1370 op->varlen = 1.0/f2x;
1372 op->varlen = MAXARC*MAXARC;
1376 /* compute log-likelihood given external branch with length arc
1377 between partials partiali and sequence seqchi */
1378 double extlkl(double arc)
1385 cdl = Ctree->condlkl;
1386 for (r = 0; r < numcats; r++) {
1387 tprobmtrx(arc*Rates[r], ltprobr[r]);
1389 for (r = 0; r < numcats; r++) {
1390 for (s = 0; s < Numptrn; s++) {
1391 opb = partiali[r][s];
1393 if ((j = seqchi[s]) != tpmradix) {
1394 for (i = 0; i < tpmradix; i++)
1395 sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1397 for (i = 0; i < tpmradix; i++)
1398 sumlk += Freqtpm[i] * opb[i];
1404 /* compute total log-likelihood for current tree */
1405 Ctree->lklhd = comptotloglkl(cdl);
1407 return -(Ctree->lklhd); /* we use a minimizing procedure */
1410 /* optimize external branch */
1411 void optexternalbranch(Node *op)
1413 double arc, fx, f2x;
1415 partiali = op->isop->partials;
1416 seqchi = op->kinp->eprob;
1417 arc = op->length; /* nonclocklike branch lengths */
1418 if (arc <= MINARC) arc = MINARC+1.0;
1419 if (arc >= MAXARC) arc = MAXARC-1.0;
1420 arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1421 op->kinp->length = arc;
1424 /* variance of branch length */
1426 if (1.0/(MAXARC*MAXARC) < f2x)
1427 op->varlen = 1.0/f2x;
1429 op->varlen = MAXARC*MAXARC;
1433 /* finish likelihoods for each rate and site */
1434 void finishlkl(Node *op)
1437 double arc, sumlk, slk;
1440 partiali = op->isop->partials;
1441 partialj = op->kinp->isop->partials;
1442 cdl = Ctree->condlkl;
1443 arc = op->length; /* nonclocklike branch lengths */
1444 for (r = 0; r < numcats; r++) {
1445 tprobmtrx(arc*Rates[r], ltprobr[r]);
1447 for (r = 0; r < numcats; r++) {
1448 for (k = 0; k < Numptrn; k++) {
1450 for (i = 0; i < tpmradix; i++) {
1452 for (j = 0; j < tpmradix; j++)
1453 slk += partialj[r][k][j] * ltprobr[r][i][j];
1454 sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1462 /***************************** exported functions *****************************/
1465 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1466 double optlkl(Tree *tr)
1472 clockmode = 0; /* nonclocklike branch lengths */
1476 for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1478 cp = rp = tr->rootp;
1480 cp = cp->isop->kinp;
1481 productpartials(cp->kinp->isop);
1482 if (cp->isop == NULL) { /* external node */
1483 cp = cp->kinp; /* not descen */
1485 lendiff = cp->length;
1486 optexternalbranch(cp);
1487 lendiff = fabs(lendiff - cp->length);
1488 if (lendiff < EPSILON) nconv++;
1491 partialsexternal(cp);
1492 } else { /* internal node */
1494 partialsinternal(cp);
1497 lendiff = cp->length;
1498 optinternalbranch(cp);
1499 lendiff = fabs(lendiff - cp->length);
1500 if (lendiff < EPSILON) nconv++;
1503 /* eventually compute likelihoods for each site */
1504 if ((cp->number == Numibrnch-1 && lendiff < EPSILON) ||
1505 Numit == MAXIT-1) finishlkl(cp);
1507 partialsinternal(cp);
1510 if (nconv >= Numbrnch) { /* convergence */
1512 cp = rp; /* get out of here */
1517 /* compute total log-likelihood for current tree */
1518 return comptotloglkl(tr->condlkl);
1522 /* compute likelihood of tree for given branch lengths */
1523 double treelkl(Tree *tr)
1531 /* compute for each site and rate log-likelihoods */
1534 productpartials(cp->isop);
1535 prob1 = cp->partials;
1536 prob2 = cp->isop->partials;
1538 for (r = 0; r < numcats; r++) {
1539 for (k = 0; k < Numptrn; k++) {
1541 for (i = 0; i < tpmradix; i++)
1542 sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1547 /* return total log-likelihood for current tree */
1548 return comptotloglkl(cdl);
1552 /******************************************************************************/
1553 /* least-squares estimate of branch lengths */
1554 /******************************************************************************/
1557 /***************************** internal functions *****************************/
1560 void luequation(dmatrix amat, dvector yvec, int size)
1562 double eps = 1.0e-20; /* ! */
1563 int i, j, k, l, maxi=0, idx;
1564 double sum, tmp, maxb, aw;
1569 wk = new_dvector(size);
1570 index = new_ivector(size);
1572 for (i = 0; i < size; i++) {
1574 for (j = 0; j < size; j++) {
1575 if (fabs(amat[i][j]) > maxb)
1576 maxb = fabs(amat[i][j]);
1579 /* Singular matrix */
1580 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1585 for (j = 0; j < size; j++) {
1586 for (i = 0; i < j; i++) {
1588 for (k = 0; k < i; k++)
1589 sum -= amat[i][k] * amat[k][j];
1593 for (i = j; i < size; i++) {
1595 for (k = 0; k < j; k++)
1596 sum -= amat[i][k] * amat[k][j];
1598 tmp = wk[i] * fabs(sum);
1605 for (k = 0; k < size; k++) {
1606 tmp = amat[maxi][k];
1607 amat[maxi][k] = amat[j][k];
1614 if (amat[j][j] == 0.0)
1616 if (j != size - 1) {
1617 tmp = 1.0 / amat[j][j];
1618 for (i = j + 1; i < size; i++)
1623 for (i = 0; i < size; i++) {
1626 yvec[idx] = yvec[i];
1628 for (j = l; j < i; j++)
1629 sum -= amat[i][j] * yvec[j];
1630 } else if (sum != 0.0)
1634 for (i = size - 1; i >= 0; i--) {
1636 for (j = i + 1; j < size; j++)
1637 sum -= amat[i][j] * yvec[j];
1638 yvec[i] = sum / amat[i][i];
1640 free_ivector(index);
1645 /* least square estimation of branch lengths
1646 used for the approximate ML and as starting point
1647 in the calculation of the exact value of the ML */
1648 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1650 int i, i1, j, j1, j2, k, numbrnch, numpair;
1651 double sum, leng, alllen, rss;
1653 dmatrix atmt, atamt;
1656 numbrnch = numspc + numibrnch;
1657 numpair = (numspc * (numspc - 1)) / 2;
1658 atmt = new_dmatrix(numbrnch, numpair);
1659 atamt = new_dmatrix(numbrnch, numbrnch);
1662 for (i = 0; i < numspc; i++) {
1663 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1665 for (j2 = 0; j2 < j1; j2++, j++) {
1669 for (j2 = 0; j2 < j1; j2++, j++) {
1678 for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1679 pths = ibp[i1]->paths;
1680 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1681 for (j2 = 0; j2 < j1; j2++, j++) {
1682 if (pths[j1] != pths[j2])
1689 for (i = 0; i < numbrnch; i++) {
1690 for (j = 0; j <= i; j++) {
1691 for (k = 0, sum = 0.0; k < numpair; k++)
1692 sum += atmt[i][k] * atmt[j][k];
1697 for (i = 0; i < numbrnch; i++) {
1698 for (k = 0, sum = 0.0; k < numpair; k++)
1699 sum += atmt[i][k] * distanvec[k];
1702 luequation(atamt, Brnlength, numbrnch);
1703 for (i = 0, rss = 0.0; i < numpair; i++) {
1705 for (j = 0; j < numbrnch; j++) {
1706 if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1707 sum -= Brnlength[j];
1711 tr->rssleast = sqrt(rss);
1713 for (i = 0; i < numspc; i++) {
1714 leng = Brnlength[i];
1716 if (leng < MINARC) leng = MINARC;
1717 if (leng > MAXARC) leng = MAXARC;
1718 if (clockmode) { /* clock */
1719 ebp[i]->lengthc = leng;
1720 ebp[i]->kinp->lengthc = leng;
1721 } else { /* no clock */
1722 ebp[i]->length = leng;
1723 ebp[i]->kinp->length = leng;
1725 Brnlength[i] = leng;
1727 for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1728 leng = Brnlength[j];
1730 if (leng < MINARC) leng = MINARC;
1731 if (leng > MAXARC) leng = MAXARC;
1732 if (clockmode) { /* clock */
1733 ibp[i]->lengthc = leng;
1734 ibp[i]->kinp->lengthc = leng;
1735 } else { /* no clock */
1736 ibp[i]->length = leng;
1737 ibp[i]->kinp->length = leng;
1739 Brnlength[j] = leng;
1742 free_dmatrix(atamt);