5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
16 /******************************************************************************/
17 /* definitions and prototypes */
18 /******************************************************************************/
31 #ifndef PARALLEL /* because printf() runs significantly faster */
32 /* than fprintf(stdout) on an Apple McIntosh */
34 # define FPRINTF printf
37 # define FPRINTF fprintf
38 # define STDOUTFILE STDOUT,
42 /******************************************************************************/
43 /* compacting sequence data information */
44 /******************************************************************************/
47 /***************************** internal functions *****************************/
50 /* make all frequencies a little different */
51 void convfreq(dvector freqemp)
54 double freq, maxfreq, sum;
59 for (i = 0; i < tpmradix; i++) {
61 if (freq < MINFREQ) freqemp[i] = MINFREQ;
68 freqemp[maxi] += 1.0 - sum;
70 for (i = 0; i < tpmradix - 1; i++) {
71 for (j = i + 1; j < tpmradix; j++) {
72 if (freqemp[i] == freqemp[j]) {
73 freqemp[i] += MINFDIFF/2.0;
74 freqemp[j] -= MINFDIFF/2.0;
80 /* sort site patters of original input data */
81 void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
84 int i, j, k, l, n, pass;
89 awork = new_ivector(maxsite);
90 count = new_ivector(tpmradix+1);
91 for (i = 0; i < maxsite; i++)
93 for (pass = maxspc - 1; pass >= 0; pass--) {
94 for (j = 0; j < tpmradix+1; j++)
96 for (i = 0; i < maxsite; i++)
97 count[(int) seqchar[pass][ali[i]]]++;
98 for (j = 1; j < tpmradix+1; j++)
99 count[j] += count[j-1];
100 for (i = maxsite-1; i >= 0; i--)
101 awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
102 for (i = 0; i < maxsite; i++)
108 for (j = 1; j < maxsite; j++) {
111 for (i = 0; i < maxspc; i++) {
112 if (seqchar[i][l] != seqchar[i][k]) {
122 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
123 ivector weight, int maxspc, int maxsite, int numptrn)
126 int agree_flag; /* boolean */
131 for (i = 0; i < maxspc; i++) {
132 seqconint[i][n] = seqchar[i][k];
136 for (j = 1; j < maxsite; j++) {
139 for (i = 0; i < maxspc; i++) {
140 if (seqconint[i][n] != seqchar[i][k]) {
145 if (agree_flag == FALSE) {
147 for (i = 0; i < maxspc; i++) {
148 seqconint[i][n] = seqchar[i][k];
159 /* Problem in condenceseq */
160 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
165 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
166 int *numconst, int *numconstpat)
168 int character, s, i, constflag;
172 for (s = 0; s < numptrn; s++) { /* check all patterns */
175 character = seqpat[0][s];
176 for (i = 1; i < maxspc; i++) {
177 if (seqpat[i][s] != character) {
182 if (character != tpmradix && constflag) {
183 (*numconst) = (*numconst) + weight[s];
190 /***************************** exported functions *****************************/
197 convfreq(Freqtpm); /* make all frequencies slightly different */
198 ali = new_ivector(Maxsite);
199 radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
200 Seqpat = new_cmatrix(Maxspc, Numptrn);
201 constpat = new_ivector(Numptrn);
202 Weight = new_ivector(Numptrn);
203 condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
205 countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
206 fracconstpat = (double) Numconstpat / (double) Numptrn;
207 fracconst = (double) Numconst / (double) Maxsite;
211 /******************************************************************************/
212 /* computation of Pij(t) */
213 /******************************************************************************/
216 /***************************** internal functions *****************************/
219 void elmhes(dmatrix a, ivector ordr, int n)
225 for (i = 0; i < n; i++)
227 for (m = 2; m < n; m++) {
230 for (j = m; j <= n; j++) {
231 if (fabs(a[j - 1][m - 2]) > fabs(x)) {
236 ordr[m - 1] = i; /* vector */
238 for (j = m - 2; j < n; j++) {
240 a[i - 1][j] = a[m - 1][j];
243 for (j = 0; j < n; j++) {
245 a[j][i - 1] = a[j][m - 1];
250 for (i = m; i < n; i++) {
255 for (j = m - 1; j < n; j++)
256 a[i][j] -= y * a[m - 1][j];
257 for (j = 0; j < n; j++)
258 a[j][m - 1] += y * a[j][i];
266 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
271 for (i = 0; i < n; i++) {
272 for (j = i + 1; j < n; j++) {
280 for (m = n - 1; m >= 2; m--) {
281 for (i = m; i < n; i++)
282 zz[i][m - 1] = a[i][m - 2];
285 for (j = m - 1; j < n; j++) {
286 zz[m - 1][j] = zz[i - 1][j];
289 zz[i - 1][m - 1] = 1.0;
295 void mcdiv(double ar, double ai, double br, double bi,
296 double *cr, double *ci)
298 double s, ars, ais, brs, bis;
301 s = fabs(br) + fabs(bi);
306 s = brs * brs + bis * bis;
307 *cr = (ars * brs + ais * bis) / s;
308 *ci = (ais * brs - ars * bis) / s;
312 void hqr2(int n, int low, int hgh, dmatrix h,
313 dmatrix zz, dvector wr, dvector wi)
315 int i, j, k, l=0, m, en, na, itn, its;
316 double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
317 int notlas; /* boolean */
322 /* store isolated roots and compute matrix norm */
323 for (i = 0; i < n; i++) {
324 for (j = k - 1; j < n; j++)
325 norm += fabs(h[i][j]);
327 if (i + 1 < low || i + 1 > hgh) {
335 while (en >= low) { /* search for next eigenvalues */
339 /* look for single small sub-diagonal element */
340 for (l = en; l > low; l--) {
341 s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
345 tst2 = tst1 + fabs(h[l - 1][l - 2]);
351 x = h[en - 1][en - 1]; /* form shift */
352 if (l == en || l == na)
355 /* all eigenvalues have not converged */
356 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
359 y = h[na - 1][na - 1];
360 w = h[en - 1][na - 1] * h[na - 1][en - 1];
361 /* form exceptional shift */
362 if (its == 10 || its == 20) {
364 for (i = low - 1; i < en; i++)
366 s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
373 /* look for two consecutive small sub-diagonal elements */
374 for (m = en - 2; m >= l; m--) {
378 p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
379 q = h[m][m] - z - r - s;
381 s = fabs(p) + fabs(q) + fabs(r);
388 (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
389 tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
393 for (i = m + 2; i <= en; i++) {
394 h[i - 1][i - 3] = 0.0;
396 h[i - 1][i - 4] = 0.0;
398 for (k = m; k <= na; k++) {
406 x = fabs(p) + fabs(q) + fabs(r);
414 if (p < 0.0) /* sign */
415 s = - sqrt(p * p + q * q + r * r);
417 s = sqrt(p * p + q * q + r * r);
419 h[k - 1][k - 2] = -s * x;
422 h[k - 1][k - 2] = -h[k - 1][k - 2];
431 for (j = k - 1; j < n; j++) { /* row modification */
432 p = h[k - 1][j] + q * h[k][j];
433 h[k - 1][j] -= p * x;
436 j = (en < (k + 3)) ? en : (k + 3); /* min */
437 for (i = 0; i < j; i++) { /* column modification */
438 p = x * h[i][k - 1] + y * h[i][k];
442 /* accumulate transformations */
443 for (i = low - 1; i < hgh; i++) {
444 p = x * zz[i][k - 1] + y * zz[i][k];
449 for (j = k - 1; j < n; j++) { /* row modification */
450 p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
451 h[k - 1][j] -= p * x;
453 h[k + 1][j] -= p * z;
455 j = (en < (k + 3)) ? en : (k + 3); /* min */
456 for (i = 0; i < j; i++) { /* column modification */
457 p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
460 h[i][k + 1] -= p * r;
462 /* accumulate transformations */
463 for (i = low - 1; i < hgh; i++) {
464 p = x * zz[i][k - 1] + y * zz[i][k] +
468 zz[i][k + 1] -= p * r;
473 } /* while infinite loop */
474 if (l == en) { /* one root found */
475 h[en - 1][en - 1] = x + t;
476 wr[en - 1] = h[en - 1][en - 1];
481 y = h[na - 1][na - 1];
482 w = h[en - 1][na - 1] * h[na - 1][en - 1];
486 h[en - 1][en - 1] = x + t;
487 x = h[en - 1][en - 1];
488 h[na - 1][na - 1] = y + t;
489 if (q >= 0.0) { /* real pair */
490 if (p < 0.0) /* sign */
495 wr[en - 1] = wr[na - 1];
497 wr[en - 1] = x - w / z;
500 x = h[en - 1][na - 1];
501 s = fabs(x) + fabs(z);
504 r = sqrt(p * p + q * q);
507 for (j = na - 1; j < n; j++) { /* row modification */
509 h[na - 1][j] = q * z + p * h[en - 1][j];
510 h[en - 1][j] = q * h[en - 1][j] - p * z;
512 for (i = 0; i < en; i++) { /* column modification */
514 h[i][na - 1] = q * z + p * h[i][en - 1];
515 h[i][en - 1] = q * h[i][en - 1] - p * z;
517 /* accumulate transformations */
518 for (i = low - 1; i < hgh; i++) {
520 zz[i][na - 1] = q * z + p * zz[i][en - 1];
521 zz[i][en - 1] = q * zz[i][en - 1] - p * z;
523 } else { /* complex pair */
530 } /* while en >= low */
531 /* backsubstitute to find vectors of upper triangular form */
533 for (en = n; en >= 1; en--) {
537 if (q == 0.0) {/* real vector */
539 h[en - 1][en - 1] = 1.0;
541 for (i = en - 2; i >= 0; i--) {
544 for (j = m - 1; j < en; j++)
545 r += h[i][j] * h[j][en - 1];
559 } while (tst2 > tst1);
561 h[i][en - 1] = -(r / t);
562 } else { /* solve real equations */
565 q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
566 t = (x * s - z * r) / q;
568 if (fabs(x) > fabs(z))
569 h[i + 1][en - 1] = (-r - w * t) / x;
571 h[i + 1][en - 1] = (-s - y * t) / z;
573 /* overflow control */
574 t = fabs(h[i][en - 1]);
577 tst2 = tst1 + 1.0 / tst1;
579 for (j = i; j < en; j++)
586 } else if (q > 0.0) {
588 if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
589 h[na - 1][na - 1] = q / h[en - 1][na - 1];
590 h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
593 mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
594 &h[na - 1][na - 1], &h[na - 1][en - 1]);
595 h[en - 1][na - 1] = 0.0;
596 h[en - 1][en - 1] = 1.0;
598 for (i = en - 3; i >= 0; i--) {
602 for (j = m - 1; j < en; j++) {
603 ra += h[i][j] * h[j][na - 1];
604 sa += h[i][j] * h[j][en - 1];
613 mcdiv(-ra, -sa, w, q, &h[i][na - 1],
615 else { /* solve complex equations */
618 vr = (wr[i] - p) * (wr[i] - p);
619 vr = vr + wi[i] * wi[i] - q * q;
620 vi = (wr[i] - p) * 2.0 * q;
621 if (vr == 0.0 && vi == 0.0) {
622 tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
628 } while (tst2 > tst1);
630 mcdiv(x * r - z * ra + q * sa,
631 x * s - z * sa - q * ra, vr, vi,
632 &h[i][na - 1], &h[i][en - 1]);
633 if (fabs(x) > fabs(z) + fabs(q)) {
635 [na - 1] = (q * h[i][en - 1] -
636 w * h[i][na - 1] - ra) / x;
637 h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
638 q * h[i][na - 1]) / x;
640 mcdiv(-r - y * h[i][na - 1],
641 -s - y * h[i][en - 1], z, q,
642 &h[i + 1][na - 1], &h[i + 1][en - 1]);
644 /* overflow control */
645 t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
646 fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
649 tst2 = tst1 + 1.0 / tst1;
651 for (j = i; j < en; j++) {
662 /* end back substitution. vectors of isolated roots */
663 for (i = 0; i < n; i++) {
664 if (i + 1 < low || i + 1 > hgh) {
665 for (j = i; j < n; j++)
669 /* multiply by transformation matrix to give vectors of
670 * original full matrix. */
671 for (j = n - 1; j >= low - 1; j--) {
672 m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
673 for (i = low - 1; i < hgh; i++) {
675 for (k = low - 1; k < m; k++)
676 z += zz[i][k] * h[k][j];
685 /* make rate matrix with 0.01 expected substitutions per unit time */
686 void onepamratematrix(dmatrix a)
689 double delta, temp, sum;
692 for (i = 0; i < tpmradix; i++)
694 for (j = 0; j < tpmradix; j++)
696 a[i][j] = Freqtpm[j]*a[i][j];
700 m = new_dvector(tpmradix);
701 for (i = 0, sum = 0.0; i < tpmradix; i++)
703 for (j = 0, temp = 0.0; j < tpmradix; j++)
705 m[i] = temp; /* row sum */
706 sum += temp*Freqtpm[i]; /* exp. rate */
708 delta = 0.01 / sum; /* 0.01 subst. per unit time */
709 for (i = 0; i < tpmradix; i++) {
710 for (j = 0; j < tpmradix; j++) {
712 a[i][j] = delta * a[i][j];
714 a[i][j] = delta * (-m[i]);
721 void eigensystem(dvector eval, dmatrix evec)
730 ordr = new_ivector(tpmradix);
731 evali = new_dvector(tpmradix);
732 forg = new_dvector(tpmradix);
733 a = new_dmatrix(tpmradix,tpmradix);
734 b = new_dmatrix(tpmradix,tpmradix);
736 rtfdata(a, forg); /* get relative transition matrix and frequencies */
738 onepamratematrix(a); /* make 1 PAM rate matrix */
741 for (i = 0; i < tpmradix; i++)
742 for (j = 0; j < tpmradix; j++)
745 elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
746 eltran(a, evec, ordr, tpmradix);
747 hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
749 /* check eigenvalue equation */
751 for (j = 0; j < tpmradix; j++) {
752 for (i = 0, zero = 0.0; i < tpmradix; i++) {
753 for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
754 zero -= eval[j] * evec[i][j];
755 if (fabs(zero) > 1.0e-5)
760 FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
770 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
772 double eps = 1.0e-20; /* ! */
773 int i, j, k, l, maxi=0, idx, ix, jx;
774 double sum, tmp, maxb, aw;
780 index = new_ivector(tpmradix);
781 omtrx = new_dmatrix(tpmradix,tpmradix);
783 /* copy inmat to omtrx */
784 for (i = 0; i < tpmradix; i++)
785 for (j = 0; j < tpmradix; j++)
786 omtrx[i][j] = inmat[i][j];
788 wk = (double *) malloc((unsigned)size * sizeof(double));
790 for (i = 0; i < size; i++) {
792 for (j = 0; j < size; j++) {
793 if (fabs(omtrx[i][j]) > maxb)
794 maxb = fabs(omtrx[i][j]);
797 /* Singular matrix */
798 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
803 for (j = 0; j < size; j++) {
804 for (i = 0; i < j; i++) {
806 for (k = 0; k < i; k++)
807 sum -= omtrx[i][k] * omtrx[k][j];
811 for (i = j; i < size; i++) {
813 for (k = 0; k < j; k++)
814 sum -= omtrx[i][k] * omtrx[k][j];
816 tmp = wk[i] * fabs(sum);
823 for (k = 0; k < size; k++) {
824 tmp = omtrx[maxi][k];
825 omtrx[maxi][k] = omtrx[j][k];
832 if (omtrx[j][j] == 0.0)
835 tmp = 1.0 / omtrx[j][j];
836 for (i = j + 1; i < size; i++)
840 for (jx = 0; jx < size; jx++) {
841 for (ix = 0; ix < size; ix++)
845 for (i = 0; i < size; i++) {
850 for (j = l; j < i; j++)
851 sum -= omtrx[i][j] * wk[j];
852 } else if (sum != 0.0)
856 for (i = size - 1; i >= 0; i--) {
858 for (j = i + 1; j < size; j++)
859 sum -= omtrx[i][j] * wk[j];
860 wk[i] = sum / omtrx[i][i];
862 for (ix = 0; ix < size; ix++)
863 imtrx[ix][jx] = wk[ix];
872 void checkevector(dmatrix evec, dmatrix ivec, int nn)
874 int i, j, ia, ib, ic, error;
879 matx = new_dmatrix(nn, nn);
880 /* multiply matrix of eigenvectors and its inverse */
881 for (ia = 0; ia < nn; ia++) {
882 for (ic = 0; ic < nn; ic++) {
884 for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
888 /* check whether the unitary matrix is obtained */
890 for (i = 0; i < nn; i++) {
891 for (j = 0; j < nn; j++) {
893 if (fabs(matx[i][j] - 1.0) > 1.0e-5)
896 if (fabs(matx[i][j]) > 1.0e-5)
902 FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
908 /***************************** exported functions *****************************/
911 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
914 eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
915 luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
916 checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
921 void tprobmtrx(double arc, dmatrix tpr)
923 register int i, j, k;
924 register double temp;
927 for (k = 0; k < tpmradix; k++) {
928 temp = exp(arc * Eval[k]);
929 for (j = 0; j < tpmradix; j++)
930 iexp[k][j] = Ievc[k][j] * temp;
932 for (i = 0; i < tpmradix; i++) {
933 for (j = 0; j < tpmradix; j++) {
935 for (k = 0; k < tpmradix; k++)
936 temp += Evec[i][k] * iexp[k][j];
937 tpr[i][j] = fabs(temp);
943 /******************************************************************************/
944 /* estimation of maximum likelihood distances */
945 /******************************************************************************/
947 /* compute total log-likelihood
948 input: likelihoods for each site and non-zero rate
949 output: total log-likelihood (incl. zero rate category) */
950 double comptotloglkl(dmatrix cdl)
953 double loglkl, fv, fv2, sitelkl;
957 fv2 = (1.0-fracinv)/(double) numcats;
961 for (k = 0; k < Numptrn; k++) {
963 /* compute likelihood for pattern k */
964 sitelkl = cdl[0][k]*fv;
965 if (constpat[k] == TRUE)
966 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
968 /* total log-likelihood */
969 loglkl += log(sitelkl)*Weight[k];
975 for (k = 0; k < Numptrn; k++) {
977 /* this general routine works always but it's better
978 to run it only when it's really necessary */
980 /* compute likelihood for pattern k */
982 for (r = 0; r < numcats; r++)
983 sitelkl += cdl[r][k];
984 sitelkl = fv2*sitelkl;
985 if (constpat[k] == TRUE)
986 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
988 /* total log-likelihood */
989 loglkl += log(sitelkl)*Weight[k];
999 /* computes the site log-likelihoods
1000 input: likelihoods for each site and non-zero rate
1001 output: log-likelihood for each site */
1002 void allsitelkl(dmatrix cdl, dvector aslkl)
1005 double fv, fv2, sitelkl;
1008 fv2 = (1.0-fracinv)/(double) numcats;
1012 for (k = 0; k < Numptrn; k++) {
1014 /* compute likelihood for pattern k */
1015 sitelkl = cdl[0][k]*fv;
1016 if (constpat[k] == TRUE)
1017 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1019 /* site log-likelihood */
1020 aslkl[k] = log(sitelkl);
1025 for (k = 0; k < Numptrn; k++) {
1027 /* this general routine works always but it's better
1028 to run it only when it's really necessary */
1030 /* compute likelihood for pattern k */
1032 for (r = 0; r < numcats; r++)
1033 sitelkl += cdl[r][k];
1034 sitelkl = fv2*sitelkl;
1035 if (constpat[k] == TRUE)
1036 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1038 /* total log-likelihood */
1039 aslkl[k] = log(sitelkl);
1046 /***************************** internal functions *****************************/
1048 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049 double pairlkl(double arc)
1052 double loglkl, fv, sitelkl;
1056 for (r = 0; r < numcats; r++)
1057 /* compute tpm for rate category r */
1058 tprobmtrx(arc*Rates[r], ltprobr[r]);
1065 for (k = 0; k < Numptrn; k++) {
1067 /* compute likelihood for site k */
1070 if (ci != tpmradix && cj != tpmradix)
1071 sitelkl = ltprobr[0][ci][cj]*fv;
1074 if (ci == cj && ci != tpmradix)
1075 sitelkl += fracinv*Freqtpm[ci];
1077 /* total log-likelihood */
1078 loglkl += log(sitelkl)*Weight[k];
1084 for (k = 0; k < Numptrn; k++) {
1086 /* this general routine works always but it's better
1087 to run it only when it's really necessary */
1089 /* compute likelihood for site k */
1092 if (ci != tpmradix && cj != tpmradix) {
1094 for (r = 0; r < numcats; r++)
1095 sitelkl += ltprobr[r][ci][cj];
1096 sitelkl = fv*sitelkl/(double) numcats;
1099 if (ci == cj && ci != tpmradix)
1100 sitelkl += fracinv*Freqtpm[ci];
1102 /* total log-likelihood */
1103 loglkl += log(sitelkl)*Weight[k];
1109 /* return negative log-likelihood as we use a minimizing procedure */
1114 /***************************** exported functions *****************************/
1117 /* maximum likelihood distance between sequence i and j */
1118 double mldistance(int i, int j)
1120 double dist, fx, f2x;
1122 if (i == j) return 0.0;
1124 /* use old distance as start value */
1125 dist = Distanmat[i][j];
1127 if (dist == 0.0) return 0.0;
1132 if (dist <= MINARC) dist = MINARC+1.0;
1133 if (dist >= MAXARC) dist = MAXARC-1.0;
1135 dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1141 /* initialize distance matrix */
1144 int i, j, k, diff, x, y;
1147 for (i = 0; i < Maxspc; i++) {
1148 Distanmat[i][i] = 0.0;
1149 for (j = i + 1; j < Maxspc; j++) {
1153 /* count observed differences */
1155 for (k = 0; k < Numptrn; k++) {
1164 Distanmat[i][j] = 0.0;
1166 /* use generalized JC correction to get first estimate
1167 (for the SH model the observed distance is used) */
1168 /* observed distance */
1169 obs = (double) diff / (double) Maxsite;
1170 temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1171 if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1172 /* use JC corrected distance */
1173 Distanmat[i][j] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1175 /* use observed distance */
1176 Distanmat[i][j] = obs * 100.0;
1177 if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC;
1178 if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC;
1180 Distanmat[j][i] = Distanmat[i][j];
1185 /* compute distance matrix */
1186 void computedistan()
1190 for (i = 0; i < Maxspc; i++)
1191 for (j = i; j < Maxspc; j++) {
1192 Distanmat[i][j] = mldistance(i, j);
1193 Distanmat[j][i] = Distanmat[i][j];
1198 /******************************************************************************/
1199 /* computation of maximum likelihood edge lengths for a given tree */
1200 /******************************************************************************/
1203 /***************************** internal functions *****************************/
1206 /* multiply partial likelihoods */
1207 void productpartials(Node *op)
1215 while (cp->isop->isop != op) {
1218 for (r = 0; r < numcats; r++)
1219 for (i = 0; i < Numptrn; i++)
1220 for (j = 0; j < tpmradix; j++)
1221 opc[r][i][j] *= cpc[r][i][j];
1226 /* compute internal partial likelihoods */
1227 void partialsinternal(Node *op)
1233 if (clockmode == 1) { /* clocklike branch lengths */
1234 for (r = 0; r < numcats; r++) {
1235 tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1237 } else { /* non-clocklike branch lengths */
1238 for (r = 0; r < numcats; r++) {
1239 tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1243 oprob = op->partials;
1244 cprob = op->kinp->isop->partials;
1245 for (r = 0; r < numcats; r++) {
1246 for (k = 0; k < Numptrn; k++) {
1247 for (i = 0; i < tpmradix; i++) {
1249 for (j = 0; j < tpmradix; j++)
1250 sum += ltprobr[r][i][j] * cprob[r][k][j];
1251 oprob[r][k][i] = sum;
1258 /* compute external partial likelihoods */
1259 void partialsexternal(Node *op)
1265 if (clockmode == 1) { /* clocklike branch lengths */
1266 for (r = 0; r < numcats; r++) {
1267 tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1269 } else { /* nonclocklike branch lengths */
1270 for (r = 0; r < numcats; r++) {
1271 tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1275 oprob = op->partials;
1276 dseqi = op->kinp->eprob;
1277 for (r = 0; r < numcats; r++) {
1278 for (k = 0; k < Numptrn; k++) {
1279 if ((j = dseqi[k]) == tpmradix) {
1280 for (i = 0; i < tpmradix; i++)
1281 oprob[r][k][i] = 1.0;
1283 for (i = 0; i < tpmradix; i++)
1284 oprob[r][k][i] = ltprobr[r][i][j];
1291 /* compute all partial likelihoods */
1292 void initpartials(Tree *tr)
1296 cp = rp = tr->rootp;
1298 cp = cp->isop->kinp;
1299 if (cp->isop == NULL) { /* external node */
1300 cp = cp->kinp; /* not descen */
1301 partialsexternal(cp);
1302 } else { /* internal node */
1304 productpartials(cp->kinp->isop);
1305 partialsinternal(cp);
1312 /* compute log-likelihood given internal branch with length arc
1313 between partials partiali and partials partialj */
1314 double intlkl(double arc)
1320 cdl = Ctree->condlkl;
1321 for (r = 0; r < numcats; r++) {
1322 tprobmtrx(arc*Rates[r], ltprobr[r]);
1324 for (r = 0; r < numcats; r++) {
1325 for (s = 0; s < Numptrn; s++) {
1327 for (i = 0; i < tpmradix; i++) {
1329 for (j = 0; j < tpmradix; j++)
1330 slk += partialj[r][s][j] * ltprobr[r][i][j];
1331 sumlk += Freqtpm[i] * partiali[r][s][i] * slk;
1337 /* compute total log-likelihood for current tree */
1338 Ctree->lklhd = comptotloglkl(cdl);
1340 return -(Ctree->lklhd); /* we use a minimizing procedure */
1344 /* optimize internal branch */
1345 void optinternalbranch(Node *op)
1347 double arc, fx, f2x;
1349 partiali = op->isop->partials;
1350 partialj = op->kinp->isop->partials;
1351 arc = op->length; /* nonclocklike branch lengths */
1352 if (arc <= MINARC) arc = MINARC+1.0;
1353 if (arc >= MAXARC) arc = MAXARC-1.0;
1354 arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1355 op->kinp->length = arc;
1358 /* variance of branch length */
1360 if (1.0/(MAXARC*MAXARC) < f2x)
1361 op->varlen = 1.0/f2x;
1363 op->varlen = MAXARC*MAXARC;
1367 /* compute log-likelihood given external branch with length arc
1368 between partials partiali and sequence seqchi */
1369 double extlkl(double arc)
1376 cdl = Ctree->condlkl;
1377 for (r = 0; r < numcats; r++) {
1378 tprobmtrx(arc*Rates[r], ltprobr[r]);
1380 for (r = 0; r < numcats; r++) {
1381 for (s = 0; s < Numptrn; s++) {
1382 opb = partiali[r][s];
1384 if ((j = seqchi[s]) != tpmradix) {
1385 for (i = 0; i < tpmradix; i++)
1386 sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1388 for (i = 0; i < tpmradix; i++)
1389 sumlk += Freqtpm[i] * opb[i];
1395 /* compute total log-likelihood for current tree */
1396 Ctree->lklhd = comptotloglkl(cdl);
1398 return -(Ctree->lklhd); /* we use a minimizing procedure */
1401 /* optimize external branch */
1402 void optexternalbranch(Node *op)
1404 double arc, fx, f2x;
1406 partiali = op->isop->partials;
1407 seqchi = op->kinp->eprob;
1408 arc = op->length; /* nonclocklike branch lengths */
1409 if (arc <= MINARC) arc = MINARC+1.0;
1410 if (arc >= MAXARC) arc = MAXARC-1.0;
1411 arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1412 op->kinp->length = arc;
1415 /* variance of branch length */
1417 if (1.0/(MAXARC*MAXARC) < f2x)
1418 op->varlen = 1.0/f2x;
1420 op->varlen = MAXARC*MAXARC;
1424 /* finish likelihoods for each rate and site */
1425 void finishlkl(Node *op)
1428 double arc, sumlk, slk;
1431 partiali = op->isop->partials;
1432 partialj = op->kinp->isop->partials;
1433 cdl = Ctree->condlkl;
1434 arc = op->length; /* nonclocklike branch lengths */
1435 for (r = 0; r < numcats; r++) {
1436 tprobmtrx(arc*Rates[r], ltprobr[r]);
1438 for (r = 0; r < numcats; r++) {
1439 for (k = 0; k < Numptrn; k++) {
1441 for (i = 0; i < tpmradix; i++) {
1443 for (j = 0; j < tpmradix; j++)
1444 slk += partialj[r][k][j] * ltprobr[r][i][j];
1445 sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1453 /***************************** exported functions *****************************/
1456 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1457 double optlkl(Tree *tr)
1463 clockmode = 0; /* nonclocklike branch lengths */
1467 for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1469 cp = rp = tr->rootp;
1471 cp = cp->isop->kinp;
1472 productpartials(cp->kinp->isop);
1473 if (cp->isop == NULL) { /* external node */
1474 cp = cp->kinp; /* not descen */
1476 lendiff = cp->length;
1477 optexternalbranch(cp);
1478 lendiff = fabs(lendiff - cp->length);
1479 if (lendiff < EPSILON) nconv++;
1482 partialsexternal(cp);
1483 } else { /* internal node */
1485 partialsinternal(cp);
1488 lendiff = cp->length;
1489 optinternalbranch(cp);
1490 lendiff = fabs(lendiff - cp->length);
1491 if (lendiff < EPSILON) nconv++;
1494 /* eventually compute likelihoods for each site */
1495 if ((cp->number == Numibrnch-1 && lendiff < EPSILON) ||
1496 Numit == MAXIT-1) finishlkl(cp);
1498 partialsinternal(cp);
1501 if (nconv >= Numbrnch) { /* convergence */
1503 cp = rp; /* get out of here */
1508 /* compute total log-likelihood for current tree */
1509 return comptotloglkl(tr->condlkl);
1513 /* compute likelihood of tree for given branch lengths */
1514 double treelkl(Tree *tr)
1522 /* compute for each site and rate log-likelihoods */
1525 productpartials(cp->isop);
1526 prob1 = cp->partials;
1527 prob2 = cp->isop->partials;
1529 for (r = 0; r < numcats; r++) {
1530 for (k = 0; k < Numptrn; k++) {
1532 for (i = 0; i < tpmradix; i++)
1533 sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1538 /* return total log-likelihood for current tree */
1539 return comptotloglkl(cdl);
1543 /******************************************************************************/
1544 /* least-squares estimate of branch lengths */
1545 /******************************************************************************/
1548 /***************************** internal functions *****************************/
1551 void luequation(dmatrix amat, dvector yvec, int size)
1553 double eps = 1.0e-20; /* ! */
1554 int i, j, k, l, maxi=0, idx;
1555 double sum, tmp, maxb, aw;
1560 wk = new_dvector(size);
1561 index = new_ivector(size);
1563 for (i = 0; i < size; i++) {
1565 for (j = 0; j < size; j++) {
1566 if (fabs(amat[i][j]) > maxb)
1567 maxb = fabs(amat[i][j]);
1570 /* Singular matrix */
1571 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1576 for (j = 0; j < size; j++) {
1577 for (i = 0; i < j; i++) {
1579 for (k = 0; k < i; k++)
1580 sum -= amat[i][k] * amat[k][j];
1584 for (i = j; i < size; i++) {
1586 for (k = 0; k < j; k++)
1587 sum -= amat[i][k] * amat[k][j];
1589 tmp = wk[i] * fabs(sum);
1596 for (k = 0; k < size; k++) {
1597 tmp = amat[maxi][k];
1598 amat[maxi][k] = amat[j][k];
1605 if (amat[j][j] == 0.0)
1607 if (j != size - 1) {
1608 tmp = 1.0 / amat[j][j];
1609 for (i = j + 1; i < size; i++)
1614 for (i = 0; i < size; i++) {
1617 yvec[idx] = yvec[i];
1619 for (j = l; j < i; j++)
1620 sum -= amat[i][j] * yvec[j];
1621 } else if (sum != 0.0)
1625 for (i = size - 1; i >= 0; i--) {
1627 for (j = i + 1; j < size; j++)
1628 sum -= amat[i][j] * yvec[j];
1629 yvec[i] = sum / amat[i][i];
1631 free_ivector(index);
1636 /* least square estimation of branch lengths
1637 used for the approximate ML and as starting point
1638 in the calculation of the exact value of the ML */
1639 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1641 int i, i1, j, j1, j2, k, numbrnch, numpair;
1642 double sum, leng, alllen, rss;
1644 dmatrix atmt, atamt;
1647 numbrnch = numspc + numibrnch;
1648 numpair = (numspc * (numspc - 1)) / 2;
1649 atmt = new_dmatrix(numbrnch, numpair);
1650 atamt = new_dmatrix(numbrnch, numbrnch);
1653 for (i = 0; i < numspc; i++) {
1654 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1656 for (j2 = 0; j2 < j1; j2++, j++) {
1660 for (j2 = 0; j2 < j1; j2++, j++) {
1669 for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1670 pths = ibp[i1]->paths;
1671 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1672 for (j2 = 0; j2 < j1; j2++, j++) {
1673 if (pths[j1] != pths[j2])
1680 for (i = 0; i < numbrnch; i++) {
1681 for (j = 0; j <= i; j++) {
1682 for (k = 0, sum = 0.0; k < numpair; k++)
1683 sum += atmt[i][k] * atmt[j][k];
1688 for (i = 0; i < numbrnch; i++) {
1689 for (k = 0, sum = 0.0; k < numpair; k++)
1690 sum += atmt[i][k] * distanvec[k];
1693 luequation(atamt, Brnlength, numbrnch);
1694 for (i = 0, rss = 0.0; i < numpair; i++) {
1696 for (j = 0; j < numbrnch; j++) {
1697 if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1698 sum -= Brnlength[j];
1702 tr->rssleast = sqrt(rss);
1704 for (i = 0; i < numspc; i++) {
1705 leng = Brnlength[i];
1707 if (leng < MINARC) leng = MINARC;
1708 if (leng > MAXARC) leng = MAXARC;
1709 if (clockmode) { /* clock */
1710 ebp[i]->lengthc = leng;
1711 ebp[i]->kinp->lengthc = leng;
1712 } else { /* no clock */
1713 ebp[i]->length = leng;
1714 ebp[i]->kinp->length = leng;
1716 Brnlength[i] = leng;
1718 for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1719 leng = Brnlength[j];
1721 if (leng < MINARC) leng = MINARC;
1722 if (leng > MAXARC) leng = MAXARC;
1723 if (clockmode) { /* clock */
1724 ibp[i]->lengthc = leng;
1725 ibp[i]->kinp->lengthc = leng;
1726 } else { /* no clock */
1727 ibp[i]->length = leng;
1728 ibp[i]->kinp->length = leng;
1730 Brnlength[j] = leng;
1733 free_dmatrix(atamt);