initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_dqo / src / ml1.c
1 /*
2  * ml1.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 /******************************************************************************/
17 /* definitions and prototypes                                                 */
18 /******************************************************************************/
19
20 #define EXTERN extern
21
22 /* prototypes */
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <ctype.h>
27 #include "util.h"
28 #include "ml.h"
29
30 #define STDOUT     stdout
31 #ifndef PARALLEL             /* because printf() runs significantly faster */
32                              /* than fprintf(stdout) on an Apple McIntosh  */
33                              /* (HS) */
34 #       define FPRINTF    printf
35 #       define STDOUTFILE
36 #else
37 #       define FPRINTF    fprintf
38 #       define STDOUTFILE STDOUT,
39 #endif
40
41
42 /******************************************************************************/
43 /* compacting sequence data information                                       */
44 /******************************************************************************/
45
46
47 /***************************** internal functions *****************************/
48
49
50 /* make all frequencies a little different */
51 void convfreq(dvector freqemp)
52 {
53         int i, j, maxi=0;
54         double freq, maxfreq, sum;
55
56
57         sum = 0.0;
58         maxfreq = 0.0;
59         for (i = 0; i < tpmradix; i++) {
60                 freq = freqemp[i];
61                 if (freq < MINFREQ) freqemp[i] = MINFREQ;
62                 if (freq > maxfreq) {
63                         maxfreq = freq;
64                         maxi = i;
65                 }
66                 sum += freqemp[i];
67         }
68         freqemp[maxi] += 1.0 - sum;
69         
70         for (i = 0; i < tpmradix - 1; i++) {
71                 for (j = i + 1; j < tpmradix; j++) {
72                         if (freqemp[i] == freqemp[j]) {
73                                 freqemp[i] += MINFDIFF/2.0;
74                                 freqemp[j] -= MINFDIFF/2.0;
75                         }
76                 }
77         }
78 }
79
80 /* sort site patters of original input data */
81 void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
82         int *numptrn)
83 {
84         int i, j, k, l, n, pass;
85         int *awork;
86         int *count;
87
88
89         awork = new_ivector(maxsite);
90         count = new_ivector(tpmradix+1);
91         for (i = 0; i < maxsite; i++)
92                 ali[i] = i;
93         for (pass = maxspc - 1; pass >= 0; pass--) {
94                 for (j = 0; j < tpmradix+1; j++)
95                         count[j] = 0;
96                 for (i = 0; i < maxsite; i++)
97                         count[(int) seqchar[pass][ali[i]]]++;
98                 for (j = 1; j < tpmradix+1; j++)
99                         count[j] += count[j-1];
100                 for (i = maxsite-1; i >= 0; i--)
101                         awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
102                 for (i = 0; i < maxsite; i++)
103                         ali[i] = awork[i];
104         }
105         free_ivector(awork);
106         free_ivector(count);
107         n = 1;
108         for (j = 1; j < maxsite; j++) {
109                 k = ali[j];
110                 l = ali[j-1];
111                 for (i = 0; i < maxspc; i++) {
112                         if (seqchar[i][l] != seqchar[i][k]) {
113                                 n++;
114                                 break;
115                         }
116                 }
117         }
118         *numptrn = n;
119 }
120
121
122 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
123         ivector weight, int maxspc, int maxsite, int numptrn)
124 {
125         int i, j, k, n;
126         int agree_flag; /* boolean */
127
128
129         n = 0;
130         k = ali[n];
131         for (i = 0; i < maxspc; i++) {
132                 seqconint[i][n] = seqchar[i][k];
133         }
134         weight[n] = 1;
135         Alias[k] = 0;
136         for (j = 1; j < maxsite; j++) {
137                 k = ali[j];
138                 agree_flag = TRUE;
139                 for (i = 0; i < maxspc; i++) {
140                         if (seqconint[i][n] != seqchar[i][k]) {
141                                 agree_flag = FALSE;
142                                 break;
143                         }
144                 }
145                 if (agree_flag == FALSE) {
146                         n++;
147                         for (i = 0; i < maxspc; i++) {
148                                 seqconint[i][n] = seqchar[i][k];
149                         }
150                         weight[n] = 1;
151                         Alias[k] = n;
152                 } else {
153                         weight[n]++;
154                         Alias[k] = n;
155                 }
156         }
157         n++;
158         if (numptrn != n) {
159                 /* Problem in condenceseq */
160                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
161                 exit(1);
162         }
163 }
164
165 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
166         int *numconst, int *numconstpat)
167 {
168         int character, s, i, constflag;
169
170         *numconst    = 0;
171         *numconstpat = 0;
172         for (s = 0; s < numptrn; s++) { /* check all patterns */
173                 constpat[s] = FALSE;
174                 constflag = TRUE;
175                 character = seqpat[0][s];
176                 for (i = 1; i < maxspc; i++) {
177                         if (seqpat[i][s] != character) {
178                                 constflag = FALSE;
179                                 break;
180                         }
181                 }
182                 if (character != tpmradix && constflag) {
183                         (*numconst) = (*numconst) + weight[s];
184                         (*numconstpat)++;
185                         constpat[s] = TRUE;
186                 }
187         }
188 }
189
190 /***************************** exported functions *****************************/
191
192
193 void evaluateseqs()
194 {       
195         ivector ali;
196
197         convfreq(Freqtpm); /* make all frequencies slightly different */
198         ali = new_ivector(Maxsite);
199         radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
200         Seqpat = new_cmatrix(Maxspc, Numptrn);
201         constpat = new_ivector(Numptrn);
202         Weight = new_ivector(Numptrn);
203         condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
204         free_ivector(ali);
205         countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
206         fracconstpat = (double) Numconstpat / (double) Numptrn; 
207         fracconst    = (double) Numconst / (double) Maxsite;    
208 }
209
210
211 /******************************************************************************/
212 /* computation of Pij(t)                                                      */
213 /******************************************************************************/
214
215
216 /***************************** internal functions *****************************/
217
218
219 void elmhes(dmatrix a, ivector ordr, int n)
220 {
221         int m, j, i;
222         double y, x;
223
224
225         for (i = 0; i < n; i++)
226                 ordr[i] = 0;
227         for (m = 2; m < n; m++) {
228                 x = 0.0;
229                 i = m;
230                 for (j = m; j <= n; j++) {
231                         if (fabs(a[j - 1][m - 2]) > fabs(x)) {
232                                 x = a[j - 1][m - 2];
233                                 i = j;
234                         }
235                 }
236                 ordr[m - 1] = i;      /* vector */
237                 if (i != m) {
238                         for (j = m - 2; j < n; j++) {
239                                 y = a[i - 1][j];
240                                 a[i - 1][j] = a[m - 1][j];
241                                 a[m - 1][j] = y;
242                         }
243                         for (j = 0; j < n; j++) {
244                                 y = a[j][i - 1];
245                                 a[j][i - 1] = a[j][m - 1];
246                                 a[j][m - 1] = y;
247                         }
248                 }
249                 if (x != 0.0) {
250                         for (i = m; i < n; i++) {
251                                 y = a[i][m - 2];
252                                 if (y != 0.0) {
253                                         y /= x;
254                                         a[i][m - 2] = y;
255                                         for (j = m - 1; j < n; j++)
256                                                 a[i][j] -= y * a[m - 1][j];
257                                         for (j = 0; j < n; j++)
258                                                 a[j][m - 1] += y * a[j][i];
259                                 }
260                         }
261                 }
262         }
263 }
264
265
266 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
267 {
268         int i, j, m;
269
270
271         for (i = 0; i < n; i++) {
272                 for (j = i + 1; j < n; j++) {
273                         zz[i][j] = 0.0;
274                         zz[j][i] = 0.0;
275                 }
276                 zz[i][i] = 1.0;
277         }
278         if (n <= 2)
279                 return;
280         for (m = n - 1; m >= 2; m--) {
281                 for (i = m; i < n; i++)
282                         zz[i][m - 1] = a[i][m - 2];
283                 i = ordr[m - 1];
284                 if (i != m) {
285                         for (j = m - 1; j < n; j++) {
286                                 zz[m - 1][j] = zz[i - 1][j];
287                                 zz[i - 1][j] = 0.0;
288                         }
289                         zz[i - 1][m - 1] = 1.0;
290                 }
291         }
292 }
293
294
295 void mcdiv(double ar, double ai, double br, double bi,
296         double *cr, double *ci)
297 {
298         double s, ars, ais, brs, bis;
299
300
301         s = fabs(br) + fabs(bi);
302         ars = ar / s;
303         ais = ai / s;
304         brs = br / s;
305         bis = bi / s;
306         s = brs * brs + bis * bis;
307         *cr = (ars * brs + ais * bis) / s;
308         *ci = (ais * brs - ars * bis) / s;
309 }
310
311
312 void hqr2(int n, int low, int hgh, dmatrix h,
313         dmatrix zz, dvector wr, dvector wi)
314 {
315         int i, j, k, l=0, m, en, na, itn, its;
316         double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
317         int notlas; /* boolean */
318
319
320         norm = 0.0;
321         k = 1;
322         /* store isolated roots and compute matrix norm */
323         for (i = 0; i < n; i++) {
324                 for (j = k - 1; j < n; j++)
325                         norm += fabs(h[i][j]);
326                 k = i + 1;
327                 if (i + 1 < low || i + 1 > hgh) {
328                         wr[i] = h[i][i];
329                         wi[i] = 0.0;
330                 }
331         }
332         en = hgh;
333         t = 0.0;
334         itn = n * 30;
335         while (en >= low) {     /* search for next eigenvalues */
336                 its = 0;
337                 na = en - 1;
338                 while (en >= 1) {
339                         /* look for single small sub-diagonal element */
340                         for (l = en; l > low; l--) {
341                                 s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
342                                 if (s == 0.0)
343                                         s = norm;
344                                 tst1 = s;
345                                 tst2 = tst1 + fabs(h[l - 1][l - 2]);
346                                 if (tst2 == tst1)
347                                         goto L100;
348                         }
349                         l = low;
350         L100:
351                         x = h[en - 1][en - 1];  /* form shift */
352                         if (l == en || l == na)
353                                 break;
354                         if (itn == 0) {
355                                 /* all eigenvalues have not converged */
356                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
357                                 exit(1);
358                         }
359                         y = h[na - 1][na - 1];
360                         w = h[en - 1][na - 1] * h[na - 1][en - 1];
361                         /* form exceptional shift */
362                         if (its == 10 || its == 20) {
363                                 t += x;
364                                 for (i = low - 1; i < en; i++)
365                                         h[i][i] -= x;
366                                 s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
367                                 x = 0.75 * s;
368                                 y = x;
369                                 w = -0.4375 * s * s;
370                         }
371                         its++;
372                         itn--;
373                         /* look for two consecutive small sub-diagonal elements */
374                         for (m = en - 2; m >= l; m--) {
375                                 z = h[m - 1][m - 1];
376                                 r = x - z;
377                                 s = y - z;
378                                 p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
379                                 q = h[m][m] - z - r - s;
380                                 r = h[m + 1][m];
381                                 s = fabs(p) + fabs(q) + fabs(r);
382                                 p /= s;
383                                 q /= s;
384                                 r /= s;
385                                 if (m == l)
386                                         break;
387                                 tst1 = fabs(p) *
388                                                 (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
389                                 tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
390                                 if (tst2 == tst1)
391                                         break;
392                         }
393                         for (i = m + 2; i <= en; i++) {
394                                 h[i - 1][i - 3] = 0.0;
395                                 if (i != m + 2)
396                                         h[i - 1][i - 4] = 0.0;
397                         }
398                         for (k = m; k <= na; k++) {
399                                 notlas = (k != na);
400                                 if (k != m) {
401                                         p = h[k - 1][k - 2];
402                                         q = h[k][k - 2];
403                                         r = 0.0;
404                                         if (notlas)
405                                                 r = h[k + 1][k - 2];
406                                         x = fabs(p) + fabs(q) + fabs(r);
407                                         if (x != 0.0) {
408                                                 p /= x;
409                                                 q /= x;
410                                                 r /= x;
411                                         }
412                                 }
413                                 if (x != 0.0) {
414                                         if (p < 0.0) /* sign */
415                                                 s = - sqrt(p * p + q * q + r * r);
416                                         else
417                                                 s = sqrt(p * p + q * q + r * r);
418                                         if (k != m)
419                                                 h[k - 1][k - 2] = -s * x;
420                                         else {
421                                                 if (l != m)
422                                                         h[k - 1][k - 2] = -h[k - 1][k - 2];
423                                         }
424                                         p += s;
425                                         x = p / s;
426                                         y = q / s;
427                                         z = r / s;
428                                         q /= p;
429                                         r /= p;
430                                         if (!notlas) {
431                                                 for (j = k - 1; j < n; j++) {   /* row modification */
432                                                         p = h[k - 1][j] + q * h[k][j];
433                                                         h[k - 1][j] -= p * x;
434                                                         h[k][j] -= p * y;
435                                                 }
436                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
437                                                 for (i = 0; i < j; i++) {       /* column modification */
438                                                         p = x * h[i][k - 1] + y * h[i][k];
439                                                         h[i][k - 1] -= p;
440                                                         h[i][k] -= p * q;
441                                                 }
442                                                 /* accumulate transformations */
443                                                 for (i = low - 1; i < hgh; i++) {
444                                                         p = x * zz[i][k - 1] + y * zz[i][k];
445                                                         zz[i][k - 1] -= p;
446                                                         zz[i][k] -= p * q;
447                                                 }
448                                         } else {
449                                                 for (j = k - 1; j < n; j++) {   /* row modification */
450                                                         p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
451                                                         h[k - 1][j] -= p * x;
452                                                         h[k][j] -= p * y;
453                                                         h[k + 1][j] -= p * z;
454                                                 }
455                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
456                                                 for (i = 0; i < j; i++) {       /* column modification */
457                                                         p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
458                                                         h[i][k - 1] -= p;
459                                                         h[i][k] -= p * q;
460                                                         h[i][k + 1] -= p * r;
461                                                 }
462                                                 /* accumulate transformations */
463                                                 for (i = low - 1; i < hgh; i++) {
464                                                         p = x * zz[i][k - 1] + y * zz[i][k] +
465                                                                 z * zz[i][k + 1];
466                                                         zz[i][k - 1] -= p;
467                                                         zz[i][k] -= p * q;
468                                                         zz[i][k + 1] -= p * r;
469                                                 }
470                                         }
471                                 }
472                         }              /* for k */
473                 }                      /* while infinite loop */
474                 if (l == en) {         /* one root found */
475                         h[en - 1][en - 1] = x + t;
476                         wr[en - 1] = h[en - 1][en - 1];
477                         wi[en - 1] = 0.0;
478                         en = na;
479                         continue;
480                 }
481                 y = h[na - 1][na - 1];
482                 w = h[en - 1][na - 1] * h[na - 1][en - 1];
483                 p = (y - x) / 2.0;
484                 q = p * p + w;
485                 z = sqrt(fabs(q));
486                 h[en - 1][en - 1] = x + t;
487                 x = h[en - 1][en - 1];
488                 h[na - 1][na - 1] = y + t;
489                 if (q >= 0.0) {        /* real pair */
490                         if (p < 0.0) /* sign */
491                                 z = p - fabs(z);
492                         else
493                                 z = p + fabs(z);
494                         wr[na - 1] = x + z;
495                         wr[en - 1] = wr[na - 1];
496                         if (z != 0.0)
497                                 wr[en - 1] = x - w / z;
498                         wi[na - 1] = 0.0;
499                         wi[en - 1] = 0.0;
500                         x = h[en - 1][na - 1];
501                         s = fabs(x) + fabs(z);
502                         p = x / s;
503                         q = z / s;
504                         r = sqrt(p * p + q * q);
505                         p /= r;
506                         q /= r;
507                         for (j = na - 1; j < n; j++) {  /* row modification */
508                                 z = h[na - 1][j];
509                                 h[na - 1][j] = q * z + p * h[en - 1][j];
510                                 h[en - 1][j] = q * h[en - 1][j] - p * z;
511                         }
512                         for (i = 0; i < en; i++) {      /* column modification */
513                                 z = h[i][na - 1];
514                                 h[i][na - 1] = q * z + p * h[i][en - 1];
515                                 h[i][en - 1] = q * h[i][en - 1] - p * z;
516                         }
517                         /* accumulate transformations */
518                         for (i = low - 1; i < hgh; i++) {
519                                 z = zz[i][na - 1];
520                                 zz[i][na - 1] = q * z + p * zz[i][en - 1];
521                                 zz[i][en - 1] = q * zz[i][en - 1] - p * z;
522                         }
523                 } else {               /* complex pair */
524                         wr[na - 1] = x + p;
525                         wr[en - 1] = x + p;
526                         wi[na - 1] = z;
527                         wi[en - 1] = -z;
528                 }
529                 en -= 2;
530         }                              /* while en >= low */
531         /* backsubstitute to find vectors of upper triangular form */
532         if (norm != 0.0) {
533                 for (en = n; en >= 1; en--) {
534                         p = wr[en - 1];
535                         q = wi[en - 1];
536                         na = en - 1;
537                         if (q == 0.0) {/* real vector */
538                                 m = en;
539                                 h[en - 1][en - 1] = 1.0;
540                                 if (na != 0) {
541                                         for (i = en - 2; i >= 0; i--) {
542                                                 w = h[i][i] - p;
543                                                 r = 0.0;
544                                                 for (j = m - 1; j < en; j++)
545                                                         r += h[i][j] * h[j][en - 1];
546                                                 if (wi[i] < 0.0) {
547                                                         z = w;
548                                                         s = r;
549                                                 } else {
550                                                         m = i + 1;
551                                                         if (wi[i] == 0.0) {
552                                                                 t = w;
553                                                                 if (t == 0.0) {
554                                                                         tst1 = norm;
555                                                                         t = tst1;
556                                                                         do {
557                                                                                 t = 0.01 * t;
558                                                                                 tst2 = norm + t;
559                                                                         } while (tst2 > tst1);
560                                                                 }
561                                                                 h[i][en - 1] = -(r / t);
562                                                         } else {        /* solve real equations */
563                                                                 x = h[i][i + 1];
564                                                                 y = h[i + 1][i];
565                                                                 q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
566                                                                 t = (x * s - z * r) / q;
567                                                                 h[i][en - 1] = t;
568                                                                 if (fabs(x) > fabs(z))
569                                                                         h[i + 1][en - 1] = (-r - w * t) / x;
570                                                                 else
571                                                                         h[i + 1][en - 1] = (-s - y * t) / z;
572                                                         }
573                                                         /* overflow control */
574                                                         t = fabs(h[i][en - 1]);
575                                                         if (t != 0.0) {
576                                                                 tst1 = t;
577                                                                 tst2 = tst1 + 1.0 / tst1;
578                                                                 if (tst2 <= tst1) {
579                                                                         for (j = i; j < en; j++)
580                                                                                 h[j][en - 1] /= t;
581                                                                 }
582                                                         }
583                                                 }
584                                         }
585                                 }
586                         } else if (q > 0.0) {
587                                 m = na;
588                                 if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
589                                         h[na - 1][na - 1] = q / h[en - 1][na - 1];
590                                         h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
591                                                                                                                 h[en - 1][na - 1];
592                                 } else
593                                         mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
594                                                 &h[na - 1][na - 1], &h[na - 1][en - 1]);
595                                 h[en - 1][na - 1] = 0.0;
596                                 h[en - 1][en - 1] = 1.0;
597                                 if (en != 2) {
598                                         for (i = en - 3; i >= 0; i--) {
599                                                 w = h[i][i] - p;
600                                                 ra = 0.0;
601                                                 sa = 0.0;
602                                                 for (j = m - 1; j < en; j++) {
603                                                         ra += h[i][j] * h[j][na - 1];
604                                                         sa += h[i][j] * h[j][en - 1];
605                                                 }
606                                                 if (wi[i] < 0.0) {
607                                                         z = w;
608                                                         r = ra;
609                                                         s = sa;
610                                                 } else {
611                                                         m = i + 1;
612                                                         if (wi[i] == 0.0)
613                                                                 mcdiv(-ra, -sa, w, q, &h[i][na - 1],
614                                                                         &h[i][en - 1]);
615                                                         else {  /* solve complex equations */
616                                                                 x = h[i][i + 1];
617                                                                 y = h[i + 1][i];
618                                                                 vr = (wr[i] - p) * (wr[i] - p);
619                                                                 vr = vr + wi[i] * wi[i] - q * q;        
620                                                                 vi = (wr[i] - p) * 2.0 * q;
621                                                                 if (vr == 0.0 && vi == 0.0) {
622                                                                         tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
623                                                                                                    fabs(y) + fabs(z));
624                                                                         vr = tst1;
625                                                                         do {
626                                                                                 vr = 0.01 * vr;
627                                                                                 tst2 = tst1 + vr;
628                                                                         } while (tst2 > tst1);
629                                                                 }
630                                                                 mcdiv(x * r - z * ra + q * sa,
631                                                                          x * s - z * sa - q * ra, vr, vi,
632                                                                      &h[i][na - 1], &h[i][en - 1]);
633                                                                 if (fabs(x) > fabs(z) + fabs(q)) {
634                                                                         h[i + 1]
635                                                                                 [na - 1] = (q * h[i][en - 1] -
636                                                                                                         w * h[i][na - 1] - ra) / x;
637                                                                         h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
638                                                                                                                 q * h[i][na - 1]) / x;
639                                                                 } else
640                                                                         mcdiv(-r - y * h[i][na - 1],
641                                                                                  -s - y * h[i][en - 1], z, q,
642                                                                              &h[i + 1][na - 1], &h[i + 1][en - 1]);
643                                                         }
644                                                         /* overflow control */
645                                                         t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
646                                                                  fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
647                                                         if (t != 0.0) {
648                                                                 tst1 = t;
649                                                                 tst2 = tst1 + 1.0 / tst1;
650                                                                 if (tst2 <= tst1) {
651                                                                         for (j = i; j < en; j++) {
652                                                                                 h[j][na - 1] /= t;
653                                                                                 h[j][en - 1] /= t;
654                                                                         }
655                                                                 }
656                                                         }
657                                                 }
658                                         }
659                                 }
660                         }
661                 }
662                 /* end back substitution. vectors of isolated roots */
663                 for (i = 0; i < n; i++) {
664                         if (i + 1 < low || i + 1 > hgh) {
665                                 for (j = i; j < n; j++)
666                                         zz[i][j] = h[i][j];
667                         }
668                 }
669                 /* multiply by transformation matrix to give vectors of
670                  * original full matrix. */
671                 for (j = n - 1; j >= low - 1; j--) {
672                         m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
673                         for (i = low - 1; i < hgh; i++) {
674                                 z = 0.0;
675                                 for (k = low - 1; k < m; k++)
676                                         z += zz[i][k] * h[k][j];
677                                 zz[i][j] = z;
678                         }
679                 }
680         }
681         return;
682 }
683
684
685 /* make rate matrix with 0.01 expected substitutions per unit time */
686 void onepamratematrix(dmatrix a)
687 {
688         int i, j;
689         double delta, temp, sum;
690         dvector m;
691
692         for (i = 0; i < tpmradix; i++)
693         {
694                 for (j = 0; j < tpmradix; j++)
695                 {
696                         a[i][j] = Freqtpm[j]*a[i][j];
697                 }
698         }
699
700         m = new_dvector(tpmradix);
701         for (i = 0, sum = 0.0; i < tpmradix; i++)
702         {
703                 for (j = 0, temp = 0.0; j < tpmradix; j++)
704                         temp += a[i][j];
705                 m[i] = temp; /* row sum */
706                 sum += temp*Freqtpm[i]; /* exp. rate */
707         }
708         delta = 0.01 / sum; /* 0.01 subst. per unit time */
709         for (i = 0; i < tpmradix; i++) {
710                 for (j = 0; j < tpmradix; j++) {
711                         if (i != j)
712                                 a[i][j] = delta * a[i][j];
713                         else
714                                 a[i][j] = delta * (-m[i]);
715                 }
716         }
717         free_dvector(m);
718 }
719
720
721 void eigensystem(dvector eval, dmatrix evec)
722 {
723         dvector evali, forg;
724         dmatrix a, b;
725         ivector ordr;
726         int i, j, k, error;
727         double zero;
728
729
730         ordr = new_ivector(tpmradix);
731         evali = new_dvector(tpmradix);
732         forg = new_dvector(tpmradix);
733         a = new_dmatrix(tpmradix,tpmradix);
734         b = new_dmatrix(tpmradix,tpmradix);
735
736         rtfdata(a, forg); /* get relative transition matrix and frequencies */
737         
738         onepamratematrix(a); /* make 1 PAM rate matrix */
739         
740         /* copy a to b */
741         for (i = 0; i < tpmradix; i++)
742                 for (j = 0; j < tpmradix; j++)
743                         b[i][j] = a[i][j];
744
745         elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
746         eltran(a, evec, ordr, tpmradix);
747         hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
748         
749         /* check eigenvalue equation */
750         error = FALSE;
751         for (j = 0; j < tpmradix; j++) {
752                 for (i = 0, zero = 0.0; i < tpmradix; i++) {
753                         for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
754                         zero -= eval[j] * evec[i][j];
755                         if (fabs(zero) > 1.0e-5)
756                                 error = TRUE;   
757                 }
758         }
759         if (error)
760                 FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
761
762         free_ivector(ordr);
763         free_dvector(evali);
764         free_dvector(forg);
765         free_dmatrix(a);
766         free_dmatrix(b);
767 }
768
769
770 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
771 {
772     double eps = 1.0e-20; /* ! */
773         int i, j, k, l, maxi=0, idx, ix, jx;
774         double sum, tmp, maxb, aw;
775         ivector index;
776         double *wk;
777         dmatrix omtrx;
778
779         
780         index = new_ivector(tpmradix);
781         omtrx = new_dmatrix(tpmradix,tpmradix);
782         
783         /* copy inmat to omtrx */
784         for (i = 0; i < tpmradix; i++)
785                 for (j = 0; j < tpmradix; j++)
786                         omtrx[i][j] = inmat[i][j];
787         
788         wk = (double *) malloc((unsigned)size * sizeof(double));
789         aw = 1.0;
790         for (i = 0; i < size; i++) {
791                 maxb = 0.0;
792                 for (j = 0; j < size; j++) {
793                         if (fabs(omtrx[i][j]) > maxb)
794                                 maxb = fabs(omtrx[i][j]);
795                 }
796                 if (maxb == 0.0) {
797                         /* Singular matrix */
798                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
799                         exit(1);
800                 }
801                 wk[i] = 1.0 / maxb;
802         }
803         for (j = 0; j < size; j++) {
804                 for (i = 0; i < j; i++) {
805                         sum = omtrx[i][j];
806                         for (k = 0; k < i; k++)
807                                 sum -= omtrx[i][k] * omtrx[k][j];
808                         omtrx[i][j] = sum;
809                 }
810                 maxb = 0.0;
811                 for (i = j; i < size; i++) {
812                         sum = omtrx[i][j];
813                         for (k = 0; k < j; k++)
814                                 sum -= omtrx[i][k] * omtrx[k][j];
815                         omtrx[i][j] = sum;
816                         tmp = wk[i] * fabs(sum);
817                         if (tmp >= maxb) {
818                                 maxb = tmp;
819                                 maxi = i;
820                         }
821                 }
822                 if (j != maxi) {
823                         for (k = 0; k < size; k++) {
824                                 tmp = omtrx[maxi][k];
825                                 omtrx[maxi][k] = omtrx[j][k];
826                                 omtrx[j][k] = tmp;
827                         }
828                         aw = -aw;
829                         wk[maxi] = wk[j];
830                 }
831                 index[j] = maxi;
832                 if (omtrx[j][j] == 0.0)
833                         omtrx[j][j] = eps;
834                 if (j != size - 1) {
835                         tmp = 1.0 / omtrx[j][j];
836                         for (i = j + 1; i < size; i++)
837                                 omtrx[i][j] *= tmp;
838                 }
839         }
840         for (jx = 0; jx < size; jx++) {
841                 for (ix = 0; ix < size; ix++)
842                         wk[ix] = 0.0;
843                 wk[jx] = 1.0;
844                 l = -1;
845                 for (i = 0; i < size; i++) {
846                         idx = index[i];
847                         sum = wk[idx];
848                         wk[idx] = wk[i];
849                         if (l != -1) {
850                                 for (j = l; j < i; j++)
851                                         sum -= omtrx[i][j] * wk[j];
852                         } else if (sum != 0.0)
853                                 l = i;
854                         wk[i] = sum;
855                 }
856                 for (i = size - 1; i >= 0; i--) {
857                         sum = wk[i];
858                         for (j = i + 1; j < size; j++)
859                                 sum -= omtrx[i][j] * wk[j];
860                         wk[i] = sum / omtrx[i][i];
861                 }
862                 for (ix = 0; ix < size; ix++)
863                         imtrx[ix][jx] = wk[ix];
864         }
865         free((char *)wk);
866         wk = NULL;
867         free_ivector(index);
868         free_dmatrix(omtrx);
869 }
870
871
872 void checkevector(dmatrix evec, dmatrix ivec, int nn)
873 {
874         int i, j, ia, ib, ic, error;
875         dmatrix matx;
876         double sum;
877
878
879         matx = new_dmatrix(nn, nn);
880         /* multiply matrix of eigenvectors and its inverse */
881         for (ia = 0; ia < nn; ia++) {
882                 for (ic = 0; ic < nn; ic++) {
883                         sum = 0.0;
884                         for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
885                         matx[ia][ic] = sum;
886                 }
887         }
888         /* check whether the unitary matrix is obtained */
889         error = FALSE;
890         for (i = 0; i < nn; i++) {
891                 for (j = 0; j < nn; j++) {
892                         if (i == j) {
893                                 if (fabs(matx[i][j] - 1.0) > 1.0e-5)
894                                         error = TRUE;
895                         } else {
896                                 if (fabs(matx[i][j]) > 1.0e-5)
897                                         error = TRUE;
898                         }
899                 }
900         }
901         if (error) {
902                 FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
903         }
904         free_dmatrix(matx);
905 }
906
907
908 /***************************** exported functions *****************************/
909
910
911 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
912 void tranprobmat()
913 {
914         eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
915         luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
916         checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
917 }
918
919
920 /* compute P(t) */
921 void tprobmtrx(double arc, dmatrix tpr)
922 {
923         register int i, j, k;
924         register double temp;
925
926
927         for (k = 0; k < tpmradix; k++) {
928                 temp = exp(arc * Eval[k]);
929                 for (j = 0; j < tpmradix; j++)
930                         iexp[k][j] = Ievc[k][j] * temp;
931         }
932         for (i = 0; i < tpmradix; i++) {
933                 for (j = 0; j < tpmradix; j++) {
934                         temp = 0.0;
935                         for (k = 0; k < tpmradix; k++)
936                                 temp += Evec[i][k] * iexp[k][j];
937                         tpr[i][j] = fabs(temp);
938                 }
939         }
940 }
941
942
943 /******************************************************************************/
944 /* estimation of maximum likelihood distances                                 */
945 /******************************************************************************/
946
947 /* compute total log-likelihood
948    input: likelihoods for each site and non-zero rate
949    output: total log-likelihood (incl. zero rate category) */
950 double comptotloglkl(dmatrix cdl)
951 {
952         int k, r;
953         double loglkl, fv, fv2, sitelkl;
954
955         loglkl = 0.0;
956         fv = 1.0-fracinv;
957         fv2 = (1.0-fracinv)/(double) numcats;
958         
959         if (numcats == 1) {
960
961                 for (k = 0; k < Numptrn; k++) { 
962                 
963                         /* compute likelihood for pattern k */
964                         sitelkl = cdl[0][k]*fv;
965                         if (constpat[k] == TRUE)
966                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
967                 
968                         /* total log-likelihood */
969                         loglkl += log(sitelkl)*Weight[k];
970                 
971                 }
972
973         } else {
974         
975                 for (k = 0; k < Numptrn; k++) {
976                         
977                         /* this general routine works always but it's better 
978                to run it only when it's really necessary */
979                 
980                         /* compute likelihood for pattern k */
981                         sitelkl = 0.0;
982                         for (r = 0; r < numcats; r++)
983                                 sitelkl += cdl[r][k];
984                         sitelkl = fv2*sitelkl;
985                         if (constpat[k] == TRUE)
986                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
987                 
988                         /* total log-likelihood */
989                         loglkl += log(sitelkl)*Weight[k];
990                 
991                 }
992
993         }
994         
995         return loglkl;
996 }
997
998
999 /* computes the site log-likelihoods 
1000    input: likelihoods for each site and non-zero rate
1001    output: log-likelihood for each site */
1002 void allsitelkl(dmatrix cdl, dvector aslkl)
1003 {
1004         int k, r;
1005         double fv, fv2, sitelkl;
1006
1007         fv = 1.0-fracinv;
1008         fv2 = (1.0-fracinv)/(double) numcats;
1009         
1010         if (numcats == 1) {
1011
1012                 for (k = 0; k < Numptrn; k++) { 
1013                 
1014                         /* compute likelihood for pattern k */
1015                         sitelkl = cdl[0][k]*fv;
1016                         if (constpat[k] == TRUE)
1017                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1018                 
1019                         /* site log-likelihood */
1020                         aslkl[k] = log(sitelkl);
1021                 }
1022
1023         } else {
1024         
1025                 for (k = 0; k < Numptrn; k++) {
1026                         
1027                         /* this general routine works always but it's better 
1028                to run it only when it's really necessary */
1029                 
1030                         /* compute likelihood for pattern k */
1031                         sitelkl = 0.0;
1032                         for (r = 0; r < numcats; r++)
1033                                 sitelkl += cdl[r][k];
1034                         sitelkl = fv2*sitelkl;
1035                         if (constpat[k] == TRUE)
1036                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1037                 
1038                         /* total log-likelihood */
1039                         aslkl[k] = log(sitelkl);
1040                 
1041                 }
1042         }
1043 }
1044
1045
1046 /***************************** internal functions *****************************/
1047
1048 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049 double pairlkl(double arc)
1050 {
1051         int k, r, ci, cj;
1052         double loglkl, fv, sitelkl;
1053
1054         
1055         /* compute tpms */
1056         for (r = 0; r < numcats; r++)
1057                 /* compute tpm for rate category r */
1058                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1059
1060         loglkl = 0.0;
1061         fv = 1.0-fracinv;
1062         
1063         if (numcats == 1) {
1064
1065                 for (k = 0; k < Numptrn; k++) { 
1066                 
1067                         /* compute likelihood for site k */
1068                         ci = seqchi[k];
1069                         cj = seqchj[k];
1070                         if (ci != tpmradix && cj != tpmradix)
1071                                 sitelkl = ltprobr[0][ci][cj]*fv;
1072                         else
1073                                 sitelkl = fv;
1074                         if (ci == cj && ci != tpmradix)
1075                                 sitelkl += fracinv*Freqtpm[ci];
1076                 
1077                         /* total log-likelihood */
1078                         loglkl += log(sitelkl)*Weight[k];
1079                 
1080                 }
1081
1082         } else {
1083         
1084                 for (k = 0; k < Numptrn; k++) {
1085                         
1086                         /* this general routine works always but it's better 
1087                to run it only when it's really necessary */
1088                 
1089                         /* compute likelihood for site k */
1090                         ci = seqchi[k];
1091                         cj = seqchj[k];
1092                         if (ci != tpmradix && cj != tpmradix) {
1093                                 sitelkl = 0.0;
1094                                 for (r = 0; r < numcats; r++)
1095                                         sitelkl += ltprobr[r][ci][cj];
1096                                 sitelkl = fv*sitelkl/(double) numcats;  
1097                         } else
1098                                 sitelkl = fv;
1099                         if (ci == cj && ci != tpmradix)
1100                                 sitelkl += fracinv*Freqtpm[ci];
1101                 
1102                         /* total log-likelihood */
1103                         loglkl += log(sitelkl)*Weight[k];
1104         
1105         }
1106
1107     }
1108         
1109         /* return negative log-likelihood as we use a minimizing procedure */
1110         return -loglkl;
1111 }
1112
1113
1114 /***************************** exported functions *****************************/
1115
1116
1117
1118 /******************************************************************************/
1119
1120 /* maximum likelihood distance between sequence i and j */
1121 /* CZ changed 05/16/01 */
1122 double mldistance( int i ) {
1123         double dist, fx, f2x;
1124                 
1125     /* use old distance as start value */       
1126     dist = Distanmat[ i ];
1127
1128     if ( dist == 0.0 ) {
1129         return 0.0;
1130     }
1131
1132         seqchi = Seqpat[ Maxspc - 1 ];
1133         seqchj = Seqpat[ i ];
1134
1135         if (dist <= MINARC) dist = MINARC+1.0;
1136         if (dist >= MAXARC) dist = MAXARC-1.0;
1137         
1138         dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1139         
1140         return dist;
1141 }
1142
1143
1144
1145 /* initialize distance matrix */
1146 /* CZ changed 05/16/01 */
1147 void initdistan() {
1148         int i, k, diff, x, y;
1149         double obs, temp;
1150  
1151         for (i = 0; i < Maxspc - 1 ; i++) {
1152                 
1153                         seqchi = Seqpat[i];
1154                         seqchj = Seqpat[Maxspc - 1];
1155                         
1156                         /* count observed differences */
1157                         diff = 0;
1158                         for (k = 0; k < Numptrn; k++) {
1159                                 x = seqchi[k];
1160                                 y = seqchj[k];
1161                                 if (x != y &&
1162                                         x != tpmradix &&
1163                                         y != tpmradix)
1164                                         diff += Weight[k];
1165                         }
1166                         if (diff == 0)
1167                                 Distanmat[i] = 0.0;
1168                         else {
1169                                 /* use generalized JC correction to get first estimate
1170                                    (for the SH model the observed distance is used) */
1171                                 /* observed distance */
1172                                 obs = (double) diff / (double) Maxsite;
1173                                 temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1174                                 if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1175                                         /* use JC corrected distance */
1176                                         Distanmat[i] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1177                                 else
1178                                         /* use observed distance */
1179                                         Distanmat[i] = obs * 100.0;
1180                                 if (Distanmat[i] < MINARC) Distanmat[i] = MINARC;
1181                                 if (Distanmat[i] > MAXARC) Distanmat[i] = MAXARC;
1182                         }
1183         }
1184     
1185 }
1186
1187
1188
1189
1190 /* compute distance matrix */
1191 /* CZ changed 05/16/01 */
1192 void computedistan() {
1193         int i;
1194
1195         for ( i = 0; i < Maxspc - 1; i++ ) {
1196             Distanmat[ i ] = mldistance( i );
1197     }
1198 }
1199
1200
1201 /******************************************************************************/
1202
1203
1204
1205
1206
1207 /******************************************************************************/
1208 /* computation of maximum likelihood edge lengths for a given tree            */
1209 /******************************************************************************/
1210
1211
1212 /***************************** internal functions *****************************/
1213
1214
1215 /* multiply partial likelihoods */
1216 void productpartials(Node *op)
1217 {
1218         Node *cp;
1219         int i, j, r;
1220         dcube opc, cpc;
1221
1222         cp = op;
1223         opc = op->partials;
1224         while (cp->isop->isop != op) {
1225                 cp = cp->isop;
1226                 cpc = cp->partials;
1227                 for (r = 0; r < numcats; r++)
1228                         for (i = 0; i < Numptrn; i++)
1229                                 for (j = 0; j < tpmradix; j++)
1230                                         opc[r][i][j] *= cpc[r][i][j];
1231         }
1232 }
1233
1234
1235 /* compute internal partial likelihoods */
1236 void partialsinternal(Node *op)
1237 {
1238         int i, j, k, r;
1239         double sum;
1240         dcube oprob, cprob;
1241
1242         if (clockmode == 1) { /* clocklike branch lengths */
1243                 for (r = 0; r < numcats; r++) {
1244                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1245                 }
1246         } else {  /* non-clocklike branch lengths */
1247                 for (r = 0; r < numcats; r++) {
1248                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1249                 }
1250         }
1251         
1252         oprob = op->partials;
1253         cprob = op->kinp->isop->partials;
1254         for (r = 0; r < numcats; r++) {
1255                 for (k = 0; k < Numptrn; k++) {
1256                         for (i = 0; i < tpmradix; i++) {
1257                                 sum = 0.0;
1258                                 for (j = 0; j < tpmradix; j++)
1259                                         sum += ltprobr[r][i][j] * cprob[r][k][j];
1260                                 oprob[r][k][i] = sum;
1261                         }
1262                 }
1263         }
1264 }
1265
1266
1267 /* compute external partial likelihoods */
1268 void partialsexternal(Node *op)
1269 {
1270         int i, j, k, r;
1271         dcube oprob;
1272         cvector dseqi;
1273
1274         if (clockmode == 1) { /* clocklike branch lengths */
1275                 for (r = 0; r < numcats; r++) {
1276                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1277                 }
1278         } else {  /* nonclocklike branch lengths */
1279                 for (r = 0; r < numcats; r++) {
1280                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1281                 }
1282         }
1283
1284         oprob = op->partials;
1285         dseqi = op->kinp->eprob;
1286         for (r = 0; r < numcats; r++) {
1287                 for (k = 0; k < Numptrn; k++) {
1288                         if ((j = dseqi[k]) == tpmradix) {
1289                                 for (i = 0; i < tpmradix; i++)
1290                                         oprob[r][k][i] = 1.0;
1291                         } else {
1292                                 for (i = 0; i < tpmradix; i++)
1293                                         oprob[r][k][i] = ltprobr[r][i][j];
1294                         }
1295                 }
1296         }
1297 }
1298
1299
1300 /* compute all partial likelihoods */
1301 void initpartials(Tree *tr)
1302 {
1303         Node *cp, *rp;
1304
1305         cp = rp = tr->rootp;
1306         do {
1307                 cp = cp->isop->kinp;
1308                 if (cp->isop == NULL) { /* external node */                     
1309                         cp = cp->kinp; /* not descen */
1310                         partialsexternal(cp);
1311                 } else { /* internal node */    
1312                         if (!cp->descen) {
1313                                 productpartials(cp->kinp->isop);
1314                                 partialsinternal(cp);
1315                         }
1316                 }
1317         } while (cp != rp);
1318 }
1319
1320
1321 /* compute log-likelihood given internal branch with length arc
1322    between partials partiali and partials partialj */
1323 double intlkl(double arc)
1324 {
1325         double sumlk, slk;
1326         int r, s, i, j;
1327         dmatrix cdl;
1328
1329         cdl = Ctree->condlkl;
1330         for (r = 0; r < numcats; r++) {
1331                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1332         }
1333         for (r = 0; r < numcats; r++) {
1334                 for (s = 0; s < Numptrn; s++) {
1335                         sumlk = 0.0;
1336                         for (i = 0; i < tpmradix; i++) {                        
1337                                 slk = 0.0;
1338                                 for (j = 0; j < tpmradix; j++) 
1339                                         slk += partialj[r][s][j] * ltprobr[r][i][j];            
1340                                 sumlk += Freqtpm[i] * partiali[r][s][i] * slk;  
1341                         }
1342                         cdl[r][s] = sumlk;
1343                 }
1344         }
1345
1346         /* compute total log-likelihood for current tree */
1347         Ctree->lklhd = comptotloglkl(cdl);
1348
1349         return -(Ctree->lklhd); /* we use a minimizing procedure */
1350 }
1351
1352
1353 /* optimize internal branch */
1354 void optinternalbranch(Node *op)
1355 {
1356         double arc, fx, f2x;
1357
1358         partiali = op->isop->partials;
1359         partialj = op->kinp->isop->partials;
1360         arc = op->length; /* nonclocklike branch lengths */
1361         if (arc <= MINARC) arc = MINARC+1.0;
1362         if (arc >= MAXARC) arc = MAXARC-1.0;
1363         arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1364         op->kinp->length = arc;
1365         op->length = arc;
1366         
1367         /* variance of branch length */
1368         f2x = fabs(f2x);
1369         if (1.0/(MAXARC*MAXARC) < f2x)
1370                 op->varlen = 1.0/f2x;
1371         else
1372                 op->varlen = MAXARC*MAXARC;
1373 }
1374
1375
1376 /* compute log-likelihood given external branch with length arc
1377    between partials partiali and sequence seqchi */
1378 double extlkl(double arc)
1379 {
1380         double sumlk;
1381         int r, s, i, j;
1382         dvector opb;
1383         dmatrix cdl;
1384         
1385         cdl = Ctree->condlkl;
1386         for (r = 0; r < numcats; r++) {
1387                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1388         }
1389         for (r = 0; r < numcats; r++) {
1390                 for (s = 0; s < Numptrn; s++) {
1391                         opb = partiali[r][s];
1392                         sumlk = 0.0;
1393                         if ((j = seqchi[s]) != tpmradix) {
1394                                 for (i = 0; i < tpmradix; i++)
1395                                         sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1396                         } else {
1397                                 for (i = 0; i < tpmradix; i++)
1398                                         sumlk += Freqtpm[i] * opb[i];
1399                         }
1400                         cdl[r][s] = sumlk;
1401                 }
1402         }
1403         
1404         /* compute total log-likelihood for current tree */
1405         Ctree->lklhd = comptotloglkl(cdl);
1406
1407         return -(Ctree->lklhd); /* we use a minimizing procedure */
1408 }
1409
1410 /* optimize external branch */
1411 void optexternalbranch(Node *op)
1412 {
1413         double arc, fx, f2x;
1414
1415         partiali = op->isop->partials;
1416         seqchi = op->kinp->eprob;
1417         arc = op->length; /* nonclocklike branch lengths */
1418         if (arc <= MINARC) arc = MINARC+1.0;
1419         if (arc >= MAXARC) arc = MAXARC-1.0;
1420         arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1421         op->kinp->length = arc;
1422         op->length = arc;
1423         
1424          /* variance of branch length */
1425         f2x = fabs(f2x);
1426         if (1.0/(MAXARC*MAXARC) < f2x)
1427                 op->varlen = 1.0/f2x;
1428         else
1429                 op->varlen = MAXARC*MAXARC;
1430 }
1431
1432
1433 /* finish likelihoods for each rate and site */
1434 void finishlkl(Node *op)
1435 {
1436         int r, k, i, j;
1437         double arc, sumlk, slk;
1438         dmatrix cdl;
1439
1440         partiali = op->isop->partials;
1441         partialj = op->kinp->isop->partials;
1442         cdl = Ctree->condlkl;
1443         arc = op->length; /* nonclocklike branch lengths */
1444         for (r = 0; r < numcats; r++) {
1445                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1446         }
1447         for (r = 0; r < numcats; r++) {
1448                 for (k = 0; k < Numptrn; k++) {
1449                         sumlk = 0.0;
1450                         for (i = 0; i < tpmradix; i++) {
1451                                 slk = 0.0;
1452                                 for (j = 0; j < tpmradix; j++)
1453                                         slk += partialj[r][k][j] * ltprobr[r][i][j];
1454                                 sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1455                         }                       
1456                         cdl[r][k] = sumlk;
1457                 }
1458         }
1459 }
1460
1461
1462 /***************************** exported functions *****************************/
1463
1464
1465 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1466 double optlkl(Tree *tr)
1467 {
1468         Node *cp, *rp;
1469         int nconv;
1470         double lendiff;
1471         
1472         clockmode = 0; /* nonclocklike branch lengths */
1473         nconv = 0;
1474         Converg = FALSE;
1475         initpartials(tr);
1476         for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1477                 
1478                 cp = rp = tr->rootp;
1479                 do {
1480                         cp = cp->isop->kinp;
1481                         productpartials(cp->kinp->isop);
1482                         if (cp->isop == NULL) { /* external node */     
1483                                 cp = cp->kinp; /* not descen */
1484                                 
1485                                 lendiff = cp->length;
1486                                 optexternalbranch(cp);
1487                                 lendiff = fabs(lendiff - cp->length);
1488                                 if (lendiff < EPSILON) nconv++;
1489                                 else nconv = 0;                         
1490                                 
1491                                 partialsexternal(cp);
1492                         } else { /* internal node */
1493                                 if (cp->descen) {
1494                                         partialsinternal(cp);
1495                                 } else {
1496                                         
1497                                         lendiff = cp->length;
1498                                         optinternalbranch(cp);
1499                                         lendiff = fabs(lendiff - cp->length);
1500                                         if (lendiff < EPSILON) nconv++;
1501                                         else nconv = 0;
1502                                         
1503                                         /* eventually compute likelihoods for each site */
1504                                         if ((cp->number == Numibrnch-1 && lendiff < EPSILON) ||
1505                                         Numit == MAXIT-1) finishlkl(cp);
1506
1507                                         partialsinternal(cp);
1508                                 }
1509                         }                       
1510                         if (nconv >= Numbrnch) { /* convergence */
1511                                 Converg = TRUE;
1512                                 cp = rp; /* get out of here */
1513                         }
1514                 } while (cp != rp);
1515         }
1516
1517         /* compute total log-likelihood for current tree */
1518         return comptotloglkl(tr->condlkl);
1519 }
1520
1521
1522 /* compute likelihood of tree for given branch lengths */
1523 double treelkl(Tree *tr)
1524 {
1525         int i, k, r;
1526         Node *cp;
1527         dmatrix cdl;
1528         dcube prob1, prob2;
1529         double sumlk;
1530
1531         /* compute for each site and rate log-likelihoods */
1532         initpartials(tr);
1533         cp = tr->rootp;
1534         productpartials(cp->isop);
1535         prob1 = cp->partials;
1536         prob2 = cp->isop->partials;
1537         cdl = tr->condlkl;
1538         for (r = 0; r < numcats; r++) {
1539                 for (k = 0; k < Numptrn; k++) {
1540                         sumlk = 0.0;
1541                         for (i = 0; i < tpmradix; i++)
1542                                 sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1543                         cdl[r][k] = sumlk;
1544                 }
1545         }       
1546         
1547         /* return total log-likelihood for current tree */
1548         return comptotloglkl(cdl);
1549 }
1550
1551
1552 /******************************************************************************/
1553 /* least-squares estimate of branch lengths                                   */
1554 /******************************************************************************/
1555
1556
1557 /***************************** internal functions *****************************/
1558
1559
1560 void luequation(dmatrix amat, dvector yvec, int size)
1561 {
1562     double eps = 1.0e-20; /* ! */
1563         int i, j, k, l, maxi=0, idx;
1564         double sum, tmp, maxb, aw;
1565         dvector wk;
1566         ivector index;
1567
1568
1569         wk = new_dvector(size);
1570         index = new_ivector(size);
1571         aw = 1.0;
1572         for (i = 0; i < size; i++) {
1573                 maxb = 0.0;
1574                 for (j = 0; j < size; j++) {
1575                         if (fabs(amat[i][j]) > maxb)
1576                                 maxb = fabs(amat[i][j]);
1577                 }
1578                 if (maxb == 0.0) {
1579                         /* Singular matrix */
1580                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1581                         exit(1);
1582                 }
1583                 wk[i] = 1.0 / maxb;
1584         }
1585         for (j = 0; j < size; j++) {
1586                 for (i = 0; i < j; i++) {
1587                         sum = amat[i][j];
1588                         for (k = 0; k < i; k++)
1589                                 sum -= amat[i][k] * amat[k][j];
1590                         amat[i][j] = sum;
1591                 }
1592                 maxb = 0.0;
1593                 for (i = j; i < size; i++) {
1594                         sum = amat[i][j];
1595                         for (k = 0; k < j; k++)
1596                                 sum -= amat[i][k] * amat[k][j];
1597                         amat[i][j] = sum;
1598                         tmp = wk[i] * fabs(sum);
1599                         if (tmp >= maxb) {
1600                                 maxb = tmp;
1601                                 maxi = i;
1602                         }
1603                 }
1604                 if (j != maxi) {
1605                         for (k = 0; k < size; k++) {
1606                                 tmp = amat[maxi][k];
1607                                 amat[maxi][k] = amat[j][k];
1608                                 amat[j][k] = tmp;
1609                         }
1610                         aw = -aw;
1611                         wk[maxi] = wk[j];
1612                 }
1613                 index[j] = maxi;
1614                 if (amat[j][j] == 0.0)
1615                         amat[j][j] = eps;
1616                 if (j != size - 1) {
1617                         tmp = 1.0 / amat[j][j];
1618                         for (i = j + 1; i < size; i++)
1619                                 amat[i][j] *= tmp;
1620                 }
1621         }
1622         l = -1;
1623         for (i = 0; i < size; i++) {
1624                 idx = index[i];
1625                 sum = yvec[idx];
1626                 yvec[idx] = yvec[i];
1627                 if (l != -1) {
1628                         for (j = l; j < i; j++)
1629                                 sum -= amat[i][j] * yvec[j];
1630                 } else if (sum != 0.0)
1631                         l = i;
1632                 yvec[i] = sum;
1633         }
1634         for (i = size - 1; i >= 0; i--) {
1635                 sum = yvec[i];
1636                 for (j = i + 1; j < size; j++)
1637                         sum -= amat[i][j] * yvec[j];
1638                 yvec[i] = sum / amat[i][i];
1639         }
1640         free_ivector(index);
1641         free_dvector(wk);
1642 }
1643
1644
1645 /* least square estimation of branch lengths
1646    used for the approximate ML and as starting point 
1647    in the calculation of the exact value of the ML */
1648 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1649 {
1650         int i, i1, j, j1, j2, k, numbrnch, numpair;
1651         double sum, leng, alllen, rss;
1652         ivector pths;
1653         dmatrix atmt, atamt;
1654         Node **ebp, **ibp;
1655
1656         numbrnch = numspc + numibrnch;
1657         numpair = (numspc * (numspc - 1)) / 2;
1658         atmt = new_dmatrix(numbrnch, numpair);
1659         atamt = new_dmatrix(numbrnch, numbrnch);
1660         ebp = tr->ebrnchp;
1661         ibp = tr->ibrnchp;
1662         for (i = 0; i < numspc; i++) {
1663                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1664                         if (j1 == i) {
1665                                 for (j2 = 0; j2 < j1; j2++, j++) {
1666                                         atmt[i][j] = 1.0;
1667                                 }
1668                         } else {
1669                                 for (j2 = 0; j2 < j1; j2++, j++) {
1670                                         if (j2 == i)
1671                                                 atmt[i][j] = 1.0;
1672                                         else
1673                                                 atmt[i][j] = 0.0;
1674                                 }
1675                         }
1676                 }
1677         }
1678         for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1679                 pths = ibp[i1]->paths;
1680                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1681                         for (j2 = 0; j2 < j1; j2++, j++) {
1682                                 if (pths[j1] != pths[j2])
1683                                         atmt[i][j] = 1.0;
1684                                 else
1685                                         atmt[i][j] = 0.0;
1686                         }
1687                 }
1688         }
1689         for (i = 0; i < numbrnch; i++) {
1690                 for (j = 0; j <= i; j++) {
1691                         for (k = 0, sum = 0.0; k < numpair; k++)
1692                                 sum += atmt[i][k] * atmt[j][k];
1693                         atamt[i][j] = sum;
1694                         atamt[j][i] = sum;
1695                 }
1696         }
1697         for (i = 0; i < numbrnch; i++) {
1698                 for (k = 0, sum = 0.0; k < numpair; k++)
1699                         sum += atmt[i][k] * distanvec[k];
1700                 Brnlength[i] = sum;
1701         }
1702         luequation(atamt, Brnlength, numbrnch);
1703         for (i = 0, rss = 0.0; i < numpair; i++) {
1704                 sum = distanvec[i];
1705                 for (j = 0; j < numbrnch; j++) {
1706                         if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1707                                 sum -= Brnlength[j];
1708                 }
1709                 rss += sum * sum;
1710         }
1711         tr->rssleast = sqrt(rss);
1712         alllen = 0.0;
1713         for (i = 0; i < numspc; i++) {
1714                 leng = Brnlength[i];
1715                 alllen += leng;
1716                 if (leng < MINARC) leng = MINARC;
1717                 if (leng > MAXARC) leng = MAXARC;
1718                 if (clockmode) { /* clock */
1719                         ebp[i]->lengthc = leng;
1720                         ebp[i]->kinp->lengthc = leng;
1721                 } else { /* no clock */
1722                         ebp[i]->length = leng;
1723                         ebp[i]->kinp->length = leng;
1724                 }       
1725                 Brnlength[i] = leng;
1726         }
1727         for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1728                 leng = Brnlength[j];
1729                 alllen += leng;
1730                 if (leng < MINARC) leng = MINARC;
1731                 if (leng > MAXARC) leng = MAXARC;
1732                 if (clockmode) { /* clock */
1733                         ibp[i]->lengthc = leng;
1734                         ibp[i]->kinp->lengthc = leng;
1735                 } else { /* no clock */
1736                         ibp[i]->length = leng;
1737                         ibp[i]->kinp->length = leng;
1738                 }
1739                 Brnlength[j] = leng;
1740         }
1741         free_dmatrix(atmt);
1742         free_dmatrix(atamt);
1743 }