initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_mod / src / ml1.c
1 /*
2  * ml1.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 /******************************************************************************/
17 /* definitions and prototypes                                                 */
18 /******************************************************************************/
19
20 #define EXTERN extern
21
22 /* prototypes */
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <ctype.h>
27 #include "util.h"
28 #include "ml.h"
29
30 #define STDOUT     stdout
31 #ifndef PARALLEL             /* because printf() runs significantly faster */
32                              /* than fprintf(stdout) on an Apple McIntosh  */
33                              /* (HS) */
34 #       define FPRINTF    printf
35 #       define STDOUTFILE
36 #else
37 #       define FPRINTF    fprintf
38 #       define STDOUTFILE STDOUT,
39 #endif
40
41
42 /******************************************************************************/
43 /* compacting sequence data information                                       */
44 /******************************************************************************/
45
46
47 /***************************** internal functions *****************************/
48
49
50 /* make all frequencies a little different */
51 void convfreq(dvector freqemp)
52 {
53         int i, j, maxi=0;
54         double freq, maxfreq, sum;
55
56
57         sum = 0.0;
58         maxfreq = 0.0;
59         for (i = 0; i < tpmradix; i++) {
60                 freq = freqemp[i];
61                 if (freq < MINFREQ) freqemp[i] = MINFREQ;
62                 if (freq > maxfreq) {
63                         maxfreq = freq;
64                         maxi = i;
65                 }
66                 sum += freqemp[i];
67         }
68         freqemp[maxi] += 1.0 - sum;
69         
70         for (i = 0; i < tpmradix - 1; i++) {
71                 for (j = i + 1; j < tpmradix; j++) {
72                         if (freqemp[i] == freqemp[j]) {
73                                 freqemp[i] += MINFDIFF/2.0;
74                                 freqemp[j] -= MINFDIFF/2.0;
75                         }
76                 }
77         }
78 }
79
80 /* sort site patters of original input data */
81 void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
82         int *numptrn)
83 {
84         int i, j, k, l, n, pass;
85         int *awork;
86         int *count;
87
88
89         awork = new_ivector(maxsite);
90         count = new_ivector(tpmradix+1);
91         for (i = 0; i < maxsite; i++)
92                 ali[i] = i;
93         for (pass = maxspc - 1; pass >= 0; pass--) {
94                 for (j = 0; j < tpmradix+1; j++)
95                         count[j] = 0;
96                 for (i = 0; i < maxsite; i++)
97                         count[(int) seqchar[pass][ali[i]]]++;
98                 for (j = 1; j < tpmradix+1; j++)
99                         count[j] += count[j-1];
100                 for (i = maxsite-1; i >= 0; i--)
101                         awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
102                 for (i = 0; i < maxsite; i++)
103                         ali[i] = awork[i];
104         }
105         free_ivector(awork);
106         free_ivector(count);
107         n = 1;
108         for (j = 1; j < maxsite; j++) {
109                 k = ali[j];
110                 l = ali[j-1];
111                 for (i = 0; i < maxspc; i++) {
112                         if (seqchar[i][l] != seqchar[i][k]) {
113                                 n++;
114                                 break;
115                         }
116                 }
117         }
118         *numptrn = n;
119 }
120
121
122 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
123         ivector weight, int maxspc, int maxsite, int numptrn)
124 {
125         int i, j, k, n;
126         int agree_flag; /* boolean */
127
128
129         n = 0;
130         k = ali[n];
131         for (i = 0; i < maxspc; i++) {
132                 seqconint[i][n] = seqchar[i][k];
133         }
134         weight[n] = 1;
135         Alias[k] = 0;
136         for (j = 1; j < maxsite; j++) {
137                 k = ali[j];
138                 agree_flag = TRUE;
139                 for (i = 0; i < maxspc; i++) {
140                         if (seqconint[i][n] != seqchar[i][k]) {
141                                 agree_flag = FALSE;
142                                 break;
143                         }
144                 }
145                 if (agree_flag == FALSE) {
146                         n++;
147                         for (i = 0; i < maxspc; i++) {
148                                 seqconint[i][n] = seqchar[i][k];
149                         }
150                         weight[n] = 1;
151                         Alias[k] = n;
152                 } else {
153                         weight[n]++;
154                         Alias[k] = n;
155                 }
156         }
157         n++;
158         if (numptrn != n) {
159                 /* Problem in condenceseq */
160                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
161                 exit(1);
162         }
163 }
164
165 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
166         int *numconst, int *numconstpat)
167 {
168         int character, s, i, constflag;
169
170         *numconst    = 0;
171         *numconstpat = 0;
172         for (s = 0; s < numptrn; s++) { /* check all patterns */
173                 constpat[s] = FALSE;
174                 constflag = TRUE;
175                 character = seqpat[0][s];
176                 for (i = 1; i < maxspc; i++) {
177                         if (seqpat[i][s] != character) {
178                                 constflag = FALSE;
179                                 break;
180                         }
181                 }
182                 if (character != tpmradix && constflag) {
183                         (*numconst) = (*numconst) + weight[s];
184                         (*numconstpat)++;
185                         constpat[s] = TRUE;
186                 }
187         }
188 }
189
190 /***************************** exported functions *****************************/
191
192
193 void evaluateseqs()
194 {       
195         ivector ali;
196
197         convfreq(Freqtpm); /* make all frequencies slightly different */
198         ali = new_ivector(Maxsite);
199         radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
200         Seqpat = new_cmatrix(Maxspc, Numptrn);
201         constpat = new_ivector(Numptrn);
202         Weight = new_ivector(Numptrn);
203         condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
204         free_ivector(ali);
205         countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
206         fracconstpat = (double) Numconstpat / (double) Numptrn; 
207         fracconst    = (double) Numconst / (double) Maxsite;    
208 }
209
210
211 /******************************************************************************/
212 /* computation of Pij(t)                                                      */
213 /******************************************************************************/
214
215
216 /***************************** internal functions *****************************/
217
218
219 void elmhes(dmatrix a, ivector ordr, int n)
220 {
221         int m, j, i;
222         double y, x;
223
224
225         for (i = 0; i < n; i++)
226                 ordr[i] = 0;
227         for (m = 2; m < n; m++) {
228                 x = 0.0;
229                 i = m;
230                 for (j = m; j <= n; j++) {
231                         if (fabs(a[j - 1][m - 2]) > fabs(x)) {
232                                 x = a[j - 1][m - 2];
233                                 i = j;
234                         }
235                 }
236                 ordr[m - 1] = i;      /* vector */
237                 if (i != m) {
238                         for (j = m - 2; j < n; j++) {
239                                 y = a[i - 1][j];
240                                 a[i - 1][j] = a[m - 1][j];
241                                 a[m - 1][j] = y;
242                         }
243                         for (j = 0; j < n; j++) {
244                                 y = a[j][i - 1];
245                                 a[j][i - 1] = a[j][m - 1];
246                                 a[j][m - 1] = y;
247                         }
248                 }
249                 if (x != 0.0) {
250                         for (i = m; i < n; i++) {
251                                 y = a[i][m - 2];
252                                 if (y != 0.0) {
253                                         y /= x;
254                                         a[i][m - 2] = y;
255                                         for (j = m - 1; j < n; j++)
256                                                 a[i][j] -= y * a[m - 1][j];
257                                         for (j = 0; j < n; j++)
258                                                 a[j][m - 1] += y * a[j][i];
259                                 }
260                         }
261                 }
262         }
263 }
264
265
266 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
267 {
268         int i, j, m;
269
270
271         for (i = 0; i < n; i++) {
272                 for (j = i + 1; j < n; j++) {
273                         zz[i][j] = 0.0;
274                         zz[j][i] = 0.0;
275                 }
276                 zz[i][i] = 1.0;
277         }
278         if (n <= 2)
279                 return;
280         for (m = n - 1; m >= 2; m--) {
281                 for (i = m; i < n; i++)
282                         zz[i][m - 1] = a[i][m - 2];
283                 i = ordr[m - 1];
284                 if (i != m) {
285                         for (j = m - 1; j < n; j++) {
286                                 zz[m - 1][j] = zz[i - 1][j];
287                                 zz[i - 1][j] = 0.0;
288                         }
289                         zz[i - 1][m - 1] = 1.0;
290                 }
291         }
292 }
293
294
295 void mcdiv(double ar, double ai, double br, double bi,
296         double *cr, double *ci)
297 {
298         double s, ars, ais, brs, bis;
299
300
301         s = fabs(br) + fabs(bi);
302         ars = ar / s;
303         ais = ai / s;
304         brs = br / s;
305         bis = bi / s;
306         s = brs * brs + bis * bis;
307         *cr = (ars * brs + ais * bis) / s;
308         *ci = (ais * brs - ars * bis) / s;
309 }
310
311
312 void hqr2(int n, int low, int hgh, dmatrix h,
313         dmatrix zz, dvector wr, dvector wi)
314 {
315         int i, j, k, l=0, m, en, na, itn, its;
316         double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
317         int notlas; /* boolean */
318
319
320         norm = 0.0;
321         k = 1;
322         /* store isolated roots and compute matrix norm */
323         for (i = 0; i < n; i++) {
324                 for (j = k - 1; j < n; j++)
325                         norm += fabs(h[i][j]);
326                 k = i + 1;
327                 if (i + 1 < low || i + 1 > hgh) {
328                         wr[i] = h[i][i];
329                         wi[i] = 0.0;
330                 }
331         }
332         en = hgh;
333         t = 0.0;
334         itn = n * 30;
335         while (en >= low) {     /* search for next eigenvalues */
336                 its = 0;
337                 na = en - 1;
338                 while (en >= 1) {
339                         /* look for single small sub-diagonal element */
340                         for (l = en; l > low; l--) {
341                                 s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
342                                 if (s == 0.0)
343                                         s = norm;
344                                 tst1 = s;
345                                 tst2 = tst1 + fabs(h[l - 1][l - 2]);
346                                 if (tst2 == tst1)
347                                         goto L100;
348                         }
349                         l = low;
350         L100:
351                         x = h[en - 1][en - 1];  /* form shift */
352                         if (l == en || l == na)
353                                 break;
354                         if (itn == 0) {
355                                 /* all eigenvalues have not converged */
356                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
357                                 exit(1);
358                         }
359                         y = h[na - 1][na - 1];
360                         w = h[en - 1][na - 1] * h[na - 1][en - 1];
361                         /* form exceptional shift */
362                         if (its == 10 || its == 20) {
363                                 t += x;
364                                 for (i = low - 1; i < en; i++)
365                                         h[i][i] -= x;
366                                 s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
367                                 x = 0.75 * s;
368                                 y = x;
369                                 w = -0.4375 * s * s;
370                         }
371                         its++;
372                         itn--;
373                         /* look for two consecutive small sub-diagonal elements */
374                         for (m = en - 2; m >= l; m--) {
375                                 z = h[m - 1][m - 1];
376                                 r = x - z;
377                                 s = y - z;
378                                 p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
379                                 q = h[m][m] - z - r - s;
380                                 r = h[m + 1][m];
381                                 s = fabs(p) + fabs(q) + fabs(r);
382                                 p /= s;
383                                 q /= s;
384                                 r /= s;
385                                 if (m == l)
386                                         break;
387                                 tst1 = fabs(p) *
388                                                 (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
389                                 tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
390                                 if (tst2 == tst1)
391                                         break;
392                         }
393                         for (i = m + 2; i <= en; i++) {
394                                 h[i - 1][i - 3] = 0.0;
395                                 if (i != m + 2)
396                                         h[i - 1][i - 4] = 0.0;
397                         }
398                         for (k = m; k <= na; k++) {
399                                 notlas = (k != na);
400                                 if (k != m) {
401                                         p = h[k - 1][k - 2];
402                                         q = h[k][k - 2];
403                                         r = 0.0;
404                                         if (notlas)
405                                                 r = h[k + 1][k - 2];
406                                         x = fabs(p) + fabs(q) + fabs(r);
407                                         if (x != 0.0) {
408                                                 p /= x;
409                                                 q /= x;
410                                                 r /= x;
411                                         }
412                                 }
413                                 if (x != 0.0) {
414                                         if (p < 0.0) /* sign */
415                                                 s = - sqrt(p * p + q * q + r * r);
416                                         else
417                                                 s = sqrt(p * p + q * q + r * r);
418                                         if (k != m)
419                                                 h[k - 1][k - 2] = -s * x;
420                                         else {
421                                                 if (l != m)
422                                                         h[k - 1][k - 2] = -h[k - 1][k - 2];
423                                         }
424                                         p += s;
425                                         x = p / s;
426                                         y = q / s;
427                                         z = r / s;
428                                         q /= p;
429                                         r /= p;
430                                         if (!notlas) {
431                                                 for (j = k - 1; j < n; j++) {   /* row modification */
432                                                         p = h[k - 1][j] + q * h[k][j];
433                                                         h[k - 1][j] -= p * x;
434                                                         h[k][j] -= p * y;
435                                                 }
436                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
437                                                 for (i = 0; i < j; i++) {       /* column modification */
438                                                         p = x * h[i][k - 1] + y * h[i][k];
439                                                         h[i][k - 1] -= p;
440                                                         h[i][k] -= p * q;
441                                                 }
442                                                 /* accumulate transformations */
443                                                 for (i = low - 1; i < hgh; i++) {
444                                                         p = x * zz[i][k - 1] + y * zz[i][k];
445                                                         zz[i][k - 1] -= p;
446                                                         zz[i][k] -= p * q;
447                                                 }
448                                         } else {
449                                                 for (j = k - 1; j < n; j++) {   /* row modification */
450                                                         p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
451                                                         h[k - 1][j] -= p * x;
452                                                         h[k][j] -= p * y;
453                                                         h[k + 1][j] -= p * z;
454                                                 }
455                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
456                                                 for (i = 0; i < j; i++) {       /* column modification */
457                                                         p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
458                                                         h[i][k - 1] -= p;
459                                                         h[i][k] -= p * q;
460                                                         h[i][k + 1] -= p * r;
461                                                 }
462                                                 /* accumulate transformations */
463                                                 for (i = low - 1; i < hgh; i++) {
464                                                         p = x * zz[i][k - 1] + y * zz[i][k] +
465                                                                 z * zz[i][k + 1];
466                                                         zz[i][k - 1] -= p;
467                                                         zz[i][k] -= p * q;
468                                                         zz[i][k + 1] -= p * r;
469                                                 }
470                                         }
471                                 }
472                         }              /* for k */
473                 }                      /* while infinite loop */
474                 if (l == en) {         /* one root found */
475                         h[en - 1][en - 1] = x + t;
476                         wr[en - 1] = h[en - 1][en - 1];
477                         wi[en - 1] = 0.0;
478                         en = na;
479                         continue;
480                 }
481                 y = h[na - 1][na - 1];
482                 w = h[en - 1][na - 1] * h[na - 1][en - 1];
483                 p = (y - x) / 2.0;
484                 q = p * p + w;
485                 z = sqrt(fabs(q));
486                 h[en - 1][en - 1] = x + t;
487                 x = h[en - 1][en - 1];
488                 h[na - 1][na - 1] = y + t;
489                 if (q >= 0.0) {        /* real pair */
490                         if (p < 0.0) /* sign */
491                                 z = p - fabs(z);
492                         else
493                                 z = p + fabs(z);
494                         wr[na - 1] = x + z;
495                         wr[en - 1] = wr[na - 1];
496                         if (z != 0.0)
497                                 wr[en - 1] = x - w / z;
498                         wi[na - 1] = 0.0;
499                         wi[en - 1] = 0.0;
500                         x = h[en - 1][na - 1];
501                         s = fabs(x) + fabs(z);
502                         p = x / s;
503                         q = z / s;
504                         r = sqrt(p * p + q * q);
505                         p /= r;
506                         q /= r;
507                         for (j = na - 1; j < n; j++) {  /* row modification */
508                                 z = h[na - 1][j];
509                                 h[na - 1][j] = q * z + p * h[en - 1][j];
510                                 h[en - 1][j] = q * h[en - 1][j] - p * z;
511                         }
512                         for (i = 0; i < en; i++) {      /* column modification */
513                                 z = h[i][na - 1];
514                                 h[i][na - 1] = q * z + p * h[i][en - 1];
515                                 h[i][en - 1] = q * h[i][en - 1] - p * z;
516                         }
517                         /* accumulate transformations */
518                         for (i = low - 1; i < hgh; i++) {
519                                 z = zz[i][na - 1];
520                                 zz[i][na - 1] = q * z + p * zz[i][en - 1];
521                                 zz[i][en - 1] = q * zz[i][en - 1] - p * z;
522                         }
523                 } else {               /* complex pair */
524                         wr[na - 1] = x + p;
525                         wr[en - 1] = x + p;
526                         wi[na - 1] = z;
527                         wi[en - 1] = -z;
528                 }
529                 en -= 2;
530         }                              /* while en >= low */
531         /* backsubstitute to find vectors of upper triangular form */
532         if (norm != 0.0) {
533                 for (en = n; en >= 1; en--) {
534                         p = wr[en - 1];
535                         q = wi[en - 1];
536                         na = en - 1;
537                         if (q == 0.0) {/* real vector */
538                                 m = en;
539                                 h[en - 1][en - 1] = 1.0;
540                                 if (na != 0) {
541                                         for (i = en - 2; i >= 0; i--) {
542                                                 w = h[i][i] - p;
543                                                 r = 0.0;
544                                                 for (j = m - 1; j < en; j++)
545                                                         r += h[i][j] * h[j][en - 1];
546                                                 if (wi[i] < 0.0) {
547                                                         z = w;
548                                                         s = r;
549                                                 } else {
550                                                         m = i + 1;
551                                                         if (wi[i] == 0.0) {
552                                                                 t = w;
553                                                                 if (t == 0.0) {
554                                                                         tst1 = norm;
555                                                                         t = tst1;
556                                                                         do {
557                                                                                 t = 0.01 * t;
558                                                                                 tst2 = norm + t;
559                                                                         } while (tst2 > tst1);
560                                                                 }
561                                                                 h[i][en - 1] = -(r / t);
562                                                         } else {        /* solve real equations */
563                                                                 x = h[i][i + 1];
564                                                                 y = h[i + 1][i];
565                                                                 q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
566                                                                 t = (x * s - z * r) / q;
567                                                                 h[i][en - 1] = t;
568                                                                 if (fabs(x) > fabs(z))
569                                                                         h[i + 1][en - 1] = (-r - w * t) / x;
570                                                                 else
571                                                                         h[i + 1][en - 1] = (-s - y * t) / z;
572                                                         }
573                                                         /* overflow control */
574                                                         t = fabs(h[i][en - 1]);
575                                                         if (t != 0.0) {
576                                                                 tst1 = t;
577                                                                 tst2 = tst1 + 1.0 / tst1;
578                                                                 if (tst2 <= tst1) {
579                                                                         for (j = i; j < en; j++)
580                                                                                 h[j][en - 1] /= t;
581                                                                 }
582                                                         }
583                                                 }
584                                         }
585                                 }
586                         } else if (q > 0.0) {
587                                 m = na;
588                                 if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
589                                         h[na - 1][na - 1] = q / h[en - 1][na - 1];
590                                         h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
591                                                                                                                 h[en - 1][na - 1];
592                                 } else
593                                         mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
594                                                 &h[na - 1][na - 1], &h[na - 1][en - 1]);
595                                 h[en - 1][na - 1] = 0.0;
596                                 h[en - 1][en - 1] = 1.0;
597                                 if (en != 2) {
598                                         for (i = en - 3; i >= 0; i--) {
599                                                 w = h[i][i] - p;
600                                                 ra = 0.0;
601                                                 sa = 0.0;
602                                                 for (j = m - 1; j < en; j++) {
603                                                         ra += h[i][j] * h[j][na - 1];
604                                                         sa += h[i][j] * h[j][en - 1];
605                                                 }
606                                                 if (wi[i] < 0.0) {
607                                                         z = w;
608                                                         r = ra;
609                                                         s = sa;
610                                                 } else {
611                                                         m = i + 1;
612                                                         if (wi[i] == 0.0)
613                                                                 mcdiv(-ra, -sa, w, q, &h[i][na - 1],
614                                                                         &h[i][en - 1]);
615                                                         else {  /* solve complex equations */
616                                                                 x = h[i][i + 1];
617                                                                 y = h[i + 1][i];
618                                                                 vr = (wr[i] - p) * (wr[i] - p);
619                                                                 vr = vr + wi[i] * wi[i] - q * q;        
620                                                                 vi = (wr[i] - p) * 2.0 * q;
621                                                                 if (vr == 0.0 && vi == 0.0) {
622                                                                         tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
623                                                                                                    fabs(y) + fabs(z));
624                                                                         vr = tst1;
625                                                                         do {
626                                                                                 vr = 0.01 * vr;
627                                                                                 tst2 = tst1 + vr;
628                                                                         } while (tst2 > tst1);
629                                                                 }
630                                                                 mcdiv(x * r - z * ra + q * sa,
631                                                                          x * s - z * sa - q * ra, vr, vi,
632                                                                      &h[i][na - 1], &h[i][en - 1]);
633                                                                 if (fabs(x) > fabs(z) + fabs(q)) {
634                                                                         h[i + 1]
635                                                                                 [na - 1] = (q * h[i][en - 1] -
636                                                                                                         w * h[i][na - 1] - ra) / x;
637                                                                         h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
638                                                                                                                 q * h[i][na - 1]) / x;
639                                                                 } else
640                                                                         mcdiv(-r - y * h[i][na - 1],
641                                                                                  -s - y * h[i][en - 1], z, q,
642                                                                              &h[i + 1][na - 1], &h[i + 1][en - 1]);
643                                                         }
644                                                         /* overflow control */
645                                                         t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
646                                                                  fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
647                                                         if (t != 0.0) {
648                                                                 tst1 = t;
649                                                                 tst2 = tst1 + 1.0 / tst1;
650                                                                 if (tst2 <= tst1) {
651                                                                         for (j = i; j < en; j++) {
652                                                                                 h[j][na - 1] /= t;
653                                                                                 h[j][en - 1] /= t;
654                                                                         }
655                                                                 }
656                                                         }
657                                                 }
658                                         }
659                                 }
660                         }
661                 }
662                 /* end back substitution. vectors of isolated roots */
663                 for (i = 0; i < n; i++) {
664                         if (i + 1 < low || i + 1 > hgh) {
665                                 for (j = i; j < n; j++)
666                                         zz[i][j] = h[i][j];
667                         }
668                 }
669                 /* multiply by transformation matrix to give vectors of
670                  * original full matrix. */
671                 for (j = n - 1; j >= low - 1; j--) {
672                         m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
673                         for (i = low - 1; i < hgh; i++) {
674                                 z = 0.0;
675                                 for (k = low - 1; k < m; k++)
676                                         z += zz[i][k] * h[k][j];
677                                 zz[i][j] = z;
678                         }
679                 }
680         }
681         return;
682 }
683
684
685 /* make rate matrix with 0.01 expected substitutions per unit time */
686 void onepamratematrix(dmatrix a)
687 {
688         int i, j;
689         double delta, temp, sum;
690         dvector m;
691
692         for (i = 0; i < tpmradix; i++)
693         {
694                 for (j = 0; j < tpmradix; j++)
695                 {
696                         a[i][j] = Freqtpm[j]*a[i][j];
697                 }
698         }
699
700         m = new_dvector(tpmradix);
701         for (i = 0, sum = 0.0; i < tpmradix; i++)
702         {
703                 for (j = 0, temp = 0.0; j < tpmradix; j++)
704                         temp += a[i][j];
705                 m[i] = temp; /* row sum */
706                 sum += temp*Freqtpm[i]; /* exp. rate */
707         }
708         delta = 0.01 / sum; /* 0.01 subst. per unit time */
709         for (i = 0; i < tpmradix; i++) {
710                 for (j = 0; j < tpmradix; j++) {
711                         if (i != j)
712                                 a[i][j] = delta * a[i][j];
713                         else
714                                 a[i][j] = delta * (-m[i]);
715                 }
716         }
717         free_dvector(m);
718 }
719
720
721 void eigensystem(dvector eval, dmatrix evec)
722 {
723         dvector evali, forg;
724         dmatrix a, b;
725         ivector ordr;
726         int i, j, k, error;
727         double zero;
728
729
730         ordr = new_ivector(tpmradix);
731         evali = new_dvector(tpmradix);
732         forg = new_dvector(tpmradix);
733         a = new_dmatrix(tpmradix,tpmradix);
734         b = new_dmatrix(tpmradix,tpmradix);
735
736         rtfdata(a, forg); /* get relative transition matrix and frequencies */
737         
738         onepamratematrix(a); /* make 1 PAM rate matrix */
739         
740         /* copy a to b */
741         for (i = 0; i < tpmradix; i++)
742                 for (j = 0; j < tpmradix; j++)
743                         b[i][j] = a[i][j];
744
745         elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
746         eltran(a, evec, ordr, tpmradix);
747         hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
748         
749         /* check eigenvalue equation */
750         error = FALSE;
751         for (j = 0; j < tpmradix; j++) {
752                 for (i = 0, zero = 0.0; i < tpmradix; i++) {
753                         for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
754                         zero -= eval[j] * evec[i][j];
755                         if (fabs(zero) > 1.0e-5)
756                                 error = TRUE;   
757                 }
758         }
759         if (error)
760                 FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
761
762         free_ivector(ordr);
763         free_dvector(evali);
764         free_dvector(forg);
765         free_dmatrix(a);
766         free_dmatrix(b);
767 }
768
769
770 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
771 {
772     double eps = 1.0e-20; /* ! */
773         int i, j, k, l, maxi=0, idx, ix, jx;
774         double sum, tmp, maxb, aw;
775         ivector index;
776         double *wk;
777         dmatrix omtrx;
778
779         
780         index = new_ivector(tpmradix);
781         omtrx = new_dmatrix(tpmradix,tpmradix);
782         
783         /* copy inmat to omtrx */
784         for (i = 0; i < tpmradix; i++)
785                 for (j = 0; j < tpmradix; j++)
786                         omtrx[i][j] = inmat[i][j];
787         
788         wk = (double *) malloc((unsigned)size * sizeof(double));
789         aw = 1.0;
790         for (i = 0; i < size; i++) {
791                 maxb = 0.0;
792                 for (j = 0; j < size; j++) {
793                         if (fabs(omtrx[i][j]) > maxb)
794                                 maxb = fabs(omtrx[i][j]);
795                 }
796                 if (maxb == 0.0) {
797                         /* Singular matrix */
798                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
799                         exit(1);
800                 }
801                 wk[i] = 1.0 / maxb;
802         }
803         for (j = 0; j < size; j++) {
804                 for (i = 0; i < j; i++) {
805                         sum = omtrx[i][j];
806                         for (k = 0; k < i; k++)
807                                 sum -= omtrx[i][k] * omtrx[k][j];
808                         omtrx[i][j] = sum;
809                 }
810                 maxb = 0.0;
811                 for (i = j; i < size; i++) {
812                         sum = omtrx[i][j];
813                         for (k = 0; k < j; k++)
814                                 sum -= omtrx[i][k] * omtrx[k][j];
815                         omtrx[i][j] = sum;
816                         tmp = wk[i] * fabs(sum);
817                         if (tmp >= maxb) {
818                                 maxb = tmp;
819                                 maxi = i;
820                         }
821                 }
822                 if (j != maxi) {
823                         for (k = 0; k < size; k++) {
824                                 tmp = omtrx[maxi][k];
825                                 omtrx[maxi][k] = omtrx[j][k];
826                                 omtrx[j][k] = tmp;
827                         }
828                         aw = -aw;
829                         wk[maxi] = wk[j];
830                 }
831                 index[j] = maxi;
832                 if (omtrx[j][j] == 0.0)
833                         omtrx[j][j] = eps;
834                 if (j != size - 1) {
835                         tmp = 1.0 / omtrx[j][j];
836                         for (i = j + 1; i < size; i++)
837                                 omtrx[i][j] *= tmp;
838                 }
839         }
840         for (jx = 0; jx < size; jx++) {
841                 for (ix = 0; ix < size; ix++)
842                         wk[ix] = 0.0;
843                 wk[jx] = 1.0;
844                 l = -1;
845                 for (i = 0; i < size; i++) {
846                         idx = index[i];
847                         sum = wk[idx];
848                         wk[idx] = wk[i];
849                         if (l != -1) {
850                                 for (j = l; j < i; j++)
851                                         sum -= omtrx[i][j] * wk[j];
852                         } else if (sum != 0.0)
853                                 l = i;
854                         wk[i] = sum;
855                 }
856                 for (i = size - 1; i >= 0; i--) {
857                         sum = wk[i];
858                         for (j = i + 1; j < size; j++)
859                                 sum -= omtrx[i][j] * wk[j];
860                         wk[i] = sum / omtrx[i][i];
861                 }
862                 for (ix = 0; ix < size; ix++)
863                         imtrx[ix][jx] = wk[ix];
864         }
865         free((char *)wk);
866         wk = NULL;
867         free_ivector(index);
868         free_dmatrix(omtrx);
869 }
870
871
872 void checkevector(dmatrix evec, dmatrix ivec, int nn)
873 {
874         int i, j, ia, ib, ic, error;
875         dmatrix matx;
876         double sum;
877
878
879         matx = new_dmatrix(nn, nn);
880         /* multiply matrix of eigenvectors and its inverse */
881         for (ia = 0; ia < nn; ia++) {
882                 for (ic = 0; ic < nn; ic++) {
883                         sum = 0.0;
884                         for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
885                         matx[ia][ic] = sum;
886                 }
887         }
888         /* check whether the unitary matrix is obtained */
889         error = FALSE;
890         for (i = 0; i < nn; i++) {
891                 for (j = 0; j < nn; j++) {
892                         if (i == j) {
893                                 if (fabs(matx[i][j] - 1.0) > 1.0e-5)
894                                         error = TRUE;
895                         } else {
896                                 if (fabs(matx[i][j]) > 1.0e-5)
897                                         error = TRUE;
898                         }
899                 }
900         }
901         if (error) {
902                 FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
903         }
904         free_dmatrix(matx);
905 }
906
907
908 /***************************** exported functions *****************************/
909
910
911 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
912 void tranprobmat()
913 {
914         eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
915         luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
916         checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
917 }
918
919
920 /* compute P(t) */
921 void tprobmtrx(double arc, dmatrix tpr)
922 {
923         register int i, j, k;
924         register double temp;
925
926
927         for (k = 0; k < tpmradix; k++) {
928                 temp = exp(arc * Eval[k]);
929                 for (j = 0; j < tpmradix; j++)
930                         iexp[k][j] = Ievc[k][j] * temp;
931         }
932         for (i = 0; i < tpmradix; i++) {
933                 for (j = 0; j < tpmradix; j++) {
934                         temp = 0.0;
935                         for (k = 0; k < tpmradix; k++)
936                                 temp += Evec[i][k] * iexp[k][j];
937                         tpr[i][j] = fabs(temp);
938                 }
939         }
940 }
941
942
943 /******************************************************************************/
944 /* estimation of maximum likelihood distances                                 */
945 /******************************************************************************/
946
947 /* compute total log-likelihood
948    input: likelihoods for each site and non-zero rate
949    output: total log-likelihood (incl. zero rate category) */
950 double comptotloglkl(dmatrix cdl)
951 {
952         int k, r;
953         double loglkl, fv, fv2, sitelkl;
954
955         loglkl = 0.0;
956         fv = 1.0-fracinv;
957         fv2 = (1.0-fracinv)/(double) numcats;
958         
959         if (numcats == 1) {
960
961                 for (k = 0; k < Numptrn; k++) { 
962                 
963                         /* compute likelihood for pattern k */
964                         sitelkl = cdl[0][k]*fv;
965                         if (constpat[k] == TRUE)
966                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
967                 
968                         /* total log-likelihood */
969                         loglkl += log(sitelkl)*Weight[k];
970                 
971                 }
972
973         } else {
974         
975                 for (k = 0; k < Numptrn; k++) {
976                         
977                         /* this general routine works always but it's better 
978                to run it only when it's really necessary */
979                 
980                         /* compute likelihood for pattern k */
981                         sitelkl = 0.0;
982                         for (r = 0; r < numcats; r++)
983                                 sitelkl += cdl[r][k];
984                         sitelkl = fv2*sitelkl;
985                         if (constpat[k] == TRUE)
986                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
987                 
988                         /* total log-likelihood */
989                         loglkl += log(sitelkl)*Weight[k];
990                 
991                 }
992
993         }
994         
995         return loglkl;
996 }
997
998
999 /* computes the site log-likelihoods 
1000    input: likelihoods for each site and non-zero rate
1001    output: log-likelihood for each site */
1002 void allsitelkl(dmatrix cdl, dvector aslkl)
1003 {
1004         int k, r;
1005         double fv, fv2, sitelkl;
1006
1007         fv = 1.0-fracinv;
1008         fv2 = (1.0-fracinv)/(double) numcats;
1009         
1010         if (numcats == 1) {
1011
1012                 for (k = 0; k < Numptrn; k++) { 
1013                 
1014                         /* compute likelihood for pattern k */
1015                         sitelkl = cdl[0][k]*fv;
1016                         if (constpat[k] == TRUE)
1017                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1018                 
1019                         /* site log-likelihood */
1020                         aslkl[k] = log(sitelkl);
1021                 }
1022
1023         } else {
1024         
1025                 for (k = 0; k < Numptrn; k++) {
1026                         
1027                         /* this general routine works always but it's better 
1028                to run it only when it's really necessary */
1029                 
1030                         /* compute likelihood for pattern k */
1031                         sitelkl = 0.0;
1032                         for (r = 0; r < numcats; r++)
1033                                 sitelkl += cdl[r][k];
1034                         sitelkl = fv2*sitelkl;
1035                         if (constpat[k] == TRUE)
1036                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1037                 
1038                         /* total log-likelihood */
1039                         aslkl[k] = log(sitelkl);
1040                 
1041                 }
1042         }
1043 }
1044
1045
1046 /***************************** internal functions *****************************/
1047
1048 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049 double pairlkl(double arc)
1050 {
1051         int k, r, ci, cj;
1052         double loglkl, fv, sitelkl;
1053
1054         
1055         /* compute tpms */
1056         for (r = 0; r < numcats; r++)
1057                 /* compute tpm for rate category r */
1058                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1059
1060         loglkl = 0.0;
1061         fv = 1.0-fracinv;
1062         
1063         if (numcats == 1) {
1064
1065                 for (k = 0; k < Numptrn; k++) { 
1066                 
1067                         /* compute likelihood for site k */
1068                         ci = seqchi[k];
1069                         cj = seqchj[k];
1070                         if (ci != tpmradix && cj != tpmradix)
1071                                 sitelkl = ltprobr[0][ci][cj]*fv;
1072                         else
1073                                 sitelkl = fv;
1074                         if (ci == cj && ci != tpmradix)
1075                                 sitelkl += fracinv*Freqtpm[ci];
1076                 
1077                         /* total log-likelihood */
1078                         loglkl += log(sitelkl)*Weight[k];
1079                 
1080                 }
1081
1082         } else {
1083         
1084                 for (k = 0; k < Numptrn; k++) {
1085                         
1086                         /* this general routine works always but it's better 
1087                to run it only when it's really necessary */
1088                 
1089                         /* compute likelihood for site k */
1090                         ci = seqchi[k];
1091                         cj = seqchj[k];
1092                         if (ci != tpmradix && cj != tpmradix) {
1093                                 sitelkl = 0.0;
1094                                 for (r = 0; r < numcats; r++)
1095                                         sitelkl += ltprobr[r][ci][cj];
1096                                 sitelkl = fv*sitelkl/(double) numcats;  
1097                         } else
1098                                 sitelkl = fv;
1099                         if (ci == cj && ci != tpmradix)
1100                                 sitelkl += fracinv*Freqtpm[ci];
1101                 
1102                         /* total log-likelihood */
1103                         loglkl += log(sitelkl)*Weight[k];
1104                 
1105                 }
1106
1107         }
1108         
1109         /* return negative log-likelihood as we use a minimizing procedure */
1110         return -loglkl;
1111 }
1112
1113
1114 /***************************** exported functions *****************************/
1115
1116
1117 /* maximum likelihood distance between sequence i and j */
1118 double mldistance(int i, int j)
1119 {
1120         double dist, fx, f2x;
1121         
1122         if (i == j) return 0.0;
1123         
1124         /* use old distance as start value */   
1125         dist = Distanmat[i][j];
1126
1127         if (dist == 0.0) return 0.0;
1128
1129         seqchi = Seqpat[i];
1130         seqchj = Seqpat[j];
1131
1132         if (dist <= MINARC) dist = MINARC+1.0;
1133         if (dist >= MAXARC) dist = MAXARC-1.0;
1134         
1135         dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1136         
1137         return dist;
1138 }
1139
1140
1141 /* initialize distance matrix */
1142 void initdistan()
1143 {
1144         int i, j, k, diff, x, y;
1145         double obs, temp;
1146         
1147         for (i = 0; i < Maxspc; i++) {
1148                 Distanmat[i][i] = 0.0;
1149                 for (j = i + 1; j < Maxspc; j++) {
1150                         seqchi = Seqpat[i];
1151                         seqchj = Seqpat[j];
1152                         
1153                         /* count observed differences */
1154                         diff = 0;
1155                         for (k = 0; k < Numptrn; k++) {
1156                                 x = seqchi[k];
1157                                 y = seqchj[k];
1158                                 if (x != y &&
1159                                         x != tpmradix &&
1160                                         y != tpmradix)
1161                                         diff += Weight[k];
1162                         }
1163                         if (diff == 0)
1164                                 Distanmat[i][j] = 0.0;
1165                         else {
1166                                 /* use generalized JC correction to get first estimate
1167                                    (for the SH model the observed distance is used) */
1168                                 /* observed distance */
1169                                 obs = (double) diff / (double) Maxsite;
1170                                 temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1171                                 if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1172                                         /* use JC corrected distance */
1173                                         Distanmat[i][j] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1174                                 else
1175                                         /* use observed distance */
1176                                         Distanmat[i][j] = obs * 100.0;
1177                                 if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC;
1178                                 if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC;
1179                         }
1180                         Distanmat[j][i] = Distanmat[i][j];
1181                 }
1182         }
1183 }
1184
1185 /* compute distance matrix */
1186 void computedistan()
1187 {
1188         int i, j;
1189
1190         for (i = 0; i < Maxspc; i++)
1191                 for (j = i; j < Maxspc; j++) {
1192                         Distanmat[i][j] = mldistance(i, j);
1193                         Distanmat[j][i] = Distanmat[i][j];
1194                 }
1195 }
1196
1197
1198 /******************************************************************************/
1199 /* computation of maximum likelihood edge lengths for a given tree            */
1200 /******************************************************************************/
1201
1202
1203 /***************************** internal functions *****************************/
1204
1205
1206 /* multiply partial likelihoods */
1207 void productpartials(Node *op)
1208 {
1209         Node *cp;
1210         int i, j, r;
1211         dcube opc, cpc;
1212
1213         cp = op;
1214         opc = op->partials;
1215         while (cp->isop->isop != op) {
1216                 cp = cp->isop;
1217                 cpc = cp->partials;
1218                 for (r = 0; r < numcats; r++)
1219                         for (i = 0; i < Numptrn; i++)
1220                                 for (j = 0; j < tpmradix; j++)
1221                                         opc[r][i][j] *= cpc[r][i][j];
1222         }
1223 }
1224
1225
1226 /* compute internal partial likelihoods */
1227 void partialsinternal(Node *op)
1228 {
1229         int i, j, k, r;
1230         double sum;
1231         dcube oprob, cprob;
1232
1233         if (clockmode == 1) { /* clocklike branch lengths */
1234                 for (r = 0; r < numcats; r++) {
1235                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1236                 }
1237         } else {  /* non-clocklike branch lengths */
1238                 for (r = 0; r < numcats; r++) {
1239                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1240                 }
1241         }
1242         
1243         oprob = op->partials;
1244         cprob = op->kinp->isop->partials;
1245         for (r = 0; r < numcats; r++) {
1246                 for (k = 0; k < Numptrn; k++) {
1247                         for (i = 0; i < tpmradix; i++) {
1248                                 sum = 0.0;
1249                                 for (j = 0; j < tpmradix; j++)
1250                                         sum += ltprobr[r][i][j] * cprob[r][k][j];
1251                                 oprob[r][k][i] = sum;
1252                         }
1253                 }
1254         }
1255 }
1256
1257
1258 /* compute external partial likelihoods */
1259 void partialsexternal(Node *op)
1260 {
1261         int i, j, k, r;
1262         dcube oprob;
1263         cvector dseqi;
1264
1265         if (clockmode == 1) { /* clocklike branch lengths */
1266                 for (r = 0; r < numcats; r++) {
1267                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1268                 }
1269         } else {  /* nonclocklike branch lengths */
1270                 for (r = 0; r < numcats; r++) {
1271                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1272                 }
1273         }
1274
1275         oprob = op->partials;
1276         dseqi = op->kinp->eprob;
1277         for (r = 0; r < numcats; r++) {
1278                 for (k = 0; k < Numptrn; k++) {
1279                         if ((j = dseqi[k]) == tpmradix) {
1280                                 for (i = 0; i < tpmradix; i++)
1281                                         oprob[r][k][i] = 1.0;
1282                         } else {
1283                                 for (i = 0; i < tpmradix; i++)
1284                                         oprob[r][k][i] = ltprobr[r][i][j];
1285                         }
1286                 }
1287         }
1288 }
1289
1290
1291 /* compute all partial likelihoods */
1292 void initpartials(Tree *tr)
1293 {
1294         Node *cp, *rp;
1295
1296         cp = rp = tr->rootp;
1297         do {
1298                 cp = cp->isop->kinp;
1299                 if (cp->isop == NULL) { /* external node */                     
1300                         cp = cp->kinp; /* not descen */
1301                         partialsexternal(cp);
1302                 } else { /* internal node */    
1303                         if (!cp->descen) {
1304                                 productpartials(cp->kinp->isop);
1305                                 partialsinternal(cp);
1306                         }
1307                 }
1308         } while (cp != rp);
1309 }
1310
1311
1312 /* compute log-likelihood given internal branch with length arc
1313    between partials partiali and partials partialj */
1314 double intlkl(double arc)
1315 {
1316         double sumlk, slk;
1317         int r, s, i, j;
1318         dmatrix cdl;
1319
1320         cdl = Ctree->condlkl;
1321         for (r = 0; r < numcats; r++) {
1322                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1323         }
1324         for (r = 0; r < numcats; r++) {
1325                 for (s = 0; s < Numptrn; s++) {
1326                         sumlk = 0.0;
1327                         for (i = 0; i < tpmradix; i++) {                        
1328                                 slk = 0.0;
1329                                 for (j = 0; j < tpmradix; j++) 
1330                                         slk += partialj[r][s][j] * ltprobr[r][i][j];            
1331                                 sumlk += Freqtpm[i] * partiali[r][s][i] * slk;  
1332                         }
1333                         cdl[r][s] = sumlk;
1334                 }
1335         }
1336
1337         /* compute total log-likelihood for current tree */
1338         Ctree->lklhd = comptotloglkl(cdl);
1339
1340         return -(Ctree->lklhd); /* we use a minimizing procedure */
1341 }
1342
1343
1344 /* optimize internal branch */
1345 void optinternalbranch(Node *op)
1346 {
1347         double arc, fx, f2x;
1348
1349         partiali = op->isop->partials;
1350         partialj = op->kinp->isop->partials;
1351         arc = op->length; /* nonclocklike branch lengths */
1352         if (arc <= MINARC) arc = MINARC+1.0;
1353         if (arc >= MAXARC) arc = MAXARC-1.0;
1354         arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1355         op->kinp->length = arc;
1356         op->length = arc;
1357         
1358         /* variance of branch length */
1359         f2x = fabs(f2x);
1360         if (1.0/(MAXARC*MAXARC) < f2x)
1361                 op->varlen = 1.0/f2x;
1362         else
1363                 op->varlen = MAXARC*MAXARC;
1364 }
1365
1366
1367 /* compute log-likelihood given external branch with length arc
1368    between partials partiali and sequence seqchi */
1369 double extlkl(double arc)
1370 {
1371         double sumlk;
1372         int r, s, i, j;
1373         dvector opb;
1374         dmatrix cdl;
1375         
1376         cdl = Ctree->condlkl;
1377         for (r = 0; r < numcats; r++) {
1378                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1379         }
1380         for (r = 0; r < numcats; r++) {
1381                 for (s = 0; s < Numptrn; s++) {
1382                         opb = partiali[r][s];
1383                         sumlk = 0.0;
1384                         if ((j = seqchi[s]) != tpmradix) {
1385                                 for (i = 0; i < tpmradix; i++)
1386                                         sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1387                         } else {
1388                                 for (i = 0; i < tpmradix; i++)
1389                                         sumlk += Freqtpm[i] * opb[i];
1390                         }
1391                         cdl[r][s] = sumlk;
1392                 }
1393         }
1394         
1395         /* compute total log-likelihood for current tree */
1396         Ctree->lklhd = comptotloglkl(cdl);
1397
1398         return -(Ctree->lklhd); /* we use a minimizing procedure */
1399 }
1400
1401 /* optimize external branch */
1402 void optexternalbranch(Node *op)
1403 {
1404         double arc, fx, f2x;
1405
1406         partiali = op->isop->partials;
1407         seqchi = op->kinp->eprob;
1408         arc = op->length; /* nonclocklike branch lengths */
1409         if (arc <= MINARC) arc = MINARC+1.0;
1410         if (arc >= MAXARC) arc = MAXARC-1.0;
1411         arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1412         op->kinp->length = arc;
1413         op->length = arc;
1414         
1415          /* variance of branch length */
1416         f2x = fabs(f2x);
1417         if (1.0/(MAXARC*MAXARC) < f2x)
1418                 op->varlen = 1.0/f2x;
1419         else
1420                 op->varlen = MAXARC*MAXARC;
1421 }
1422
1423
1424 /* finish likelihoods for each rate and site */
1425 void finishlkl(Node *op)
1426 {
1427         int r, k, i, j;
1428         double arc, sumlk, slk;
1429         dmatrix cdl;
1430
1431         partiali = op->isop->partials;
1432         partialj = op->kinp->isop->partials;
1433         cdl = Ctree->condlkl;
1434         arc = op->length; /* nonclocklike branch lengths */
1435         for (r = 0; r < numcats; r++) {
1436                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1437         }
1438         for (r = 0; r < numcats; r++) {
1439                 for (k = 0; k < Numptrn; k++) {
1440                         sumlk = 0.0;
1441                         for (i = 0; i < tpmradix; i++) {
1442                                 slk = 0.0;
1443                                 for (j = 0; j < tpmradix; j++)
1444                                         slk += partialj[r][k][j] * ltprobr[r][i][j];
1445                                 sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1446                         }                       
1447                         cdl[r][k] = sumlk;
1448                 }
1449         }
1450 }
1451
1452
1453 /***************************** exported functions *****************************/
1454
1455
1456 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1457 double optlkl(Tree *tr)
1458 {
1459         Node *cp, *rp;
1460         int nconv;
1461         double lendiff;
1462         
1463         clockmode = 0; /* nonclocklike branch lengths */
1464         nconv = 0;
1465         Converg = FALSE;
1466         initpartials(tr);
1467         for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1468                 
1469                 cp = rp = tr->rootp;
1470                 do {
1471                         cp = cp->isop->kinp;
1472                         productpartials(cp->kinp->isop);
1473                         if (cp->isop == NULL) { /* external node */     
1474                                 cp = cp->kinp; /* not descen */
1475                                 
1476                                 lendiff = cp->length;
1477                                 optexternalbranch(cp);
1478                                 lendiff = fabs(lendiff - cp->length);
1479                                 if (lendiff < EPSILON) nconv++;
1480                                 else nconv = 0;                         
1481                                 
1482                                 partialsexternal(cp);
1483                         } else { /* internal node */
1484                                 if (cp->descen) {
1485                                         partialsinternal(cp);
1486                                 } else {
1487                                         
1488                                         lendiff = cp->length;
1489                                         optinternalbranch(cp);
1490                                         lendiff = fabs(lendiff - cp->length);
1491                                         if (lendiff < EPSILON) nconv++;
1492                                         else nconv = 0;
1493                                         
1494                                         /* eventually compute likelihoods for each site */
1495                                         if ((cp->number == Numibrnch-1 && lendiff < EPSILON) ||
1496                                         Numit == MAXIT-1) finishlkl(cp);
1497
1498                                         partialsinternal(cp);
1499                                 }
1500                         }                       
1501                         if (nconv >= Numbrnch) { /* convergence */
1502                                 Converg = TRUE;
1503                                 cp = rp; /* get out of here */
1504                         }
1505                 } while (cp != rp);
1506         }
1507
1508         /* compute total log-likelihood for current tree */
1509         return comptotloglkl(tr->condlkl);
1510 }
1511
1512
1513 /* compute likelihood of tree for given branch lengths */
1514 double treelkl(Tree *tr)
1515 {
1516         int i, k, r;
1517         Node *cp;
1518         dmatrix cdl;
1519         dcube prob1, prob2;
1520         double sumlk;
1521
1522         /* compute for each site and rate log-likelihoods */
1523         initpartials(tr);
1524         cp = tr->rootp;
1525         productpartials(cp->isop);
1526         prob1 = cp->partials;
1527         prob2 = cp->isop->partials;
1528         cdl = tr->condlkl;
1529         for (r = 0; r < numcats; r++) {
1530                 for (k = 0; k < Numptrn; k++) {
1531                         sumlk = 0.0;
1532                         for (i = 0; i < tpmradix; i++)
1533                                 sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1534                         cdl[r][k] = sumlk;
1535                 }
1536         }       
1537         
1538         /* return total log-likelihood for current tree */
1539         return comptotloglkl(cdl);
1540 }
1541
1542
1543 /******************************************************************************/
1544 /* least-squares estimate of branch lengths                                   */
1545 /******************************************************************************/
1546
1547
1548 /***************************** internal functions *****************************/
1549
1550
1551 void luequation(dmatrix amat, dvector yvec, int size)
1552 {
1553     double eps = 1.0e-20; /* ! */
1554         int i, j, k, l, maxi=0, idx;
1555         double sum, tmp, maxb, aw;
1556         dvector wk;
1557         ivector index;
1558
1559
1560         wk = new_dvector(size);
1561         index = new_ivector(size);
1562         aw = 1.0;
1563         for (i = 0; i < size; i++) {
1564                 maxb = 0.0;
1565                 for (j = 0; j < size; j++) {
1566                         if (fabs(amat[i][j]) > maxb)
1567                                 maxb = fabs(amat[i][j]);
1568                 }
1569                 if (maxb == 0.0) {
1570                         /* Singular matrix */
1571                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1572                         exit(1);
1573                 }
1574                 wk[i] = 1.0 / maxb;
1575         }
1576         for (j = 0; j < size; j++) {
1577                 for (i = 0; i < j; i++) {
1578                         sum = amat[i][j];
1579                         for (k = 0; k < i; k++)
1580                                 sum -= amat[i][k] * amat[k][j];
1581                         amat[i][j] = sum;
1582                 }
1583                 maxb = 0.0;
1584                 for (i = j; i < size; i++) {
1585                         sum = amat[i][j];
1586                         for (k = 0; k < j; k++)
1587                                 sum -= amat[i][k] * amat[k][j];
1588                         amat[i][j] = sum;
1589                         tmp = wk[i] * fabs(sum);
1590                         if (tmp >= maxb) {
1591                                 maxb = tmp;
1592                                 maxi = i;
1593                         }
1594                 }
1595                 if (j != maxi) {
1596                         for (k = 0; k < size; k++) {
1597                                 tmp = amat[maxi][k];
1598                                 amat[maxi][k] = amat[j][k];
1599                                 amat[j][k] = tmp;
1600                         }
1601                         aw = -aw;
1602                         wk[maxi] = wk[j];
1603                 }
1604                 index[j] = maxi;
1605                 if (amat[j][j] == 0.0)
1606                         amat[j][j] = eps;
1607                 if (j != size - 1) {
1608                         tmp = 1.0 / amat[j][j];
1609                         for (i = j + 1; i < size; i++)
1610                                 amat[i][j] *= tmp;
1611                 }
1612         }
1613         l = -1;
1614         for (i = 0; i < size; i++) {
1615                 idx = index[i];
1616                 sum = yvec[idx];
1617                 yvec[idx] = yvec[i];
1618                 if (l != -1) {
1619                         for (j = l; j < i; j++)
1620                                 sum -= amat[i][j] * yvec[j];
1621                 } else if (sum != 0.0)
1622                         l = i;
1623                 yvec[i] = sum;
1624         }
1625         for (i = size - 1; i >= 0; i--) {
1626                 sum = yvec[i];
1627                 for (j = i + 1; j < size; j++)
1628                         sum -= amat[i][j] * yvec[j];
1629                 yvec[i] = sum / amat[i][i];
1630         }
1631         free_ivector(index);
1632         free_dvector(wk);
1633 }
1634
1635
1636 /* least square estimation of branch lengths
1637    used for the approximate ML and as starting point 
1638    in the calculation of the exact value of the ML */
1639 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1640 {
1641         int i, i1, j, j1, j2, k, numbrnch, numpair;
1642         double sum, leng, alllen, rss;
1643         ivector pths;
1644         dmatrix atmt, atamt;
1645         Node **ebp, **ibp;
1646
1647         numbrnch = numspc + numibrnch;
1648         numpair = (numspc * (numspc - 1)) / 2;
1649         atmt = new_dmatrix(numbrnch, numpair);
1650         atamt = new_dmatrix(numbrnch, numbrnch);
1651         ebp = tr->ebrnchp;
1652         ibp = tr->ibrnchp;
1653         for (i = 0; i < numspc; i++) {
1654                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1655                         if (j1 == i) {
1656                                 for (j2 = 0; j2 < j1; j2++, j++) {
1657                                         atmt[i][j] = 1.0;
1658                                 }
1659                         } else {
1660                                 for (j2 = 0; j2 < j1; j2++, j++) {
1661                                         if (j2 == i)
1662                                                 atmt[i][j] = 1.0;
1663                                         else
1664                                                 atmt[i][j] = 0.0;
1665                                 }
1666                         }
1667                 }
1668         }
1669         for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1670                 pths = ibp[i1]->paths;
1671                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1672                         for (j2 = 0; j2 < j1; j2++, j++) {
1673                                 if (pths[j1] != pths[j2])
1674                                         atmt[i][j] = 1.0;
1675                                 else
1676                                         atmt[i][j] = 0.0;
1677                         }
1678                 }
1679         }
1680         for (i = 0; i < numbrnch; i++) {
1681                 for (j = 0; j <= i; j++) {
1682                         for (k = 0, sum = 0.0; k < numpair; k++)
1683                                 sum += atmt[i][k] * atmt[j][k];
1684                         atamt[i][j] = sum;
1685                         atamt[j][i] = sum;
1686                 }
1687         }
1688         for (i = 0; i < numbrnch; i++) {
1689                 for (k = 0, sum = 0.0; k < numpair; k++)
1690                         sum += atmt[i][k] * distanvec[k];
1691                 Brnlength[i] = sum;
1692         }
1693         luequation(atamt, Brnlength, numbrnch);
1694         for (i = 0, rss = 0.0; i < numpair; i++) {
1695                 sum = distanvec[i];
1696                 for (j = 0; j < numbrnch; j++) {
1697                         if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1698                                 sum -= Brnlength[j];
1699                 }
1700                 rss += sum * sum;
1701         }
1702         tr->rssleast = sqrt(rss);
1703         alllen = 0.0;
1704         for (i = 0; i < numspc; i++) {
1705                 leng = Brnlength[i];
1706                 alllen += leng;
1707                 if (leng < MINARC) leng = MINARC;
1708                 if (leng > MAXARC) leng = MAXARC;
1709                 if (clockmode) { /* clock */
1710                         ebp[i]->lengthc = leng;
1711                         ebp[i]->kinp->lengthc = leng;
1712                 } else { /* no clock */
1713                         ebp[i]->length = leng;
1714                         ebp[i]->kinp->length = leng;
1715                 }       
1716                 Brnlength[i] = leng;
1717         }
1718         for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1719                 leng = Brnlength[j];
1720                 alllen += leng;
1721                 if (leng < MINARC) leng = MINARC;
1722                 if (leng > MAXARC) leng = MAXARC;
1723                 if (clockmode) { /* clock */
1724                         ibp[i]->lengthc = leng;
1725                         ibp[i]->kinp->lengthc = leng;
1726                 } else { /* no clock */
1727                         ibp[i]->length = leng;
1728                         ibp[i]->kinp->length = leng;
1729                 }
1730                 Brnlength[j] = leng;
1731         }
1732         free_dmatrix(atmt);
1733         free_dmatrix(atamt);
1734 }