forester/archive/RIO/others/puzzle_mod/src/ml1.c

   1 /*
   2  * ml1.c
   3  *
   4  *
   5  * Part of TREE-PUZZLE 5.0 (June 2000)
   6  *
   7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
   8  *                  M. Vingron, and Arndt von Haeseler
   9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
  10  *
  11  * All parts of the source except where indicated are distributed under
  12  * the GNU public licence.  See http://www.opensource.org for details.
  13  */
  14
  15
  16 /******************************************************************************/
  17 /* definitions and prototypes                                                 */
  18 /******************************************************************************/
  19
  20 #define EXTERN extern
  21
  22 /* prototypes */
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <math.h>
  26 #include <ctype.h>
  27 #include "util.h"
  28 #include "ml.h"
  29
  30 #define STDOUT     stdout
  31 #ifndef PARALLEL             /* because printf() runs significantly faster */
  32                              /* than fprintf(stdout) on an Apple McIntosh  */
  33                              /* (HS) */
  34 #       define FPRINTF    printf
  35 #       define STDOUTFILE
  36 #else
  37 #       define FPRINTF    fprintf
  38 #       define STDOUTFILE STDOUT,
  39 #endif
  40
  41
  42 /******************************************************************************/
  43 /* compacting sequence data information                                       */
  44 /******************************************************************************/
  45
  46
  47 /***************************** internal functions *****************************/
  48
  49
  50 /* make all frequencies a little different */
  51 void convfreq(dvector freqemp)
  52 {
  53         int i, j, maxi=0;
  54         double freq, maxfreq, sum;
  55
  56
  57         sum = 0.0;
  58         maxfreq = 0.0;
  59         for (i = 0; i < tpmradix; i++) {
  60                 freq = freqemp[i];
  61                 if (freq < MINFREQ) freqemp[i] = MINFREQ;
  62                 if (freq > maxfreq) {
  63                         maxfreq = freq;
  64                         maxi = i;
  65                 }
  66                 sum += freqemp[i];
  67         }
  68         freqemp[maxi] += 1.0 - sum;
  69
  70         for (i = 0; i < tpmradix - 1; i++) {
  71                 for (j = i + 1; j < tpmradix; j++) {
  72                         if (freqemp[i] == freqemp[j]) {
  73                                 freqemp[i] += MINFDIFF/2.0;
  74                                 freqemp[j] -= MINFDIFF/2.0;
  75                         }
  76                 }
  77         }
  78 }
  79
  80 /* sort site patters of original input data */
  81 void radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
  82         int *numptrn)
  83 {
  84         int i, j, k, l, n, pass;
  85         int *awork;
  86         int *count;
  87
  88
  89         awork = new_ivector(maxsite);
  90         count = new_ivector(tpmradix+1);
  91         for (i = 0; i < maxsite; i++)
  92                 ali[i] = i;
  93         for (pass = maxspc - 1; pass >= 0; pass--) {
  94                 for (j = 0; j < tpmradix+1; j++)
  95                         count[j] = 0;
  96                 for (i = 0; i < maxsite; i++)
  97                         count[(int) seqchar[pass][ali[i]]]++;
  98                 for (j = 1; j < tpmradix+1; j++)
  99                         count[j] += count[j-1];
 100                 for (i = maxsite-1; i >= 0; i--)
 101                         awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
 102                 for (i = 0; i < maxsite; i++)
 103                         ali[i] = awork[i];
 104         }
 105         free_ivector(awork);
 106         free_ivector(count);
 107         n = 1;
 108         for (j = 1; j < maxsite; j++) {
 109                 k = ali[j];
 110                 l = ali[j-1];
 111                 for (i = 0; i < maxspc; i++) {
 112                         if (seqchar[i][l] != seqchar[i][k]) {
 113                                 n++;
 114                                 break;
 115                         }
 116                 }
 117         }
 118         *numptrn = n;
 119 }
 120
 121
 122 void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
 123         ivector weight, int maxspc, int maxsite, int numptrn)
 124 {
 125         int i, j, k, n;
 126         int agree_flag; /* boolean */
 127
 128
 129         n = 0;
 130         k = ali[n];
 131         for (i = 0; i < maxspc; i++) {
 132                 seqconint[i][n] = seqchar[i][k];
 133         }
 134         weight[n] = 1;
 135         Alias[k] = 0;
 136         for (j = 1; j < maxsite; j++) {
 137                 k = ali[j];
 138                 agree_flag = TRUE;
 139                 for (i = 0; i < maxspc; i++) {
 140                         if (seqconint[i][n] != seqchar[i][k]) {
 141                                 agree_flag = FALSE;
 142                                 break;
 143                         }
 144                 }
 145                 if (agree_flag == FALSE) {
 146                         n++;
 147                         for (i = 0; i < maxspc; i++) {
 148                                 seqconint[i][n] = seqchar[i][k];
 149                         }
 150                         weight[n] = 1;
 151                         Alias[k] = n;
 152                 } else {
 153                         weight[n]++;
 154                         Alias[k] = n;
 155                 }
 156         }
 157         n++;
 158         if (numptrn != n) {
 159                 /* Problem in condenceseq */
 160                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
 161                 exit(1);
 162         }
 163 }
 164
 165 void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
 166         int *numconst, int *numconstpat)
 167 {
 168         int character, s, i, constflag;
 169
 170         *numconst    = 0;
 171         *numconstpat = 0;
 172         for (s = 0; s < numptrn; s++) { /* check all patterns */
 173                 constpat[s] = FALSE;
 174                 constflag = TRUE;
 175                 character = seqpat[0][s];
 176                 for (i = 1; i < maxspc; i++) {
 177                         if (seqpat[i][s] != character) {
 178                                 constflag = FALSE;
 179                                 break;
 180                         }
 181                 }
 182                 if (character != tpmradix && constflag) {
 183                         (*numconst) = (*numconst) + weight[s];
 184                         (*numconstpat)++;
 185                         constpat[s] = TRUE;
 186                 }
 187         }
 188 }
 189
 190 /***************************** exported functions *****************************/
 191
 192
 193 void evaluateseqs()
 194 {
 195         ivector ali;
 196
 197         convfreq(Freqtpm); /* make all frequencies slightly different */
 198         ali = new_ivector(Maxsite);
 199         radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
 200         Seqpat = new_cmatrix(Maxspc, Numptrn);
 201         constpat = new_ivector(Numptrn);
 202         Weight = new_ivector(Numptrn);
 203         condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
 204         free_ivector(ali);
 205         countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
 206         fracconstpat = (double) Numconstpat / (double) Numptrn;
 207         fracconst    = (double) Numconst / (double) Maxsite;
 208 }
 209
 210
 211 /******************************************************************************/
 212 /* computation of Pij(t)                                                      */
 213 /******************************************************************************/
 214
 215
 216 /***************************** internal functions *****************************/
 217
 218
 219 void elmhes(dmatrix a, ivector ordr, int n)
 220 {
 221         int m, j, i;
 222         double y, x;
 223
 224
 225         for (i = 0; i < n; i++)
 226                 ordr[i] = 0;
 227         for (m = 2; m < n; m++) {
 228                 x = 0.0;
 229                 i = m;
 230                 for (j = m; j <= n; j++) {
 231                         if (fabs(a[j - 1][m - 2]) > fabs(x)) {
 232                                 x = a[j - 1][m - 2];
 233                                 i = j;
 234                         }
 235                 }
 236                 ordr[m - 1] = i;      /* vector */
 237                 if (i != m) {
 238                         for (j = m - 2; j < n; j++) {
 239                                 y = a[i - 1][j];
 240                                 a[i - 1][j] = a[m - 1][j];
 241                                 a[m - 1][j] = y;
 242                         }
 243                         for (j = 0; j < n; j++) {
 244                                 y = a[j][i - 1];
 245                                 a[j][i - 1] = a[j][m - 1];
 246                                 a[j][m - 1] = y;
 247                         }
 248                 }
 249                 if (x != 0.0) {
 250                         for (i = m; i < n; i++) {
 251                                 y = a[i][m - 2];
 252                                 if (y != 0.0) {
 253                                         y /= x;
 254                                         a[i][m - 2] = y;
 255                                         for (j = m - 1; j < n; j++)
 256                                                 a[i][j] -= y * a[m - 1][j];
 257                                         for (j = 0; j < n; j++)
 258                                                 a[j][m - 1] += y * a[j][i];
 259                                 }
 260                         }
 261                 }
 262         }
 263 }
 264
 265
 266 void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
 267 {
 268         int i, j, m;
 269
 270
 271         for (i = 0; i < n; i++) {
 272                 for (j = i + 1; j < n; j++) {
 273                         zz[i][j] = 0.0;
 274                         zz[j][i] = 0.0;
 275                 }
 276                 zz[i][i] = 1.0;
 277         }
 278         if (n <= 2)
 279                 return;
 280         for (m = n - 1; m >= 2; m--) {
 281                 for (i = m; i < n; i++)
 282                         zz[i][m - 1] = a[i][m - 2];
 283                 i = ordr[m - 1];
 284                 if (i != m) {
 285                         for (j = m - 1; j < n; j++) {
 286                                 zz[m - 1][j] = zz[i - 1][j];
 287                                 zz[i - 1][j] = 0.0;
 288                         }
 289                         zz[i - 1][m - 1] = 1.0;
 290                 }
 291         }
 292 }
 293
 294
 295 void mcdiv(double ar, double ai, double br, double bi,
 296         double *cr, double *ci)
 297 {
 298         double s, ars, ais, brs, bis;
 299
 300
 301         s = fabs(br) + fabs(bi);
 302         ars = ar / s;
 303         ais = ai / s;
 304         brs = br / s;
 305         bis = bi / s;
 306         s = brs * brs + bis * bis;
 307         *cr = (ars * brs + ais * bis) / s;
 308         *ci = (ais * brs - ars * bis) / s;
 309 }
 310
 311
 312 void hqr2(int n, int low, int hgh, dmatrix h,
 313         dmatrix zz, dvector wr, dvector wi)
 314 {
 315         int i, j, k, l=0, m, en, na, itn, its;
 316         double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
 317         int notlas; /* boolean */
 318
 319
 320         norm = 0.0;
 321         k = 1;
 322         /* store isolated roots and compute matrix norm */
 323         for (i = 0; i < n; i++) {
 324                 for (j = k - 1; j < n; j++)
 325                         norm += fabs(h[i][j]);
 326                 k = i + 1;
 327                 if (i + 1 < low || i + 1 > hgh) {
 328                         wr[i] = h[i][i];
 329                         wi[i] = 0.0;
 330                 }
 331         }
 332         en = hgh;
 333         t = 0.0;
 334         itn = n * 30;
 335         while (en >= low) {     /* search for next eigenvalues */
 336                 its = 0;
 337                 na = en - 1;
 338                 while (en >= 1) {
 339                         /* look for single small sub-diagonal element */
 340                         for (l = en; l > low; l--) {
 341                                 s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
 342                                 if (s == 0.0)
 343                                         s = norm;
 344                                 tst1 = s;
 345                                 tst2 = tst1 + fabs(h[l - 1][l - 2]);
 346                                 if (tst2 == tst1)
 347                                         goto L100;
 348                         }
 349                         l = low;
 350         L100:
 351                         x = h[en - 1][en - 1];  /* form shift */
 352                         if (l == en || l == na)
 353                                 break;
 354                         if (itn == 0) {
 355                                 /* all eigenvalues have not converged */
 356                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
 357                                 exit(1);
 358                         }
 359                         y = h[na - 1][na - 1];
 360                         w = h[en - 1][na - 1] * h[na - 1][en - 1];
 361                         /* form exceptional shift */
 362                         if (its == 10 || its == 20) {
 363                                 t += x;
 364                                 for (i = low - 1; i < en; i++)
 365                                         h[i][i] -= x;
 366                                 s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
 367                                 x = 0.75 * s;
 368                                 y = x;
 369                                 w = -0.4375 * s * s;
 370                         }
 371                         its++;
 372                         itn--;
 373                         /* look for two consecutive small sub-diagonal elements */
 374                         for (m = en - 2; m >= l; m--) {
 375                                 z = h[m - 1][m - 1];
 376                                 r = x - z;
 377                                 s = y - z;
 378                                 p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
 379                                 q = h[m][m] - z - r - s;
 380                                 r = h[m + 1][m];
 381                                 s = fabs(p) + fabs(q) + fabs(r);
 382                                 p /= s;
 383                                 q /= s;
 384                                 r /= s;
 385                                 if (m == l)
 386                                         break;
 387                                 tst1 = fabs(p) *
 388                                                 (fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
 389                                 tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
 390                                 if (tst2 == tst1)
 391                                         break;
 392                         }
 393                         for (i = m + 2; i <= en; i++) {
 394                                 h[i - 1][i - 3] = 0.0;
 395                                 if (i != m + 2)
 396                                         h[i - 1][i - 4] = 0.0;
 397                         }
 398                         for (k = m; k <= na; k++) {
 399                                 notlas = (k != na);
 400                                 if (k != m) {
 401                                         p = h[k - 1][k - 2];
 402                                         q = h[k][k - 2];
 403                                         r = 0.0;
 404                                         if (notlas)
 405                                                 r = h[k + 1][k - 2];
 406                                         x = fabs(p) + fabs(q) + fabs(r);
 407                                         if (x != 0.0) {
 408                                                 p /= x;
 409                                                 q /= x;
 410                                                 r /= x;
 411                                         }
 412                                 }
 413                                 if (x != 0.0) {
 414                                         if (p < 0.0) /* sign */
 415                                                 s = - sqrt(p * p + q * q + r * r);
 416                                         else
 417                                                 s = sqrt(p * p + q * q + r * r);
 418                                         if (k != m)
 419                                                 h[k - 1][k - 2] = -s * x;
 420                                         else {
 421                                                 if (l != m)
 422                                                         h[k - 1][k - 2] = -h[k - 1][k - 2];
 423                                         }
 424                                         p += s;
 425                                         x = p / s;
 426                                         y = q / s;
 427                                         z = r / s;
 428                                         q /= p;
 429                                         r /= p;
 430                                         if (!notlas) {
 431                                                 for (j = k - 1; j < n; j++) {   /* row modification */
 432                                                         p = h[k - 1][j] + q * h[k][j];
 433                                                         h[k - 1][j] -= p * x;
 434                                                         h[k][j] -= p * y;
 435                                                 }
 436                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
 437                                                 for (i = 0; i < j; i++) {       /* column modification */
 438                                                         p = x * h[i][k - 1] + y * h[i][k];
 439                                                         h[i][k - 1] -= p;
 440                                                         h[i][k] -= p * q;
 441                                                 }
 442                                                 /* accumulate transformations */
 443                                                 for (i = low - 1; i < hgh; i++) {
 444                                                         p = x * zz[i][k - 1] + y * zz[i][k];
 445                                                         zz[i][k - 1] -= p;
 446                                                         zz[i][k] -= p * q;
 447                                                 }
 448                                         } else {
 449                                                 for (j = k - 1; j < n; j++) {   /* row modification */
 450                                                         p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
 451                                                         h[k - 1][j] -= p * x;
 452                                                         h[k][j] -= p * y;
 453                                                         h[k + 1][j] -= p * z;
 454                                                 }
 455                                                 j = (en < (k + 3)) ? en : (k + 3); /* min */
 456                                                 for (i = 0; i < j; i++) {       /* column modification */
 457                                                         p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
 458                                                         h[i][k - 1] -= p;
 459                                                         h[i][k] -= p * q;
 460                                                         h[i][k + 1] -= p * r;
 461                                                 }
 462                                                 /* accumulate transformations */
 463                                                 for (i = low - 1; i < hgh; i++) {
 464                                                         p = x * zz[i][k - 1] + y * zz[i][k] +
 465                                                                 z * zz[i][k + 1];
 466                                                         zz[i][k - 1] -= p;
 467                                                         zz[i][k] -= p * q;
 468                                                         zz[i][k + 1] -= p * r;
 469                                                 }
 470                                         }
 471                                 }
 472                         }              /* for k */
 473                 }                      /* while infinite loop */
 474                 if (l == en) {         /* one root found */
 475                         h[en - 1][en - 1] = x + t;
 476                         wr[en - 1] = h[en - 1][en - 1];
 477                         wi[en - 1] = 0.0;
 478                         en = na;
 479                         continue;
 480                 }
 481                 y = h[na - 1][na - 1];
 482                 w = h[en - 1][na - 1] * h[na - 1][en - 1];
 483                 p = (y - x) / 2.0;
 484                 q = p * p + w;
 485                 z = sqrt(fabs(q));
 486                 h[en - 1][en - 1] = x + t;
 487                 x = h[en - 1][en - 1];
 488                 h[na - 1][na - 1] = y + t;
 489                 if (q >= 0.0) {        /* real pair */
 490                         if (p < 0.0) /* sign */
 491                                 z = p - fabs(z);
 492                         else
 493                                 z = p + fabs(z);
 494                         wr[na - 1] = x + z;
 495                         wr[en - 1] = wr[na - 1];
 496                         if (z != 0.0)
 497                                 wr[en - 1] = x - w / z;
 498                         wi[na - 1] = 0.0;
 499                         wi[en - 1] = 0.0;
 500                         x = h[en - 1][na - 1];
 501                         s = fabs(x) + fabs(z);
 502                         p = x / s;
 503                         q = z / s;
 504                         r = sqrt(p * p + q * q);
 505                         p /= r;
 506                         q /= r;
 507                         for (j = na - 1; j < n; j++) {  /* row modification */
 508                                 z = h[na - 1][j];
 509                                 h[na - 1][j] = q * z + p * h[en - 1][j];
 510                                 h[en - 1][j] = q * h[en - 1][j] - p * z;
 511                         }
 512                         for (i = 0; i < en; i++) {      /* column modification */
 513                                 z = h[i][na - 1];
 514                                 h[i][na - 1] = q * z + p * h[i][en - 1];
 515                                 h[i][en - 1] = q * h[i][en - 1] - p * z;
 516                         }
 517                         /* accumulate transformations */
 518                         for (i = low - 1; i < hgh; i++) {
 519                                 z = zz[i][na - 1];
 520                                 zz[i][na - 1] = q * z + p * zz[i][en - 1];
 521                                 zz[i][en - 1] = q * zz[i][en - 1] - p * z;
 522                         }
 523                 } else {               /* complex pair */
 524                         wr[na - 1] = x + p;
 525                         wr[en - 1] = x + p;
 526                         wi[na - 1] = z;
 527                         wi[en - 1] = -z;
 528                 }
 529                 en -= 2;
 530         }                              /* while en >= low */
 531         /* backsubstitute to find vectors of upper triangular form */
 532         if (norm != 0.0) {
 533                 for (en = n; en >= 1; en--) {
 534                         p = wr[en - 1];
 535                         q = wi[en - 1];
 536                         na = en - 1;
 537                         if (q == 0.0) {/* real vector */
 538                                 m = en;
 539                                 h[en - 1][en - 1] = 1.0;
 540                                 if (na != 0) {
 541                                         for (i = en - 2; i >= 0; i--) {
 542                                                 w = h[i][i] - p;
 543                                                 r = 0.0;
 544                                                 for (j = m - 1; j < en; j++)
 545                                                         r += h[i][j] * h[j][en - 1];
 546                                                 if (wi[i] < 0.0) {
 547                                                         z = w;
 548                                                         s = r;
 549                                                 } else {
 550                                                         m = i + 1;
 551                                                         if (wi[i] == 0.0) {
 552                                                                 t = w;
 553                                                                 if (t == 0.0) {
 554                                                                         tst1 = norm;
 555                                                                         t = tst1;
 556                                                                         do {
 557                                                                                 t = 0.01 * t;
 558                                                                                 tst2 = norm + t;
 559                                                                         } while (tst2 > tst1);
 560                                                                 }
 561                                                                 h[i][en - 1] = -(r / t);
 562                                                         } else {        /* solve real equations */
 563                                                                 x = h[i][i + 1];
 564                                                                 y = h[i + 1][i];
 565                                                                 q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
 566                                                                 t = (x * s - z * r) / q;
 567                                                                 h[i][en - 1] = t;
 568                                                                 if (fabs(x) > fabs(z))
 569                                                                         h[i + 1][en - 1] = (-r - w * t) / x;
 570                                                                 else
 571                                                                         h[i + 1][en - 1] = (-s - y * t) / z;
 572                                                         }
 573                                                         /* overflow control */
 574                                                         t = fabs(h[i][en - 1]);
 575                                                         if (t != 0.0) {
 576                                                                 tst1 = t;
 577                                                                 tst2 = tst1 + 1.0 / tst1;
 578                                                                 if (tst2 <= tst1) {
 579                                                                         for (j = i; j < en; j++)
 580                                                                                 h[j][en - 1] /= t;
 581                                                                 }
 582                                                         }
 583                                                 }
 584                                         }
 585                                 }
 586                         } else if (q > 0.0) {
 587                                 m = na;
 588                                 if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
 589                                         h[na - 1][na - 1] = q / h[en - 1][na - 1];
 590                                         h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
 591                                                                                                                 h[en - 1][na - 1];
 592                                 } else
 593                                         mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
 594                                                 &h[na - 1][na - 1], &h[na - 1][en - 1]);
 595                                 h[en - 1][na - 1] = 0.0;
 596                                 h[en - 1][en - 1] = 1.0;
 597                                 if (en != 2) {
 598                                         for (i = en - 3; i >= 0; i--) {
 599                                                 w = h[i][i] - p;
 600                                                 ra = 0.0;
 601                                                 sa = 0.0;
 602                                                 for (j = m - 1; j < en; j++) {
 603                                                         ra += h[i][j] * h[j][na - 1];
 604                                                         sa += h[i][j] * h[j][en - 1];
 605                                                 }
 606                                                 if (wi[i] < 0.0) {
 607                                                         z = w;
 608                                                         r = ra;
 609                                                         s = sa;
 610                                                 } else {
 611                                                         m = i + 1;
 612                                                         if (wi[i] == 0.0)
 613                                                                 mcdiv(-ra, -sa, w, q, &h[i][na - 1],
 614                                                                         &h[i][en - 1]);
 615                                                         else {  /* solve complex equations */
 616                                                                 x = h[i][i + 1];
 617                                                                 y = h[i + 1][i];
 618                                                                 vr = (wr[i] - p) * (wr[i] - p);
 619                                                                 vr = vr + wi[i] * wi[i] - q * q;
 620                                                                 vi = (wr[i] - p) * 2.0 * q;
 621                                                                 if (vr == 0.0 && vi == 0.0) {
 622                                                                         tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
 623                                                                                                    fabs(y) + fabs(z));
 624                                                                         vr = tst1;
 625                                                                         do {
 626                                                                                 vr = 0.01 * vr;
 627                                                                                 tst2 = tst1 + vr;
 628                                                                         } while (tst2 > tst1);
 629                                                                 }
 630                                                                 mcdiv(x * r - z * ra + q * sa,
 631                                                                          x * s - z * sa - q * ra, vr, vi,
 632                                                                      &h[i][na - 1], &h[i][en - 1]);
 633                                                                 if (fabs(x) > fabs(z) + fabs(q)) {
 634                                                                         h[i + 1]
 635                                                                                 [na - 1] = (q * h[i][en - 1] -
 636                                                                                                         w * h[i][na - 1] - ra) / x;
 637                                                                         h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
 638                                                                                                                 q * h[i][na - 1]) / x;
 639                                                                 } else
 640                                                                         mcdiv(-r - y * h[i][na - 1],
 641                                                                                  -s - y * h[i][en - 1], z, q,
 642                                                                              &h[i + 1][na - 1], &h[i + 1][en - 1]);
 643                                                         }
 644                                                         /* overflow control */
 645                                                         t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
 646                                                                  fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
 647                                                         if (t != 0.0) {
 648                                                                 tst1 = t;
 649                                                                 tst2 = tst1 + 1.0 / tst1;
 650                                                                 if (tst2 <= tst1) {
 651                                                                         for (j = i; j < en; j++) {
 652                                                                                 h[j][na - 1] /= t;
 653                                                                                 h[j][en - 1] /= t;
 654                                                                         }
 655                                                                 }
 656                                                         }
 657                                                 }
 658                                         }
 659                                 }
 660                         }
 661                 }
 662                 /* end back substitution. vectors of isolated roots */
 663                 for (i = 0; i < n; i++) {
 664                         if (i + 1 < low || i + 1 > hgh) {
 665                                 for (j = i; j < n; j++)
 666                                         zz[i][j] = h[i][j];
 667                         }
 668                 }
 669                 /* multiply by transformation matrix to give vectors of
 670                  * original full matrix. */
 671                 for (j = n - 1; j >= low - 1; j--) {
 672                         m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
 673                         for (i = low - 1; i < hgh; i++) {
 674                                 z = 0.0;
 675                                 for (k = low - 1; k < m; k++)
 676                                         z += zz[i][k] * h[k][j];
 677                                 zz[i][j] = z;
 678                         }
 679                 }
 680         }
 681         return;
 682 }
 683
 684
 685 /* make rate matrix with 0.01 expected substitutions per unit time */
 686 void onepamratematrix(dmatrix a)
 687 {
 688         int i, j;
 689         double delta, temp, sum;
 690         dvector m;
 691
 692         for (i = 0; i < tpmradix; i++)
 693         {
 694                 for (j = 0; j < tpmradix; j++)
 695                 {
 696                         a[i][j] = Freqtpm[j]*a[i][j];
 697                 }
 698         }
 699
 700         m = new_dvector(tpmradix);
 701         for (i = 0, sum = 0.0; i < tpmradix; i++)
 702         {
 703                 for (j = 0, temp = 0.0; j < tpmradix; j++)
 704                         temp += a[i][j];
 705                 m[i] = temp; /* row sum */
 706                 sum += temp*Freqtpm[i]; /* exp. rate */
 707         }
 708         delta = 0.01 / sum; /* 0.01 subst. per unit time */
 709         for (i = 0; i < tpmradix; i++) {
 710                 for (j = 0; j < tpmradix; j++) {
 711                         if (i != j)
 712                                 a[i][j] = delta * a[i][j];
 713                         else
 714                                 a[i][j] = delta * (-m[i]);
 715                 }
 716         }
 717         free_dvector(m);
 718 }
 719
 720
 721 void eigensystem(dvector eval, dmatrix evec)
 722 {
 723         dvector evali, forg;
 724         dmatrix a, b;
 725         ivector ordr;
 726         int i, j, k, error;
 727         double zero;
 728
 729
 730         ordr = new_ivector(tpmradix);
 731         evali = new_dvector(tpmradix);
 732         forg = new_dvector(tpmradix);
 733         a = new_dmatrix(tpmradix,tpmradix);
 734         b = new_dmatrix(tpmradix,tpmradix);
 735
 736         rtfdata(a, forg); /* get relative transition matrix and frequencies */
 737
 738         onepamratematrix(a); /* make 1 PAM rate matrix */
 739
 740         /* copy a to b */
 741         for (i = 0; i < tpmradix; i++)
 742                 for (j = 0; j < tpmradix; j++)
 743                         b[i][j] = a[i][j];
 744
 745         elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
 746         eltran(a, evec, ordr, tpmradix);
 747         hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
 748
 749         /* check eigenvalue equation */
 750         error = FALSE;
 751         for (j = 0; j < tpmradix; j++) {
 752                 for (i = 0, zero = 0.0; i < tpmradix; i++) {
 753                         for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
 754                         zero -= eval[j] * evec[i][j];
 755                         if (fabs(zero) > 1.0e-5)
 756                                 error = TRUE;
 757                 }
 758         }
 759         if (error)
 760                 FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
 761
 762         free_ivector(ordr);
 763         free_dvector(evali);
 764         free_dvector(forg);
 765         free_dmatrix(a);
 766         free_dmatrix(b);
 767 }
 768
 769
 770 void luinverse(dmatrix inmat, dmatrix imtrx, int size)
 771 {
 772     double eps = 1.0e-20; /* ! */
 773         int i, j, k, l, maxi=0, idx, ix, jx;
 774         double sum, tmp, maxb, aw;
 775         ivector index;
 776         double *wk;
 777         dmatrix omtrx;
 778
 779
 780         index = new_ivector(tpmradix);
 781         omtrx = new_dmatrix(tpmradix,tpmradix);
 782
 783         /* copy inmat to omtrx */
 784         for (i = 0; i < tpmradix; i++)
 785                 for (j = 0; j < tpmradix; j++)
 786                         omtrx[i][j] = inmat[i][j];
 787
 788         wk = (double *) malloc((unsigned)size * sizeof(double));
 789         aw = 1.0;
 790         for (i = 0; i < size; i++) {
 791                 maxb = 0.0;
 792                 for (j = 0; j < size; j++) {
 793                         if (fabs(omtrx[i][j]) > maxb)
 794                                 maxb = fabs(omtrx[i][j]);
 795                 }
 796                 if (maxb == 0.0) {
 797                         /* Singular matrix */
 798                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
 799                         exit(1);
 800                 }
 801                 wk[i] = 1.0 / maxb;
 802         }
 803         for (j = 0; j < size; j++) {
 804                 for (i = 0; i < j; i++) {
 805                         sum = omtrx[i][j];
 806                         for (k = 0; k < i; k++)
 807                                 sum -= omtrx[i][k] * omtrx[k][j];
 808                         omtrx[i][j] = sum;
 809                 }
 810                 maxb = 0.0;
 811                 for (i = j; i < size; i++) {
 812                         sum = omtrx[i][j];
 813                         for (k = 0; k < j; k++)
 814                                 sum -= omtrx[i][k] * omtrx[k][j];
 815                         omtrx[i][j] = sum;
 816                         tmp = wk[i] * fabs(sum);
 817                         if (tmp >= maxb) {
 818                                 maxb = tmp;
 819                                 maxi = i;
 820                         }
 821                 }
 822                 if (j != maxi) {
 823                         for (k = 0; k < size; k++) {
 824                                 tmp = omtrx[maxi][k];
 825                                 omtrx[maxi][k] = omtrx[j][k];
 826                                 omtrx[j][k] = tmp;
 827                         }
 828                         aw = -aw;
 829                         wk[maxi] = wk[j];
 830                 }
 831                 index[j] = maxi;
 832                 if (omtrx[j][j] == 0.0)
 833                         omtrx[j][j] = eps;
 834                 if (j != size - 1) {
 835                         tmp = 1.0 / omtrx[j][j];
 836                         for (i = j + 1; i < size; i++)
 837                                 omtrx[i][j] *= tmp;
 838                 }
 839         }
 840         for (jx = 0; jx < size; jx++) {
 841                 for (ix = 0; ix < size; ix++)
 842                         wk[ix] = 0.0;
 843                 wk[jx] = 1.0;
 844                 l = -1;
 845                 for (i = 0; i < size; i++) {
 846                         idx = index[i];
 847                         sum = wk[idx];
 848                         wk[idx] = wk[i];
 849                         if (l != -1) {
 850                                 for (j = l; j < i; j++)
 851                                         sum -= omtrx[i][j] * wk[j];
 852                         } else if (sum != 0.0)
 853                                 l = i;
 854                         wk[i] = sum;
 855                 }
 856                 for (i = size - 1; i >= 0; i--) {
 857                         sum = wk[i];
 858                         for (j = i + 1; j < size; j++)
 859                                 sum -= omtrx[i][j] * wk[j];
 860                         wk[i] = sum / omtrx[i][i];
 861                 }
 862                 for (ix = 0; ix < size; ix++)
 863                         imtrx[ix][jx] = wk[ix];
 864         }
 865         free((char *)wk);
 866         wk = NULL;
 867         free_ivector(index);
 868         free_dmatrix(omtrx);
 869 }
 870
 871
 872 void checkevector(dmatrix evec, dmatrix ivec, int nn)
 873 {
 874         int i, j, ia, ib, ic, error;
 875         dmatrix matx;
 876         double sum;
 877
 878
 879         matx = new_dmatrix(nn, nn);
 880         /* multiply matrix of eigenvectors and its inverse */
 881         for (ia = 0; ia < nn; ia++) {
 882                 for (ic = 0; ic < nn; ic++) {
 883                         sum = 0.0;
 884                         for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
 885                         matx[ia][ic] = sum;
 886                 }
 887         }
 888         /* check whether the unitary matrix is obtained */
 889         error = FALSE;
 890         for (i = 0; i < nn; i++) {
 891                 for (j = 0; j < nn; j++) {
 892                         if (i == j) {
 893                                 if (fabs(matx[i][j] - 1.0) > 1.0e-5)
 894                                         error = TRUE;
 895                         } else {
 896                                 if (fabs(matx[i][j]) > 1.0e-5)
 897                                         error = TRUE;
 898                         }
 899                 }
 900         }
 901         if (error) {
 902                 FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
 903         }
 904         free_dmatrix(matx);
 905 }
 906
 907
 908 /***************************** exported functions *****************************/
 909
 910
 911 /* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
 912 void tranprobmat()
 913 {
 914         eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
 915         luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
 916         checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
 917 }
 918
 919
 920 /* compute P(t) */
 921 void tprobmtrx(double arc, dmatrix tpr)
 922 {
 923         register int i, j, k;
 924         register double temp;
 925
 926
 927         for (k = 0; k < tpmradix; k++) {
 928                 temp = exp(arc * Eval[k]);
 929                 for (j = 0; j < tpmradix; j++)
 930                         iexp[k][j] = Ievc[k][j] * temp;
 931         }
 932         for (i = 0; i < tpmradix; i++) {
 933                 for (j = 0; j < tpmradix; j++) {
 934                         temp = 0.0;
 935                         for (k = 0; k < tpmradix; k++)
 936                                 temp += Evec[i][k] * iexp[k][j];
 937                         tpr[i][j] = fabs(temp);
 938                 }
 939         }
 940 }
 941
 942
 943 /******************************************************************************/
 944 /* estimation of maximum likelihood distances                                 */
 945 /******************************************************************************/
 946
 947 /* compute total log-likelihood
 948    input: likelihoods for each site and non-zero rate
 949    output: total log-likelihood (incl. zero rate category) */
 950 double comptotloglkl(dmatrix cdl)
 951 {
 952         int k, r;
 953         double loglkl, fv, fv2, sitelkl;
 954
 955         loglkl = 0.0;
 956         fv = 1.0-fracinv;
 957         fv2 = (1.0-fracinv)/(double) numcats;
 958
 959         if (numcats == 1) {
 960
 961                 for (k = 0; k < Numptrn; k++) {
 962
 963                         /* compute likelihood for pattern k */
 964                         sitelkl = cdl[0][k]*fv;
 965                         if (constpat[k] == TRUE)
 966                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
 967
 968                         /* total log-likelihood */
 969                         loglkl += log(sitelkl)*Weight[k];
 970
 971                 }
 972
 973         } else {
 974
 975                 for (k = 0; k < Numptrn; k++) {
 976
 977                         /* this general routine works always but it's better
 978                to run it only when it's really necessary */
 979
 980                         /* compute likelihood for pattern k */
 981                         sitelkl = 0.0;
 982                         for (r = 0; r < numcats; r++)
 983                                 sitelkl += cdl[r][k];
 984                         sitelkl = fv2*sitelkl;
 985                         if (constpat[k] == TRUE)
 986                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
 987
 988                         /* total log-likelihood */
 989                         loglkl += log(sitelkl)*Weight[k];
 990
 991                 }
 992
 993         }
 994
 995         return loglkl;
 996 }
 997
 998
 999 /* computes the site log-likelihoods
1000    input: likelihoods for each site and non-zero rate
1001    output: log-likelihood for each site */
1002 void allsitelkl(dmatrix cdl, dvector aslkl)
1003 {
1004         int k, r;
1005         double fv, fv2, sitelkl;
1006
1007         fv = 1.0-fracinv;
1008         fv2 = (1.0-fracinv)/(double) numcats;
1009
1010         if (numcats == 1) {
1011
1012                 for (k = 0; k < Numptrn; k++) {
1013
1014                         /* compute likelihood for pattern k */
1015                         sitelkl = cdl[0][k]*fv;
1016                         if (constpat[k] == TRUE)
1017                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1018
1019                         /* site log-likelihood */
1020                         aslkl[k] = log(sitelkl);
1021                 }
1022
1023         } else {
1024
1025                 for (k = 0; k < Numptrn; k++) {
1026
1027                         /* this general routine works always but it's better
1028                to run it only when it's really necessary */
1029
1030                         /* compute likelihood for pattern k */
1031                         sitelkl = 0.0;
1032                         for (r = 0; r < numcats; r++)
1033                                 sitelkl += cdl[r][k];
1034                         sitelkl = fv2*sitelkl;
1035                         if (constpat[k] == TRUE)
1036                                 sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1037
1038                         /* total log-likelihood */
1039                         aslkl[k] = log(sitelkl);
1040
1041                 }
1042         }
1043 }
1044
1045
1046 /***************************** internal functions *****************************/
1047
1048 /* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049 double pairlkl(double arc)
1050 {
1051         int k, r, ci, cj;
1052         double loglkl, fv, sitelkl;
1053
1054
1055         /* compute tpms */
1056         for (r = 0; r < numcats; r++)
1057                 /* compute tpm for rate category r */
1058                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1059
1060         loglkl = 0.0;
1061         fv = 1.0-fracinv;
1062
1063         if (numcats == 1) {
1064
1065                 for (k = 0; k < Numptrn; k++) {
1066
1067                         /* compute likelihood for site k */
1068                         ci = seqchi[k];
1069                         cj = seqchj[k];
1070                         if (ci != tpmradix && cj != tpmradix)
1071                                 sitelkl = ltprobr[0][ci][cj]*fv;
1072                         else
1073                                 sitelkl = fv;
1074                         if (ci == cj && ci != tpmradix)
1075                                 sitelkl += fracinv*Freqtpm[ci];
1076
1077                         /* total log-likelihood */
1078                         loglkl += log(sitelkl)*Weight[k];
1079
1080                 }
1081
1082         } else {
1083
1084                 for (k = 0; k < Numptrn; k++) {
1085
1086                         /* this general routine works always but it's better
1087                to run it only when it's really necessary */
1088
1089                         /* compute likelihood for site k */
1090                         ci = seqchi[k];
1091                         cj = seqchj[k];
1092                         if (ci != tpmradix && cj != tpmradix) {
1093                                 sitelkl = 0.0;
1094                                 for (r = 0; r < numcats; r++)
1095                                         sitelkl += ltprobr[r][ci][cj];
1096                                 sitelkl = fv*sitelkl/(double) numcats;
1097                         } else
1098                                 sitelkl = fv;
1099                         if (ci == cj && ci != tpmradix)
1100                                 sitelkl += fracinv*Freqtpm[ci];
1101
1102                         /* total log-likelihood */
1103                         loglkl += log(sitelkl)*Weight[k];
1104
1105                 }
1106
1107         }
1108
1109         /* return negative log-likelihood as we use a minimizing procedure */
1110         return -loglkl;
1111 }
1112
1113
1114 /***************************** exported functions *****************************/
1115
1116
1117 /* maximum likelihood distance between sequence i and j */
1118 double mldistance(int i, int j)
1119 {
1120         double dist, fx, f2x;
1121
1122         if (i == j) return 0.0;
1123
1124         /* use old distance as start value */
1125         dist = Distanmat[i][j];
1126
1127         if (dist == 0.0) return 0.0;
1128
1129         seqchi = Seqpat[i];
1130         seqchj = Seqpat[j];
1131
1132         if (dist <= MINARC) dist = MINARC+1.0;
1133         if (dist >= MAXARC) dist = MAXARC-1.0;
1134
1135         dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1136
1137         return dist;
1138 }
1139
1140
1141 /* initialize distance matrix */
1142 void initdistan()
1143 {
1144         int i, j, k, diff, x, y;
1145         double obs, temp;
1146
1147         for (i = 0; i < Maxspc; i++) {
1148                 Distanmat[i][i] = 0.0;
1149                 for (j = i + 1; j < Maxspc; j++) {
1150                         seqchi = Seqpat[i];
1151                         seqchj = Seqpat[j];
1152
1153                         /* count observed differences */
1154                         diff = 0;
1155                         for (k = 0; k < Numptrn; k++) {
1156                                 x = seqchi[k];
1157                                 y = seqchj[k];
1158                                 if (x != y &&
1159                                         x != tpmradix &&
1160                                         y != tpmradix)
1161                                         diff += Weight[k];
1162                         }
1163                         if (diff == 0)
1164                                 Distanmat[i][j] = 0.0;
1165                         else {
1166                                 /* use generalized JC correction to get first estimate
1167                                    (for the SH model the observed distance is used) */
1168                                 /* observed distance */
1169                                 obs = (double) diff / (double) Maxsite;
1170                                 temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1171                                 if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1172                                         /* use JC corrected distance */
1173                                         Distanmat[i][j] = -100.0*(tpmradix-1.0)/tpmradix * log(temp);
1174                                 else
1175                                         /* use observed distance */
1176                                         Distanmat[i][j] = obs * 100.0;
1177                                 if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC;
1178                                 if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC;
1179                         }
1180                         Distanmat[j][i] = Distanmat[i][j];
1181                 }
1182         }
1183 }
1184
1185 /* compute distance matrix */
1186 void computedistan()
1187 {
1188         int i, j;
1189
1190         for (i = 0; i < Maxspc; i++)
1191                 for (j = i; j < Maxspc; j++) {
1192                         Distanmat[i][j] = mldistance(i, j);
1193                         Distanmat[j][i] = Distanmat[i][j];
1194                 }
1195 }
1196
1197
1198 /******************************************************************************/
1199 /* computation of maximum likelihood edge lengths for a given tree            */
1200 /******************************************************************************/
1201
1202
1203 /***************************** internal functions *****************************/
1204
1205
1206 /* multiply partial likelihoods */
1207 void productpartials(Node *op)
1208 {
1209         Node *cp;
1210         int i, j, r;
1211         dcube opc, cpc;
1212
1213         cp = op;
1214         opc = op->partials;
1215         while (cp->isop->isop != op) {
1216                 cp = cp->isop;
1217                 cpc = cp->partials;
1218                 for (r = 0; r < numcats; r++)
1219                         for (i = 0; i < Numptrn; i++)
1220                                 for (j = 0; j < tpmradix; j++)
1221                                         opc[r][i][j] *= cpc[r][i][j];
1222         }
1223 }
1224
1225
1226 /* compute internal partial likelihoods */
1227 void partialsinternal(Node *op)
1228 {
1229         int i, j, k, r;
1230         double sum;
1231         dcube oprob, cprob;
1232
1233         if (clockmode == 1) { /* clocklike branch lengths */
1234                 for (r = 0; r < numcats; r++) {
1235                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1236                 }
1237         } else {  /* non-clocklike branch lengths */
1238                 for (r = 0; r < numcats; r++) {
1239                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1240                 }
1241         }
1242
1243         oprob = op->partials;
1244         cprob = op->kinp->isop->partials;
1245         for (r = 0; r < numcats; r++) {
1246                 for (k = 0; k < Numptrn; k++) {
1247                         for (i = 0; i < tpmradix; i++) {
1248                                 sum = 0.0;
1249                                 for (j = 0; j < tpmradix; j++)
1250                                         sum += ltprobr[r][i][j] * cprob[r][k][j];
1251                                 oprob[r][k][i] = sum;
1252                         }
1253                 }
1254         }
1255 }
1256
1257
1258 /* compute external partial likelihoods */
1259 void partialsexternal(Node *op)
1260 {
1261         int i, j, k, r;
1262         dcube oprob;
1263         cvector dseqi;
1264
1265         if (clockmode == 1) { /* clocklike branch lengths */
1266                 for (r = 0; r < numcats; r++) {
1267                         tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1268                 }
1269         } else {  /* nonclocklike branch lengths */
1270                 for (r = 0; r < numcats; r++) {
1271                         tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1272                 }
1273         }
1274
1275         oprob = op->partials;
1276         dseqi = op->kinp->eprob;
1277         for (r = 0; r < numcats; r++) {
1278                 for (k = 0; k < Numptrn; k++) {
1279                         if ((j = dseqi[k]) == tpmradix) {
1280                                 for (i = 0; i < tpmradix; i++)
1281                                         oprob[r][k][i] = 1.0;
1282                         } else {
1283                                 for (i = 0; i < tpmradix; i++)
1284                                         oprob[r][k][i] = ltprobr[r][i][j];
1285                         }
1286                 }
1287         }
1288 }
1289
1290
1291 /* compute all partial likelihoods */
1292 void initpartials(Tree *tr)
1293 {
1294         Node *cp, *rp;
1295
1296         cp = rp = tr->rootp;
1297         do {
1298                 cp = cp->isop->kinp;
1299                 if (cp->isop == NULL) { /* external node */
1300                         cp = cp->kinp; /* not descen */
1301                         partialsexternal(cp);
1302                 } else { /* internal node */
1303                         if (!cp->descen) {
1304                                 productpartials(cp->kinp->isop);
1305                                 partialsinternal(cp);
1306                         }
1307                 }
1308         } while (cp != rp);
1309 }
1310
1311
1312 /* compute log-likelihood given internal branch with length arc
1313    between partials partiali and partials partialj */
1314 double intlkl(double arc)
1315 {
1316         double sumlk, slk;
1317         int r, s, i, j;
1318         dmatrix cdl;
1319
1320         cdl = Ctree->condlkl;
1321         for (r = 0; r < numcats; r++) {
1322                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1323         }
1324         for (r = 0; r < numcats; r++) {
1325                 for (s = 0; s < Numptrn; s++) {
1326                         sumlk = 0.0;
1327                         for (i = 0; i < tpmradix; i++) {
1328                                 slk = 0.0;
1329                                 for (j = 0; j < tpmradix; j++)
1330                                         slk += partialj[r][s][j] * ltprobr[r][i][j];
1331                                 sumlk += Freqtpm[i] * partiali[r][s][i] * slk;
1332                         }
1333                         cdl[r][s] = sumlk;
1334                 }
1335         }
1336
1337         /* compute total log-likelihood for current tree */
1338         Ctree->lklhd = comptotloglkl(cdl);
1339
1340         return -(Ctree->lklhd); /* we use a minimizing procedure */
1341 }
1342
1343
1344 /* optimize internal branch */
1345 void optinternalbranch(Node *op)
1346 {
1347         double arc, fx, f2x;
1348
1349         partiali = op->isop->partials;
1350         partialj = op->kinp->isop->partials;
1351         arc = op->length; /* nonclocklike branch lengths */
1352         if (arc <= MINARC) arc = MINARC+1.0;
1353         if (arc >= MAXARC) arc = MAXARC-1.0;
1354         arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1355         op->kinp->length = arc;
1356         op->length = arc;
1357
1358         /* variance of branch length */
1359         f2x = fabs(f2x);
1360         if (1.0/(MAXARC*MAXARC) < f2x)
1361                 op->varlen = 1.0/f2x;
1362         else
1363                 op->varlen = MAXARC*MAXARC;
1364 }
1365
1366
1367 /* compute log-likelihood given external branch with length arc
1368    between partials partiali and sequence seqchi */
1369 double extlkl(double arc)
1370 {
1371         double sumlk;
1372         int r, s, i, j;
1373         dvector opb;
1374         dmatrix cdl;
1375
1376         cdl = Ctree->condlkl;
1377         for (r = 0; r < numcats; r++) {
1378                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1379         }
1380         for (r = 0; r < numcats; r++) {
1381                 for (s = 0; s < Numptrn; s++) {
1382                         opb = partiali[r][s];
1383                         sumlk = 0.0;
1384                         if ((j = seqchi[s]) != tpmradix) {
1385                                 for (i = 0; i < tpmradix; i++)
1386                                         sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1387                         } else {
1388                                 for (i = 0; i < tpmradix; i++)
1389                                         sumlk += Freqtpm[i] * opb[i];
1390                         }
1391                         cdl[r][s] = sumlk;
1392                 }
1393         }
1394
1395         /* compute total log-likelihood for current tree */
1396         Ctree->lklhd = comptotloglkl(cdl);
1397
1398         return -(Ctree->lklhd); /* we use a minimizing procedure */
1399 }
1400
1401 /* optimize external branch */
1402 void optexternalbranch(Node *op)
1403 {
1404         double arc, fx, f2x;
1405
1406         partiali = op->isop->partials;
1407         seqchi = op->kinp->eprob;
1408         arc = op->length; /* nonclocklike branch lengths */
1409         if (arc <= MINARC) arc = MINARC+1.0;
1410         if (arc >= MAXARC) arc = MAXARC-1.0;
1411         arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1412         op->kinp->length = arc;
1413         op->length = arc;
1414
1415          /* variance of branch length */
1416         f2x = fabs(f2x);
1417         if (1.0/(MAXARC*MAXARC) < f2x)
1418                 op->varlen = 1.0/f2x;
1419         else
1420                 op->varlen = MAXARC*MAXARC;
1421 }
1422
1423
1424 /* finish likelihoods for each rate and site */
1425 void finishlkl(Node *op)
1426 {
1427         int r, k, i, j;
1428         double arc, sumlk, slk;
1429         dmatrix cdl;
1430
1431         partiali = op->isop->partials;
1432         partialj = op->kinp->isop->partials;
1433         cdl = Ctree->condlkl;
1434         arc = op->length; /* nonclocklike branch lengths */
1435         for (r = 0; r < numcats; r++) {
1436                 tprobmtrx(arc*Rates[r], ltprobr[r]);
1437         }
1438         for (r = 0; r < numcats; r++) {
1439                 for (k = 0; k < Numptrn; k++) {
1440                         sumlk = 0.0;
1441                         for (i = 0; i < tpmradix; i++) {
1442                                 slk = 0.0;
1443                                 for (j = 0; j < tpmradix; j++)
1444                                         slk += partialj[r][k][j] * ltprobr[r][i][j];
1445                                 sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1446                         }
1447                         cdl[r][k] = sumlk;
1448                 }
1449         }
1450 }
1451
1452
1453 /***************************** exported functions *****************************/
1454
1455
1456 /* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1457 double optlkl(Tree *tr)
1458 {
1459         Node *cp, *rp;
1460         int nconv;
1461         double lendiff;
1462
1463         clockmode = 0; /* nonclocklike branch lengths */
1464         nconv = 0;
1465         Converg = FALSE;
1466         initpartials(tr);
1467         for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1468
1469                 cp = rp = tr->rootp;
1470                 do {
1471                         cp = cp->isop->kinp;
1472                         productpartials(cp->kinp->isop);
1473                         if (cp->isop == NULL) { /* external node */
1474                                 cp = cp->kinp; /* not descen */
1475
1476                                 lendiff = cp->length;
1477                                 optexternalbranch(cp);
1478                                 lendiff = fabs(lendiff - cp->length);
1479                                 if (lendiff < EPSILON) nconv++;
1480                                 else nconv = 0;
1481
1482                                 partialsexternal(cp);
1483                         } else { /* internal node */
1484                                 if (cp->descen) {
1485                                         partialsinternal(cp);
1486                                 } else {
1487
1488                                         lendiff = cp->length;
1489                                         optinternalbranch(cp);
1490                                         lendiff = fabs(lendiff - cp->length);
1491                                         if (lendiff < EPSILON) nconv++;
1492                                         else nconv = 0;
1493
1494                                         /* eventually compute likelihoods for each site */
1495                                         if ((cp->number == Numibrnch-1 && lendiff < EPSILON) ||
1496                                         Numit == MAXIT-1) finishlkl(cp);
1497
1498                                         partialsinternal(cp);
1499                                 }
1500                         }
1501                         if (nconv >= Numbrnch) { /* convergence */
1502                                 Converg = TRUE;
1503                                 cp = rp; /* get out of here */
1504                         }
1505                 } while (cp != rp);
1506         }
1507
1508         /* compute total log-likelihood for current tree */
1509         return comptotloglkl(tr->condlkl);
1510 }
1511
1512
1513 /* compute likelihood of tree for given branch lengths */
1514 double treelkl(Tree *tr)
1515 {
1516         int i, k, r;
1517         Node *cp;
1518         dmatrix cdl;
1519         dcube prob1, prob2;
1520         double sumlk;
1521
1522         /* compute for each site and rate log-likelihoods */
1523         initpartials(tr);
1524         cp = tr->rootp;
1525         productpartials(cp->isop);
1526         prob1 = cp->partials;
1527         prob2 = cp->isop->partials;
1528         cdl = tr->condlkl;
1529         for (r = 0; r < numcats; r++) {
1530                 for (k = 0; k < Numptrn; k++) {
1531                         sumlk = 0.0;
1532                         for (i = 0; i < tpmradix; i++)
1533                                 sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1534                         cdl[r][k] = sumlk;
1535                 }
1536         }
1537
1538         /* return total log-likelihood for current tree */
1539         return comptotloglkl(cdl);
1540 }
1541
1542
1543 /******************************************************************************/
1544 /* least-squares estimate of branch lengths                                   */
1545 /******************************************************************************/
1546
1547
1548 /***************************** internal functions *****************************/
1549
1550
1551 void luequation(dmatrix amat, dvector yvec, int size)
1552 {
1553     double eps = 1.0e-20; /* ! */
1554         int i, j, k, l, maxi=0, idx;
1555         double sum, tmp, maxb, aw;
1556         dvector wk;
1557         ivector index;
1558
1559
1560         wk = new_dvector(size);
1561         index = new_ivector(size);
1562         aw = 1.0;
1563         for (i = 0; i < size; i++) {
1564                 maxb = 0.0;
1565                 for (j = 0; j < size; j++) {
1566                         if (fabs(amat[i][j]) > maxb)
1567                                 maxb = fabs(amat[i][j]);
1568                 }
1569                 if (maxb == 0.0) {
1570                         /* Singular matrix */
1571                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1572                         exit(1);
1573                 }
1574                 wk[i] = 1.0 / maxb;
1575         }
1576         for (j = 0; j < size; j++) {
1577                 for (i = 0; i < j; i++) {
1578                         sum = amat[i][j];
1579                         for (k = 0; k < i; k++)
1580                                 sum -= amat[i][k] * amat[k][j];
1581                         amat[i][j] = sum;
1582                 }
1583                 maxb = 0.0;
1584                 for (i = j; i < size; i++) {
1585                         sum = amat[i][j];
1586                         for (k = 0; k < j; k++)
1587                                 sum -= amat[i][k] * amat[k][j];
1588                         amat[i][j] = sum;
1589                         tmp = wk[i] * fabs(sum);
1590                         if (tmp >= maxb) {
1591                                 maxb = tmp;
1592                                 maxi = i;
1593                         }
1594                 }
1595                 if (j != maxi) {
1596                         for (k = 0; k < size; k++) {
1597                                 tmp = amat[maxi][k];
1598                                 amat[maxi][k] = amat[j][k];
1599                                 amat[j][k] = tmp;
1600                         }
1601                         aw = -aw;
1602                         wk[maxi] = wk[j];
1603                 }
1604                 index[j] = maxi;
1605                 if (amat[j][j] == 0.0)
1606                         amat[j][j] = eps;
1607                 if (j != size - 1) {
1608                         tmp = 1.0 / amat[j][j];
1609                         for (i = j + 1; i < size; i++)
1610                                 amat[i][j] *= tmp;
1611                 }
1612         }
1613         l = -1;
1614         for (i = 0; i < size; i++) {
1615                 idx = index[i];
1616                 sum = yvec[idx];
1617                 yvec[idx] = yvec[i];
1618                 if (l != -1) {
1619                         for (j = l; j < i; j++)
1620                                 sum -= amat[i][j] * yvec[j];
1621                 } else if (sum != 0.0)
1622                         l = i;
1623                 yvec[i] = sum;
1624         }
1625         for (i = size - 1; i >= 0; i--) {
1626                 sum = yvec[i];
1627                 for (j = i + 1; j < size; j++)
1628                         sum -= amat[i][j] * yvec[j];
1629                 yvec[i] = sum / amat[i][i];
1630         }
1631         free_ivector(index);
1632         free_dvector(wk);
1633 }
1634
1635
1636 /* least square estimation of branch lengths
1637    used for the approximate ML and as starting point
1638    in the calculation of the exact value of the ML */
1639 void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1640 {
1641         int i, i1, j, j1, j2, k, numbrnch, numpair;
1642         double sum, leng, alllen, rss;
1643         ivector pths;
1644         dmatrix atmt, atamt;
1645         Node **ebp, **ibp;
1646
1647         numbrnch = numspc + numibrnch;
1648         numpair = (numspc * (numspc - 1)) / 2;
1649         atmt = new_dmatrix(numbrnch, numpair);
1650         atamt = new_dmatrix(numbrnch, numbrnch);
1651         ebp = tr->ebrnchp;
1652         ibp = tr->ibrnchp;
1653         for (i = 0; i < numspc; i++) {
1654                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1655                         if (j1 == i) {
1656                                 for (j2 = 0; j2 < j1; j2++, j++) {
1657                                         atmt[i][j] = 1.0;
1658                                 }
1659                         } else {
1660                                 for (j2 = 0; j2 < j1; j2++, j++) {
1661                                         if (j2 == i)
1662                                                 atmt[i][j] = 1.0;
1663                                         else
1664                                                 atmt[i][j] = 0.0;
1665                                 }
1666                         }
1667                 }
1668         }
1669         for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1670                 pths = ibp[i1]->paths;
1671                 for (j1 = 1, j = 0; j1 < numspc; j1++) {
1672                         for (j2 = 0; j2 < j1; j2++, j++) {
1673                                 if (pths[j1] != pths[j2])
1674                                         atmt[i][j] = 1.0;
1675                                 else
1676                                         atmt[i][j] = 0.0;
1677                         }
1678                 }
1679         }
1680         for (i = 0; i < numbrnch; i++) {
1681                 for (j = 0; j <= i; j++) {
1682                         for (k = 0, sum = 0.0; k < numpair; k++)
1683                                 sum += atmt[i][k] * atmt[j][k];
1684                         atamt[i][j] = sum;
1685                         atamt[j][i] = sum;
1686                 }
1687         }
1688         for (i = 0; i < numbrnch; i++) {
1689                 for (k = 0, sum = 0.0; k < numpair; k++)
1690                         sum += atmt[i][k] * distanvec[k];
1691                 Brnlength[i] = sum;
1692         }
1693         luequation(atamt, Brnlength, numbrnch);
1694         for (i = 0, rss = 0.0; i < numpair; i++) {
1695                 sum = distanvec[i];
1696                 for (j = 0; j < numbrnch; j++) {
1697                         if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1698                                 sum -= Brnlength[j];
1699                 }
1700                 rss += sum * sum;
1701         }
1702         tr->rssleast = sqrt(rss);
1703         alllen = 0.0;
1704         for (i = 0; i < numspc; i++) {
1705                 leng = Brnlength[i];
1706                 alllen += leng;
1707                 if (leng < MINARC) leng = MINARC;
1708                 if (leng > MAXARC) leng = MAXARC;
1709                 if (clockmode) { /* clock */
1710                         ebp[i]->lengthc = leng;
1711                         ebp[i]->kinp->lengthc = leng;
1712                 } else { /* no clock */
1713                         ebp[i]->length = leng;
1714                         ebp[i]->kinp->length = leng;
1715                 }
1716                 Brnlength[i] = leng;
1717         }
1718         for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1719                 leng = Brnlength[j];
1720                 alllen += leng;
1721                 if (leng < MINARC) leng = MINARC;
1722                 if (leng > MAXARC) leng = MAXARC;
1723                 if (clockmode) { /* clock */
1724                         ibp[i]->lengthc = leng;
1725                         ibp[i]->kinp->lengthc = leng;
1726                 } else { /* no clock */
1727                         ibp[i]->length = leng;
1728                         ibp[i]->kinp->length = leng;
1729                 }
1730                 Brnlength[j] = leng;
1731         }
1732         free_dmatrix(atmt);
1733         free_dmatrix(atamt);
1734 }