5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
28 #ifndef PARALLEL /* because printf() runs significantly faster */
29 /* than fprintf(stdout) on an Apple McIntosh */
31 # define FPRINTF printf
34 # define FPRINTF fprintf
35 # define STDOUTFILE STDOUT,
38 /* prototypes for two functions of puzzle2.c */
39 void fputid10(FILE *, int);
40 int fputid(FILE *, int);
43 /******************************************************************************/
45 /******************************************************************************/
47 /* read user tree, drop all blanks, tabs, and newlines.
48 Drop edgelengths (after :) but keep internal
49 labels. Check whether all pairs of brackets match. */
50 void getusertree(FILE *itfp, cvector tr, int maxlen)
55 /* look for opening bracket */
61 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n");
64 if (ci == '[') comment = 1;
65 if ((ci == ']') && comment) {
69 } while (comment || ((char) ci != '('));
74 /* get next character (skip blanks, newlines, and tabs) */
78 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n");
81 if (ci == '[') comment = 1;
82 if ((ci == ']') && comment) {
86 } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t');
88 if ((char) ci == ':') { /* skip characters until a ,) appears */
92 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n");
95 if (ci == '[') comment = 1;
96 if ((ci == ']') && comment) {
100 } while (comment || ((char) ci != ',' && (char) ci != ')') );
103 if ((char) ci == '(') {
106 if ((char) ci == ')') {
113 } while (((char) ci != ';') && (n != maxlen-2));
116 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n");
121 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n");
130 Node *internalnode(Tree *tr, char **chpp, int *ninode)
133 int i, j, dvg, ff, stop, numc;
134 char ident[100], idcomp[27]; /*CZ*/
138 if (**chpp == '(') { /* process subgroup */
140 xp = internalnode(tr, chpp, ninode);
143 while (**chpp != ')') {
144 if (**chpp == '\0') {
145 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
149 /* insert edges around node */
150 np = internalnode(tr, chpp, ninode);
155 /* closing bracket reached */
159 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n");
163 if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */
164 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
168 rp = tr->ibrnchp[*ninode];
172 for (j = 0; j < Numspc; j++)
176 for (j = 0; j < Numspc; j++) {
177 if (xp->paths[j] == 1)
184 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
185 if ((**chpp) == '\0') {
186 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
190 /* read internal label into rp->label (max. 20 characters) */
191 rp->label = new_cvector(21);
192 (rp->label)[0] = **chpp;
193 (rp->label)[1] = '\0';
194 for (numc = 1; numc < 20; numc++) {
196 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
197 if ((**chpp) == '\0') {
198 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
201 (rp->label)[numc] = **chpp;
202 (rp->label)[numc+1] = '\0';
204 do { /* skip the rest of the internal label */
206 if ((**chpp) == '\0') {
207 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
210 } while (((**chpp) != ',' && (**chpp) != ')'));
214 } else { /* process species names */
215 /* read species name */
216 for (idp = ident; **chpp != ',' &&
217 **chpp != ')' && **chpp != '\0'; (*chpp)++) {
221 /* look for internal number */
222 idcomp[26] = '\0'; /*CZ*/
224 for (i = 0; i < Maxspc; i++) {
228 idcomp[ff] = Identif[i][ff];
230 if (idcomp[ff-1] == ' ') stop = TRUE;
231 } while (!stop && (ff != 26)); /*CZ*/
232 if (stop) idcomp[ff-1] = '\0';
234 if (!strcmp(ident, idcomp)) {
236 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '");
237 FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident);
241 return tr->ebrnchp[i]->kinp;
245 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident);
248 return NULL; /* never returned but without some compilers complain */
251 /* make tree structure, the tree description may contain internal
252 labels but no edge lengths */
253 void constructtree(Tree *tr, cvector strtree)
262 usedtaxa = new_ivector(Maxspc);
263 for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE;
265 xp = internalnode(tr, &chp, &ninode);
268 while (*chp != ')') { /* look for closing bracket */
270 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
274 /* insert edges around node */
275 np = internalnode(tr, &chp, &ninode);
281 for (i = 0; i < Maxspc; i++)
282 if (usedtaxa[i] == FALSE) {
283 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n");
287 /* closing bracket reached */
289 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
294 Numbrnch = Numspc + ninode;
297 if (*chp == ';' || *chp == '\0') {
298 free_ivector(usedtaxa);
302 /* copy last internal label (max. 20 characters) */
303 xp->label = new_cvector(21);
304 (xp->label)[0] = *chp;
305 (xp->label)[1] = '\0';
306 for (numc = 1; numc < 20; numc++) {
308 if (*chp == ';' || *chp == '\0') {
309 free_ivector(usedtaxa);
312 (xp->label)[numc] = *chp;
313 (xp->label)[numc+1] = '\0';
316 free_ivector(usedtaxa);
321 /* remove possible basal bifurcation */
322 void removebasalbif(cvector strtree)
324 int n, c, brak, cutflag, h;
326 /* check how many OTUs on basal level */
331 if (strtree[n] == '(') brak++;
332 if (strtree[n] == ')') brak--;
334 if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */
337 } while (strtree[n] != '\0');
339 /* if only 1 OTU inside outer bracket stop now */
341 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n");
345 /* if only 2 OTUs inside outer bracket delete second pair of
346 brackets from the right to remove basal bifurcation */
352 cutflag = 0; /* not yet cutted */
355 if (strtree[n] == '(') brak++;
356 if (strtree[n] == ')') brak--;
358 if (brak == 2 && cutflag == 0) cutflag = 1; /* cutting */
359 if (brak == 1 && cutflag == 1) {
360 cutflag = 2; /* cutted */
361 /* leave out internal label */
364 } while (strtree[n+h] != ')' && strtree[n+h] != ',');
368 if (cutflag == 1) strtree[n] = strtree[n+1];
369 if (cutflag == 2) strtree[n-1] = strtree[n+h];
372 } while (strtree[n] != '\0');
377 void makeusertree(FILE *itfp)
381 strtree = new_cvector(23*Maxspc); /* for treefile */
382 getusertree(itfp, strtree, 23*Maxspc);
383 removebasalbif(strtree);
384 constructtree(Ctree, strtree);
385 free_cvector(strtree);
389 /******************************************************************************/
390 /* memory organisation for maximum likelihood tree */
391 /******************************************************************************/
393 /* initialise new tree */
394 Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint)
400 maxibrnch = maxspc - 3;
401 heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *));
402 if (heights == NULL) maerror("heights in new_tree");
403 tr = (Tree *) malloc(sizeof(Tree));
404 if (tr == NULL) maerror("tr in new_tree");
405 tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *));
406 if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree");
407 tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *));
408 if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree");
409 tr->condlkl = new_dmatrix(numcats, numptrn);
410 for (n = 0; n < maxspc; n++) {
411 dp = (Node *) malloc(sizeof(Node));
412 if (dp == NULL) maerror("dp in new_tree");
413 up = (Node *) malloc(sizeof(Node));
414 if (up == NULL) maerror("up in new_tree");
429 dp->paths = new_ivector(maxspc);
430 up->paths = dp->paths;
431 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
433 dp->eprob = seqconint[n];
436 up->partials = new_dcube(numcats, numptrn, tpmradix);
441 for (n = 0; n < maxibrnch; n++) {
442 dp = (Node *) malloc(sizeof(Node));
443 if (dp == NULL) maerror("dp in new_tree");
444 up = (Node *) malloc(sizeof(Node));
445 if (up == NULL) maerror("up in new_tree");
460 dp->paths = new_ivector(maxspc);
461 up->paths = dp->paths;
462 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
465 dp->partials = new_dcube(numcats, numptrn, tpmradix);
466 up->partials = new_dcube(numcats, numptrn, tpmradix);
474 * reserve memory for lengths of the tree branches
475 * and for the distance matrix as a vector
476 * (needed for LS estimation of tree branch lengths)
479 Brnlength = new_dvector(2 * maxspc - 3);
480 Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2);
486 /* initialise quartet tree */
487 Tree *new_quartet(int numptrn, cmatrix seqconint)
493 heights = (Node **) malloc((unsigned)2 * sizeof(Node *));
494 if (heights == NULL) maerror("heights in new_quartet");
495 /* reserve memory for tree */
496 tr = (Tree *) malloc(sizeof(Tree));
497 if (tr == NULL) maerror("tr in new_quartet");
498 tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *));
499 if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet");
500 tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *));
501 if (tr->ibrnchp == NULL) maerror("ibrnchp in new_quartet");
502 tr->condlkl = new_dmatrix(numcats, numptrn);
503 /* reserve memory for nodes */
504 for (n = 0; n < 4; n++) {
505 dp = (Node *) malloc(sizeof(Node));
506 if (dp == NULL) maerror("dp in new_quartet");
507 up = (Node *) malloc(sizeof(Node));
508 if (up == NULL) maerror("dp in new_quartet");
522 dp->paths = new_ivector(4);
523 up->paths = dp->paths;
524 for (i = 0; i < 4; i++) dp->paths[i] = 0;
526 dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */
529 up->partials = new_dcube(numcats, numptrn, tpmradix);
533 /* reserve memory for internal branch */
534 dp = (Node *) malloc(sizeof(Node));
535 if (dp == NULL) maerror("dp in new_quartet");
536 up = (Node *) malloc(sizeof(Node));
537 if (up == NULL) maerror("dp in new_quartet");
538 dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */
539 up->isop = tr->ebrnchp[0]->kinp;
552 dp->paths = new_ivector(4);
553 up->paths = dp->paths;
560 dp->partials = new_dcube(numcats, numptrn, tpmradix);
561 up->partials = new_dcube(numcats, numptrn, tpmradix);
567 /* connect external branches */
568 tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp;
569 tr->ebrnchp[1]->kinp->isop = tr->rootp;
570 tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp;
571 tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp;
574 * reserve memory for lengths of the five branches
575 * of a quartet and for the six possible distances
576 * (needed for LS estimation of branch lengths)
578 Brnlength = new_dvector(NUMQBRNCH);
579 Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2);
585 /* free tree memory */
586 void free_tree(Tree *tr, int taxa)
592 free_dmatrix(tr->condlkl);
593 for (n = 0; n < taxa; n++) {
596 free_ivector(dp->paths);
597 free_dcube(up->partials);
602 for (n = 0; n < (taxa-3); n++) {
605 free_dcube(dp->partials);
606 free_dcube(up->partials);
607 free_ivector(dp->paths);
613 free_dvector(Brnlength); /* branch lengths (for LS estimation) */
614 free_dvector(Distanvec); /* distances (for LS estimation) */
618 /* make (a,b)-(c,d) quartet
624 species numbers range from 0 to Maxspc - 1 */
626 void make_quartet(int a, int b, int c, int d)
628 /* place sequences */
629 /*Ctree->ebrnchp[0]->eprob = Seqpat[a];
630 Ctree->ebrnchp[1]->eprob = Seqpat[b];
631 Ctree->ebrnchp[2]->eprob = Seqpat[c];
632 Ctree->ebrnchp[3]->eprob = Seqpat[d];
634 /* make distance vector */
635 /*Distanvec[0] = Distanmat[b][a];
636 Distanvec[1] = Distanmat[c][a];
637 Distanvec[2] = Distanmat[c][b];
638 Distanvec[3] = Distanmat[d][a];
639 Distanvec[4] = Distanmat[d][b];
640 Distanvec[5] = Distanmat[d][c];
644 /* write distance matrix as vector */
645 void changedistan(dmatrix distanmat, dvector distanvec, int numspc)
649 for (k = 0, i = 1; i < numspc; i++) {
650 for (j = 0; j < i; j++, k++)
651 distanvec[k] = distanmat[i][j];
656 /******************************************************************************/
657 /* computation of maximum likelihood tree */
658 /******************************************************************************/
661 /* compute the likelihood for (a,b)-(c,d) quartet */
662 double quartet_lklhd(int a, int b, int c, int d)
664 /* reserve memory for quartet if necessary */
665 if (mlmode != 1) { /* no quartet tree */
667 free_tree(Ctree, Numspc);
668 Ctree = new_quartet(Numptrn, Seqpat);
669 Numbrnch = NUMQBRNCH;
670 Numibrnch = NUMQIBRNCH;
675 /* make (a,b)-(c,d) quartet */
676 make_quartet(a,b,c,d);
678 clockmode = 0; /* nonclocklike branch lengths */
680 /* least square estimate for branch length */
681 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
683 /* compute likelihood */
684 Ctree->lklhd = optlkl(Ctree);
690 /* compute the approximate likelihood for (a,b)-(c,d) quartet */
691 double quartet_alklhd(int a, int b, int c, int d)
693 /* reserve memory for quartet if necessary */
694 if (mlmode != 1) { /* no quartet tree */
696 free_tree(Ctree, Numspc);
697 Ctree = new_quartet(Numptrn, Seqpat);
698 Numbrnch = NUMQBRNCH;
699 Numibrnch = NUMQIBRNCH;
704 /* make (a,b)-(c,d) quartet */
705 make_quartet(a,b,c,d);
707 clockmode = 0; /* nonclocklike branch lengths */
709 /* least square estimate for branch length */
710 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
712 /* compute likelihood */
713 Ctree->lklhd = treelkl(Ctree);
719 /* read usertree from file to memory */
720 void readusertree(FILE *ifp)
722 /* reserve memory for tree if necessary */
723 if (mlmode != 2) { /* no tree */
725 free_tree(Ctree, Numspc);
726 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
727 Numbrnch = 2*Maxspc-3;
728 Numibrnch = Maxspc-3;
738 /* compute the likelihood of a usertree */
739 double usertree_lklhd()
748 /* compute the approximate likelihood of a usertree */
749 double usertree_alklhd()
757 /* preparation for ML analysis */
760 /* number of states and code length */
761 tpmradix = gettpmradix();
763 /* declare variables */
764 Eval = new_dvector(tpmradix);
765 Evec = new_dmatrix(tpmradix,tpmradix);
766 Ievc = new_dmatrix(tpmradix,tpmradix);
767 iexp = new_dmatrix(tpmradix,tpmradix);
768 Alias = new_ivector(Maxsite);
770 /* process sequence information */
772 bestrate = new_ivector(Numptrn);
774 /* compute transition probability matrix */
777 /* non-zero rate categories */
778 Rates = new_dvector(numcats);
780 ltprobr = new_dcube(numcats, tpmradix,tpmradix);
782 /* compute distance matrix */
783 Distanmat = new_dvector( Maxspc - 1 );
786 /* initialize tree pointer for quartet tree */
788 Ctree = new_quartet(Numptrn, Seqpat);
789 Numbrnch = NUMQBRNCH;
790 Numibrnch = NUMQIBRNCH;
793 /* computing ML distances */
798 /* recompute ml distances for quartet only */
799 void distupdate(int a, int b, int c, int d)
801 /* update distance matrix */
802 /* consider only entries relevant to quartet */
804 Distanmat[a][b] = mldistance(a, b);
805 Distanmat[b][a] = Distanmat[a][b];
806 Distanmat[a][c] = mldistance(a, c);
807 Distanmat[c][a] = Distanmat[a][c];
808 Distanmat[a][d] = mldistance(a, d);
809 Distanmat[d][a] = Distanmat[a][d];
810 Distanmat[b][c] = mldistance(b, c);
811 Distanmat[c][b] = Distanmat[b][c];
812 Distanmat[b][d] = mldistance(b, d);
813 Distanmat[d][b] = Distanmat[b][d];
814 Distanmat[c][d] = mldistance(c, d);
815 Distanmat[d][c] = Distanmat[c][d];
820 /* cleanup after ML analysis */
824 free_tree(Ctree, Numspc);
825 free_ivector(bestrate);
827 free_cmatrix(Seqpat);
828 free_ivector(constpat);
829 free_ivector(Weight);
830 free_dvector(Distanmat); /* CZ */
840 /******************************************************************************/
842 /******************************************************************************/
850 void prbranch(Node *up, int depth, int m, int maxm,
851 ivector umbrella, ivector column, FILE *outfp)
853 int i, num, n, maxn, lim;
857 if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) {
858 column[depth] = MAXLENG;
861 column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3;
865 if (up->isop == NULL) { /* external branch */
866 num = up->number + 1; /* offset */
867 if (m == 1) umbrella[depth - 1] = TRUE;
868 for (i = 0; i < depth; i++) {
870 fprintf(outfp, "%*c", column[i], ':');
872 fprintf(outfp, "%*c", column[i], ' ');
875 umbrella[depth - 1] = FALSE;
876 for (i = 0, lim = column[depth] - 3; i < lim; i++)
878 fprintf(outfp, "-%d ", num);
880 fputid(outfp, up->number);
888 num = up->number + 1 + Numspc; /* offset, internal branch */
889 for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++)
891 for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) {
892 prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp);
893 if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE;
895 for (i = 0; i < depth; i++) {
897 fprintf(outfp, "%*c", column[i], ':');
899 fprintf(outfp, "%*c", column[i], ' ');
901 if (n == maxn / 2) { /* internal branch */
902 for (i = 0, lim = column[depth] - 3; i < lim; i++)
905 fprintf(outfp, "--%d", num);
907 fprintf(outfp, "-%2d", num);
909 fprintf(outfp, "%3d", num);
912 fprintf(outfp, "%*c", column[depth], ':');
914 fprintf(outfp, "%*c", column[depth], ' ');
919 if (m == maxm) umbrella[depth - 1] = FALSE;
925 void getproportion(double *proportion, dvector distanvec, int numspc)
930 maxpair = (numspc*(numspc-1))/2;
933 for (i = 0; i < maxpair; i++) {
934 if (distanvec[i] > maxdis) {
935 maxdis = distanvec[i];
938 *proportion = (double) MAXCOLUMN / (maxdis * 3.0);
939 if (*proportion > 1.0) *proportion = 1.0;
943 void prtopology(FILE *outfp)
950 getproportion(&Proportion, Distanvec, Numspc);
952 umbrella = new_ivector(Numspc);
953 column = new_ivector(Numspc);
955 for (n = 0; n < Numspc; n++) {
963 /* original code: rp = Ctree->rootp */
964 /* but we want to print the first group in the
965 trichotomy as outgroup at the bottom! */
966 rp = Ctree->rootp->isop;
968 for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++)
977 prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp);
978 if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':');
981 free_ivector(umbrella);
982 free_ivector(column);
986 /* print unrooted tree file with branch lengths */
987 void fputphylogeny(FILE *fp)
992 cp = rp = Ctree->rootp;
997 if (cp->isop == NULL) { /* external node */
1002 n += fputid(fp, cp->number);
1003 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1006 } else { /* internal node */
1021 /* internal label */
1022 if (cp->kinp->label != NULL) {
1023 fprintf(fp, "%s", cp->kinp->label);
1024 n += strlen(cp->kinp->label);
1026 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1030 if (!cp->descen && !cp->isop->descen && cp != rp) {
1031 putc(',', fp); /* not last subtree */
1036 /* internal label */
1037 if (cp->label != NULL)
1038 fprintf(fp, "%s", cp->label);
1043 void resulttree(FILE *outfp)
1045 int n, ne, closeflag;
1052 fprintf(outfp, "\n branch length nc/c");
1053 fprintf(outfp, " branch length nc/c (= non-clock/clock)\n");
1055 fprintf(outfp, "\n branch length S.E.");
1056 fprintf(outfp, " branch length S.E.\n");
1058 for (n = 0; n < Numspc; n++) {
1059 ep = Ctree->ebrnchp[n];
1061 fputid10(outfp, ne);
1063 fprintf(outfp, "%3d", ne + 1);
1064 blen = (clockmode ? ep->lengthc : ep->length);
1065 fprintf(outfp, "%9.5f", blen*0.01);
1066 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1068 fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc));
1070 fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen));
1071 if (n < Numibrnch) {
1072 ip = Ctree->ibrnchp[n];
1073 fprintf(outfp, "%8d", n + 1 + Numspc);
1074 blen = (clockmode ? ip->lengthc : ip->length);
1075 fprintf(outfp, "%9.5f", blen*0.01);
1076 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1078 fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc));
1080 fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen));
1083 if (n == Numspc - 3) {
1085 } else if (n == Numspc - 2) {
1088 fprintf(outfp, " No convergence after %d iterations!\n", Numitc);
1090 fprintf(outfp, " %d iterations until convergence\n", Numitc);
1093 fprintf(outfp, " No convergence after %d iterations!\n", Numit);
1095 fprintf(outfp, " %d iterations until convergence\n", Numit);
1097 } else if (n == Numspc - 1) {
1098 fprintf(outfp, " log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd));
1105 fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n");
1109 /******************************************************************************/
1110 /* Neighbor-joining tree */
1111 /******************************************************************************/
1114 /* compute NJ tree and write to file */
1115 void njtree(FILE *fp)
1117 /* reserve memory for tree if necessary */
1118 if (mlmode != 3) { /* no tree */
1120 free_tree(Ctree, Numspc);
1121 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
1122 Numbrnch = 2*Maxspc-3;
1123 Numibrnch = Maxspc-3;
1128 /* construct NJ tree from distance matrix */
1129 njdistantree(Ctree);
1135 /* construct NJ tree from distance matrix */
1136 void njdistantree(Tree *tr)
1138 /* removed, CZ, 05/16/01 */
1141 /******************************************************************************/
1142 /* find best assignment of rate categories */
1143 /******************************************************************************/
1145 /* find best assignment of rate categories */
1146 void findbestratecombination()
1149 double bestvalue, fv2;
1153 cdl = Ctree->condlkl;
1154 catprob = new_dvector(numcats+1);
1155 fv2 = (1.0-fracinv)/(double) numcats;
1157 for (k = 0; k < Numptrn; k++) {
1159 if (constpat[k] == TRUE)
1160 catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]];
1163 /* non-zero-rates */
1164 for (u = 1; u < numcats+1; u++)
1165 catprob[u] = fv2*cdl[u-1][k];
1167 bestvalue = catprob[0];
1169 for (u = 1; u < numcats+1; u++)
1170 if (catprob[u] >= bestvalue) {
1171 bestvalue = catprob[u];
1175 free_dvector(catprob);
1179 /* print best assignment of rate categories */
1180 void printbestratecombination(FILE *fp)
1184 for (s = 0; s < Maxsite; s++) {
1186 fprintf(fp, "%2d", bestrate[k]);
1187 if ((s+1) % 30 == 0)
1189 else if ((s+1) % 10 == 0)
1197 /******************************************************************************/
1198 /* computation of clocklike branch lengths */
1199 /******************************************************************************/
1201 /* checks wether e is a valid edge specification */
1202 int checkedge(int e)
1204 /* there are Numspc external branches:
1206 there are Numibrnch internal branches:
1207 Numspc - Numspc+Numibrnch-1
1210 if (e < 0) return FALSE;
1211 if (e < Numspc+Numibrnch) return TRUE;
1215 /* print topology of subtree */
1216 void fputsubstree(FILE *fp, Node *ip)
1220 if (ip->isop == NULL) { /* terminal nodes */
1221 numtc += fputid(fp, ip->number);
1227 cp = cp->isop->kinp;
1228 if (cp->isop == NULL) { /* external node */
1229 numtc += fputid(fp, cp->number);
1230 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1233 } else { /* internal node */
1234 if (cp->height > 0.0) {
1237 } else if (cp->height < 0.0) {
1244 /* internal label */
1245 if (cp->kinp->label != NULL) {
1246 fprintf(fp, "%s", cp->kinp->label);
1247 numtc += strlen(cp->kinp->label);
1253 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1261 if (cp->height <= 0.0 && cp->isop->height <= 0.0 &&
1263 putc(',', fp); /* not last subtree */
1270 } while (cp->isop != ip);
1281 /* print rooted tree file */
1282 void fputrooted(FILE *fp, int e)
1286 /* to be called only after clocklike branch
1287 lengths have been computed */
1289 /* pointer to root branch */
1290 if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1291 else rootbr = Ctree->ibrnchp[e - Numspc];
1295 fputsubstree(fp, rootbr);
1296 /* internal label */
1297 if (rootbr->label != NULL) {
1298 fprintf(fp, "%s", rootbr->label);
1299 numtc += strlen(rootbr->label);
1305 fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01);
1311 fputsubstree(fp, rootbr->kinp);
1312 /* internal label */
1313 if (rootbr->kinp->label != NULL) {
1314 fprintf(fp, "%s", rootbr->kinp->label);
1315 numtc += strlen(rootbr->kinp->label);
1321 fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01);
1324 /* finds heights in subtree */
1325 void findheights(Node *ip)
1329 if (ip->isop != NULL) { /* forget terminal nodes */
1333 /* initialise node */
1334 cp->height = 1.0; /* up */
1336 while (rp->isop != cp) {
1338 rp->height = -1.0; /* down */
1342 cp = cp->isop->kinp;
1343 if (cp->isop == NULL) { /* external node */
1345 } else { /* internal node */
1346 if (cp->height == 0.0) { /* node not yet visited */
1347 cp->height = 1.0; /* up */
1349 while (rp->isop != cp) {
1351 rp->height = -1.0; /* down */
1353 } else if (cp->kinp->height == 1.0) {
1354 /* cp->kinp is next height pointer */
1355 heights[Numhts] = cp->kinp;
1359 } while (cp->isop != ip);
1360 /* ip is last height pointer */
1361 heights[Numhts] = ip;
1367 /* initialise clocklike branch lengths (with root on edge e) */
1368 void initclock(int e)
1373 /* approximate likelihood under the constaining assumption of
1374 clocklike branch lengths (with root on edge e) */
1375 double clock_alklhd(int e)
1378 Ctree->lklhdc = treelkl(Ctree);
1380 return Ctree->lklhdc;
1383 /* log-likelihood given height ht at node pointed to by chep */
1384 double heightlkl(double ht)
1389 /* adjust branch lengths */
1391 /* descendent branches */
1393 while (rp->isop != chep) {
1395 len = chep->height - rp->kinp->height;
1396 rp->kinp->lengthc = len;
1400 if (chep == rootbr || chep->kinp == rootbr) {
1401 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1402 chep->lengthc = len;
1403 chep->kinp->lengthc = len;
1406 while (rp->isop->height <= 0.0)
1408 chep->lengthc = rp->isop->height - chep->height;
1409 chep->kinp->lengthc = rp->isop->height - chep->height;
1412 /* compute likelihood */
1413 Ctree->lklhdc = treelkl(Ctree);
1415 return -(Ctree->lklhdc); /* we use a minimizing procedure */
1418 /* optimize current height */
1419 void optheight(void)
1421 double he, fx, f2x, minh, maxh, len;
1424 /* current height */
1430 while (rp->isop != chep) {
1432 if (rp->kinp->height > minh)
1433 minh = rp->kinp->height;
1438 if (chep == rootbr || chep->kinp == rootbr) {
1442 while (rp->isop->height <= 0.0)
1444 maxh = rp->isop->height;
1448 /* check borders for height */
1449 if (he < minh) he = minh;
1450 if (he > maxh) he = maxh;
1453 if (!(he == minh && he == maxh))
1454 he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x);
1456 /* variance of height */
1458 if (1.0/(maxhroot*maxhroot) < f2x)
1459 chep->varheight = 1.0/f2x;
1461 chep->varheight = maxhroot*maxhroot;
1463 /* adjust branch lengths */
1465 /* descendent branches */
1467 while (rp->isop != chep) {
1469 len = chep->height - rp->kinp->height;
1470 rp->kinp->lengthc = len;
1474 if (chep == rootbr || chep->kinp == rootbr) {
1475 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1476 chep->lengthc = len;
1477 chep->kinp->lengthc = len;
1480 while (rp->isop->height <= 0.0)
1482 chep->lengthc = rp->isop->height - chep->height;
1483 chep->kinp->lengthc = rp->isop->height - chep->height;
1487 /* log-likelihood given height ht at root */
1488 double rheightlkl(double ht)
1492 /* adjust branch lengths */
1494 len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1495 rootbr->lengthc = len;
1496 rootbr->kinp->lengthc = len;
1498 /* compute likelihood */
1499 Ctree->lklhdc = treelkl(Ctree);
1501 return -(Ctree->lklhdc); /* we use a minimizing procedure */
1504 /* optimize height of root */
1505 void optrheight(void)
1507 double he, fx, f2x, minh, len;
1509 /* current height */
1513 if (rootbr->height > rootbr->kinp->height)
1514 minh = rootbr->height;
1516 minh = rootbr->kinp->height;
1519 /* check borders for height */
1520 if (he < minh) he = minh;
1521 if (he > maxhroot) he = maxhroot;
1524 he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x);
1526 /* variance of height of root */
1528 if (1.0/(maxhroot*maxhroot) < f2x)
1531 varhroot = maxhroot*maxhroot;
1533 /* adjust branch lengths */
1535 len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1536 rootbr->lengthc = len;
1537 rootbr->kinp->lengthc = len;
1540 /* exact likelihood under the constaining assumption of
1541 clocklike branch lengths (with root on edge e) */
1542 double clock_lklhd(int e)
1557 /* optimize height of root */
1560 if (fabs(old - hroot) < HEPSILON) nconv++;
1562 /* optimize height of nodes */
1563 for (h = Numhts-1; h >= 0; h--) {
1565 /* pointer chep to current height node */
1568 /* store old value */
1571 /* find better height */
1575 if (fabs(old - chep->height) < HEPSILON) nconv++;
1578 if (nconv == Numhts+1) Convergc = TRUE;
1580 } while (Numitc < MAXIT && !Convergc);
1582 /* compute final likelihood */
1583 Ctree->lklhdc = treelkl(Ctree);
1585 return Ctree->lklhdc;
1588 /* find out the edge containing the root */
1592 double logbest, logtest;
1594 /* compute the likelihood for all edges and take the edge with
1595 best likelihood (using approximate ML) */
1598 logbest = clock_alklhd(0);
1600 for (e = 1; e < Numspc+Numibrnch; e++) {
1601 logtest = clock_alklhd(e);
1602 if (logtest > logbest) {
1606 } else if (logtest == logbest) {
1614 /* show heights and corresponding standard errors */
1615 void resultheights(FILE *fp)
1620 fprintf(fp, " height S.E. of node common to branches\n");
1621 for (h = 0; h < Numhts; h++) {
1622 fprintf(fp, "%.5f %.5f ", (heights[h]->height)*0.01,
1623 sqrt(heights[h]->varheight)*0.01);
1626 num = (cp->number) + 1;
1627 if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */
1628 fprintf(fp, "%d ", num);
1630 } while (cp != heights[h]);
1634 fprintf(fp, "%.5f %.5f of root at branch %d\n",
1635 hroot*0.01, sqrt(varhroot)*0.01, locroot+1);