5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
16 /* Modified by Christian Zmasek to:
19 !WARNING: Use ONLY together with FORESTER/RIO!
20 !For all other puposes download the excellent original!
22 last modification: 05/22/01
25 Node *internalnode(Tree *tr, char **chpp, int *ninode):
27 char ident[100], idcomp[11]; -> char ident[100], idcomp[27];
29 idcomp[10] = '\0'; -> idcomp[26] = '\0';
31 } while (!stop && (ff != 10)); -> } while (!stop && (ff != 26));
51 #ifndef PARALLEL /* because printf() runs significantly faster */
52 /* than fprintf(stdout) on an Apple McIntosh */
54 # define FPRINTF printf
57 # define FPRINTF fprintf
58 # define STDOUTFILE STDOUT,
61 /* prototypes for two functions of puzzle2.c */
62 void fputid10(FILE *, int);
63 int fputid(FILE *, int);
66 /******************************************************************************/
68 /******************************************************************************/
70 /* read user tree, drop all blanks, tabs, and newlines.
71 Drop edgelengths (after :) but keep internal
72 labels. Check whether all pairs of brackets match. */
73 void getusertree(FILE *itfp, cvector tr, int maxlen)
78 /* look for opening bracket */
84 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n");
87 if (ci == '[') comment = 1;
88 if ((ci == ']') && comment) {
92 } while (comment || ((char) ci != '('));
97 /* get next character (skip blanks, newlines, and tabs) */
101 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n");
104 if (ci == '[') comment = 1;
105 if ((ci == ']') && comment) {
109 } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t');
111 if ((char) ci == ':') { /* skip characters until a ,) appears */
115 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n");
118 if (ci == '[') comment = 1;
119 if ((ci == ']') && comment) {
123 } while (comment || ((char) ci != ',' && (char) ci != ')') );
126 if ((char) ci == '(') {
129 if ((char) ci == ')') {
136 } while (((char) ci != ';') && (n != maxlen-2));
139 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n");
144 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n");
153 Node *internalnode(Tree *tr, char **chpp, int *ninode)
156 int i, j, dvg, ff, stop, numc;
157 char ident[100], idcomp[27]; /* CZ 05/22/01 */
161 if (**chpp == '(') { /* process subgroup */
163 xp = internalnode(tr, chpp, ninode);
166 while (**chpp != ')') {
167 if (**chpp == '\0') {
168 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
172 /* insert edges around node */
173 np = internalnode(tr, chpp, ninode);
178 /* closing bracket reached */
182 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n");
186 if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */
187 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
191 rp = tr->ibrnchp[*ninode];
195 for (j = 0; j < Numspc; j++)
199 for (j = 0; j < Numspc; j++) {
200 if (xp->paths[j] == 1)
207 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
208 if ((**chpp) == '\0') {
209 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
213 /* read internal label into rp->label (max. 20 characters) */
214 rp->label = new_cvector(21);
215 (rp->label)[0] = **chpp;
216 (rp->label)[1] = '\0';
217 for (numc = 1; numc < 20; numc++) {
219 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
220 if ((**chpp) == '\0') {
221 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
224 (rp->label)[numc] = **chpp;
225 (rp->label)[numc+1] = '\0';
227 do { /* skip the rest of the internal label */
229 if ((**chpp) == '\0') {
230 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
233 } while (((**chpp) != ',' && (**chpp) != ')'));
237 } else { /* process species names */
238 /* read species name */
239 for (idp = ident; **chpp != ',' &&
240 **chpp != ')' && **chpp != '\0'; (*chpp)++) {
244 /* look for internal number */
245 idcomp[26] = '\0'; /* CZ 05/22/01 */
247 for (i = 0; i < Maxspc; i++) {
251 idcomp[ff] = Identif[i][ff];
253 if (idcomp[ff-1] == ' ') stop = TRUE;
254 } while (!stop && (ff != 26)); /* CZ 05/22/01 */
255 if (stop) idcomp[ff-1] = '\0';
257 if (!strcmp(ident, idcomp)) {
259 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '");
260 FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident);
264 return tr->ebrnchp[i]->kinp;
268 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident);
271 return NULL; /* never returned but without some compilers complain */
274 /* make tree structure, the tree description may contain internal
275 labels but no edge lengths */
276 void constructtree(Tree *tr, cvector strtree)
285 usedtaxa = new_ivector(Maxspc);
286 for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE;
288 xp = internalnode(tr, &chp, &ninode);
291 while (*chp != ')') { /* look for closing bracket */
293 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
297 /* insert edges around node */
298 np = internalnode(tr, &chp, &ninode);
304 for (i = 0; i < Maxspc; i++)
305 if (usedtaxa[i] == FALSE) {
306 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n");
310 /* closing bracket reached */
312 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
317 Numbrnch = Numspc + ninode;
320 if (*chp == ';' || *chp == '\0') {
321 free_ivector(usedtaxa);
325 /* copy last internal label (max. 20 characters) */
326 xp->label = new_cvector(21);
327 (xp->label)[0] = *chp;
328 (xp->label)[1] = '\0';
329 for (numc = 1; numc < 20; numc++) {
331 if (*chp == ';' || *chp == '\0') {
332 free_ivector(usedtaxa);
335 (xp->label)[numc] = *chp;
336 (xp->label)[numc+1] = '\0';
339 free_ivector(usedtaxa);
344 /* remove possible basal bifurcation */
345 void removebasalbif(cvector strtree)
347 int n, c, brak, cutflag, h;
349 /* check how many OTUs on basal level */
354 if (strtree[n] == '(') brak++;
355 if (strtree[n] == ')') brak--;
357 if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */
360 } while (strtree[n] != '\0');
362 /* if only 1 OTU inside outer bracket stop now */
364 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n");
368 /* if only 2 OTUs inside outer bracket delete second pair of
369 brackets from the right to remove basal bifurcation */
375 cutflag = 0; /* not yet cutted */
378 if (strtree[n] == '(') brak++;
379 if (strtree[n] == ')') brak--;
381 if (brak == 2 && cutflag == 0) cutflag = 1; /* cutting */
382 if (brak == 1 && cutflag == 1) {
383 cutflag = 2; /* cutted */
384 /* leave out internal label */
387 } while (strtree[n+h] != ')' && strtree[n+h] != ',');
391 if (cutflag == 1) strtree[n] = strtree[n+1];
392 if (cutflag == 2) strtree[n-1] = strtree[n+h];
395 } while (strtree[n] != '\0');
400 void makeusertree(FILE *itfp)
404 strtree = new_cvector(23*Maxspc); /* for treefile */
405 getusertree(itfp, strtree, 23*Maxspc);
406 removebasalbif(strtree);
407 constructtree(Ctree, strtree);
408 free_cvector(strtree);
412 /******************************************************************************/
413 /* memory organisation for maximum likelihood tree */
414 /******************************************************************************/
416 /* initialise new tree */
417 Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint)
423 maxibrnch = maxspc - 3;
424 heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *));
425 if (heights == NULL) maerror("heights in new_tree");
426 tr = (Tree *) malloc(sizeof(Tree));
427 if (tr == NULL) maerror("tr in new_tree");
428 tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *));
429 if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree");
430 tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *));
431 if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree");
432 tr->condlkl = new_dmatrix(numcats, numptrn);
433 for (n = 0; n < maxspc; n++) {
434 dp = (Node *) malloc(sizeof(Node));
435 if (dp == NULL) maerror("dp in new_tree");
436 up = (Node *) malloc(sizeof(Node));
437 if (up == NULL) maerror("up in new_tree");
452 dp->paths = new_ivector(maxspc);
453 up->paths = dp->paths;
454 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
456 dp->eprob = seqconint[n];
459 up->partials = new_dcube(numcats, numptrn, tpmradix);
464 for (n = 0; n < maxibrnch; n++) {
465 dp = (Node *) malloc(sizeof(Node));
466 if (dp == NULL) maerror("dp in new_tree");
467 up = (Node *) malloc(sizeof(Node));
468 if (up == NULL) maerror("up in new_tree");
483 dp->paths = new_ivector(maxspc);
484 up->paths = dp->paths;
485 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
488 dp->partials = new_dcube(numcats, numptrn, tpmradix);
489 up->partials = new_dcube(numcats, numptrn, tpmradix);
497 * reserve memory for lengths of the tree branches
498 * and for the distance matrix as a vector
499 * (needed for LS estimation of tree branch lengths)
502 Brnlength = new_dvector(2 * maxspc - 3);
503 Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2);
509 /* initialise quartet tree */
510 Tree *new_quartet(int numptrn, cmatrix seqconint)
516 heights = (Node **) malloc((unsigned)2 * sizeof(Node *));
517 if (heights == NULL) maerror("heights in new_quartet");
518 /* reserve memory for tree */
519 tr = (Tree *) malloc(sizeof(Tree));
520 if (tr == NULL) maerror("tr in new_quartet");
521 tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *));
522 if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet");
523 tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *));
524 if (tr->ibrnchp == NULL) maerror("ibrnchp in new_quartet");
525 tr->condlkl = new_dmatrix(numcats, numptrn);
526 /* reserve memory for nodes */
527 for (n = 0; n < 4; n++) {
528 dp = (Node *) malloc(sizeof(Node));
529 if (dp == NULL) maerror("dp in new_quartet");
530 up = (Node *) malloc(sizeof(Node));
531 if (up == NULL) maerror("dp in new_quartet");
545 dp->paths = new_ivector(4);
546 up->paths = dp->paths;
547 for (i = 0; i < 4; i++) dp->paths[i] = 0;
549 dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */
552 up->partials = new_dcube(numcats, numptrn, tpmradix);
556 /* reserve memory for internal branch */
557 dp = (Node *) malloc(sizeof(Node));
558 if (dp == NULL) maerror("dp in new_quartet");
559 up = (Node *) malloc(sizeof(Node));
560 if (up == NULL) maerror("dp in new_quartet");
561 dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */
562 up->isop = tr->ebrnchp[0]->kinp;
575 dp->paths = new_ivector(4);
576 up->paths = dp->paths;
583 dp->partials = new_dcube(numcats, numptrn, tpmradix);
584 up->partials = new_dcube(numcats, numptrn, tpmradix);
590 /* connect external branches */
591 tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp;
592 tr->ebrnchp[1]->kinp->isop = tr->rootp;
593 tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp;
594 tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp;
597 * reserve memory for lengths of the five branches
598 * of a quartet and for the six possible distances
599 * (needed for LS estimation of branch lengths)
601 Brnlength = new_dvector(NUMQBRNCH);
602 Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2);
608 /* free tree memory */
609 void free_tree(Tree *tr, int taxa)
615 free_dmatrix(tr->condlkl);
616 for (n = 0; n < taxa; n++) {
619 free_ivector(dp->paths);
620 free_dcube(up->partials);
625 for (n = 0; n < (taxa-3); n++) {
628 free_dcube(dp->partials);
629 free_dcube(up->partials);
630 free_ivector(dp->paths);
636 free_dvector(Brnlength); /* branch lengths (for LS estimation) */
637 free_dvector(Distanvec); /* distances (for LS estimation) */
641 /* make (a,b)-(c,d) quartet
647 species numbers range from 0 to Maxspc - 1 */
649 void make_quartet(int a, int b, int c, int d)
651 /* place sequences */
652 Ctree->ebrnchp[0]->eprob = Seqpat[a];
653 Ctree->ebrnchp[1]->eprob = Seqpat[b];
654 Ctree->ebrnchp[2]->eprob = Seqpat[c];
655 Ctree->ebrnchp[3]->eprob = Seqpat[d];
657 /* make distance vector */
658 Distanvec[0] = Distanmat[b][a];
659 Distanvec[1] = Distanmat[c][a];
660 Distanvec[2] = Distanmat[c][b];
661 Distanvec[3] = Distanmat[d][a];
662 Distanvec[4] = Distanmat[d][b];
663 Distanvec[5] = Distanmat[d][c];
666 /* write distance matrix as vector */
667 void changedistan(dmatrix distanmat, dvector distanvec, int numspc)
671 for (k = 0, i = 1; i < numspc; i++) {
672 for (j = 0; j < i; j++, k++)
673 distanvec[k] = distanmat[i][j];
678 /******************************************************************************/
679 /* computation of maximum likelihood tree */
680 /******************************************************************************/
683 /* compute the likelihood for (a,b)-(c,d) quartet */
684 double quartet_lklhd(int a, int b, int c, int d)
686 /* reserve memory for quartet if necessary */
687 if (mlmode != 1) { /* no quartet tree */
689 free_tree(Ctree, Numspc);
690 Ctree = new_quartet(Numptrn, Seqpat);
691 Numbrnch = NUMQBRNCH;
692 Numibrnch = NUMQIBRNCH;
697 /* make (a,b)-(c,d) quartet */
698 make_quartet(a,b,c,d);
700 clockmode = 0; /* nonclocklike branch lengths */
702 /* least square estimate for branch length */
703 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
705 /* compute likelihood */
706 Ctree->lklhd = optlkl(Ctree);
712 /* compute the approximate likelihood for (a,b)-(c,d) quartet */
713 double quartet_alklhd(int a, int b, int c, int d)
715 /* reserve memory for quartet if necessary */
716 if (mlmode != 1) { /* no quartet tree */
718 free_tree(Ctree, Numspc);
719 Ctree = new_quartet(Numptrn, Seqpat);
720 Numbrnch = NUMQBRNCH;
721 Numibrnch = NUMQIBRNCH;
726 /* make (a,b)-(c,d) quartet */
727 make_quartet(a,b,c,d);
729 clockmode = 0; /* nonclocklike branch lengths */
731 /* least square estimate for branch length */
732 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
734 /* compute likelihood */
735 Ctree->lklhd = treelkl(Ctree);
741 /* read usertree from file to memory */
742 void readusertree(FILE *ifp)
744 /* reserve memory for tree if necessary */
745 if (mlmode != 2) { /* no tree */
747 free_tree(Ctree, Numspc);
748 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
749 Numbrnch = 2*Maxspc-3;
750 Numibrnch = Maxspc-3;
760 /* compute the likelihood of a usertree */
761 double usertree_lklhd()
763 /* be sure to have a usertree in memory and
764 to have pairwise distances computed */
766 clockmode = 0; /* nonclocklike branch lengths */
768 /* least square estimate for branch length */
769 changedistan(Distanmat, Distanvec, Numspc);
770 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
772 /* compute likelihood */
773 Ctree->lklhd = optlkl(Ctree);
779 /* compute the approximate likelihood of a usertree */
780 double usertree_alklhd()
782 /* be sure to have a usertree in memory and
783 to have pairwise distances computed */
785 clockmode = 0; /* nonclocklike branch lengths */
787 /* least square estimate for branch length */
788 changedistan(Distanmat, Distanvec, Numspc);
789 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
791 /* compute likelihood */
792 Ctree->lklhd = treelkl(Ctree);
798 /* preparation for ML analysis */
801 /* number of states and code length */
802 tpmradix = gettpmradix();
804 /* declare variables */
805 Eval = new_dvector(tpmradix);
806 Evec = new_dmatrix(tpmradix,tpmradix);
807 Ievc = new_dmatrix(tpmradix,tpmradix);
808 iexp = new_dmatrix(tpmradix,tpmradix);
809 Alias = new_ivector(Maxsite);
811 /* process sequence information */
813 bestrate = new_ivector(Numptrn);
815 /* compute transition probability matrix */
818 /* non-zero rate categories */
819 Rates = new_dvector(numcats);
821 ltprobr = new_dcube(numcats, tpmradix,tpmradix);
823 /* compute distance matrix */
824 Distanmat = new_dmatrix(Maxspc, Maxspc);
827 /* initialize tree pointer for quartet tree */
829 Ctree = new_quartet(Numptrn, Seqpat);
830 Numbrnch = NUMQBRNCH;
831 Numibrnch = NUMQIBRNCH;
834 /* computing ML distances */
839 /* recompute ml distances for quartet only */
840 void distupdate(int a, int b, int c, int d)
842 /* update distance matrix */
843 /* consider only entries relevant to quartet */
844 Distanmat[a][b] = mldistance(a, b);
845 Distanmat[b][a] = Distanmat[a][b];
846 Distanmat[a][c] = mldistance(a, c);
847 Distanmat[c][a] = Distanmat[a][c];
848 Distanmat[a][d] = mldistance(a, d);
849 Distanmat[d][a] = Distanmat[a][d];
850 Distanmat[b][c] = mldistance(b, c);
851 Distanmat[c][b] = Distanmat[b][c];
852 Distanmat[b][d] = mldistance(b, d);
853 Distanmat[d][b] = Distanmat[b][d];
854 Distanmat[c][d] = mldistance(c, d);
855 Distanmat[d][c] = Distanmat[c][d];
859 /* cleanup after ML analysis */
863 free_tree(Ctree, Numspc);
864 free_ivector(bestrate);
866 free_cmatrix(Seqpat);
867 free_ivector(constpat);
868 free_ivector(Weight);
869 free_dmatrix(Distanmat);
879 /******************************************************************************/
881 /******************************************************************************/
889 void prbranch(Node *up, int depth, int m, int maxm,
890 ivector umbrella, ivector column, FILE *outfp)
892 int i, num, n, maxn, lim;
896 if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) {
897 column[depth] = MAXLENG;
900 column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3;
904 if (up->isop == NULL) { /* external branch */
905 num = up->number + 1; /* offset */
906 if (m == 1) umbrella[depth - 1] = TRUE;
907 for (i = 0; i < depth; i++) {
909 fprintf(outfp, "%*c", column[i], ':');
911 fprintf(outfp, "%*c", column[i], ' ');
914 umbrella[depth - 1] = FALSE;
915 for (i = 0, lim = column[depth] - 3; i < lim; i++)
917 fprintf(outfp, "-%d ", num);
919 fputid(outfp, up->number);
927 num = up->number + 1 + Numspc; /* offset, internal branch */
928 for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++)
930 for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) {
931 prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp);
932 if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE;
934 for (i = 0; i < depth; i++) {
936 fprintf(outfp, "%*c", column[i], ':');
938 fprintf(outfp, "%*c", column[i], ' ');
940 if (n == maxn / 2) { /* internal branch */
941 for (i = 0, lim = column[depth] - 3; i < lim; i++)
944 fprintf(outfp, "--%d", num);
946 fprintf(outfp, "-%2d", num);
948 fprintf(outfp, "%3d", num);
951 fprintf(outfp, "%*c", column[depth], ':');
953 fprintf(outfp, "%*c", column[depth], ' ');
958 if (m == maxm) umbrella[depth - 1] = FALSE;
964 void getproportion(double *proportion, dvector distanvec, int numspc)
969 maxpair = (numspc*(numspc-1))/2;
972 for (i = 0; i < maxpair; i++) {
973 if (distanvec[i] > maxdis) {
974 maxdis = distanvec[i];
977 *proportion = (double) MAXCOLUMN / (maxdis * 3.0);
978 if (*proportion > 1.0) *proportion = 1.0;
982 void prtopology(FILE *outfp)
989 getproportion(&Proportion, Distanvec, Numspc);
991 umbrella = new_ivector(Numspc);
992 column = new_ivector(Numspc);
994 for (n = 0; n < Numspc; n++) {
1002 /* original code: rp = Ctree->rootp */
1003 /* but we want to print the first group in the
1004 trichotomy as outgroup at the bottom! */
1005 rp = Ctree->rootp->isop;
1007 for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++)
1016 prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp);
1017 if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':');
1020 free_ivector(umbrella);
1021 free_ivector(column);
1025 /* print unrooted tree file with branch lengths */
1026 void fputphylogeny(FILE *fp)
1031 cp = rp = Ctree->rootp;
1035 cp = cp->isop->kinp;
1036 if (cp->isop == NULL) { /* external node */
1041 n += fputid(fp, cp->number);
1042 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1045 } else { /* internal node */
1060 /* internal label */
1061 if (cp->kinp->label != NULL) {
1062 fprintf(fp, "%s", cp->kinp->label);
1063 n += strlen(cp->kinp->label);
1065 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1069 if (!cp->descen && !cp->isop->descen && cp != rp) {
1070 putc(',', fp); /* not last subtree */
1075 /* internal label */
1076 if (cp->label != NULL)
1077 fprintf(fp, "%s", cp->label);
1082 void resulttree(FILE *outfp)
1084 int n, ne, closeflag;
1091 fprintf(outfp, "\n branch length nc/c");
1092 fprintf(outfp, " branch length nc/c (= non-clock/clock)\n");
1094 fprintf(outfp, "\n branch length S.E.");
1095 fprintf(outfp, " branch length S.E.\n");
1097 for (n = 0; n < Numspc; n++) {
1098 ep = Ctree->ebrnchp[n];
1100 fputid10(outfp, ne);
1102 fprintf(outfp, "%3d", ne + 1);
1103 blen = (clockmode ? ep->lengthc : ep->length);
1104 fprintf(outfp, "%9.5f", blen*0.01);
1105 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1107 fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc));
1109 fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen));
1110 if (n < Numibrnch) {
1111 ip = Ctree->ibrnchp[n];
1112 fprintf(outfp, "%8d", n + 1 + Numspc);
1113 blen = (clockmode ? ip->lengthc : ip->length);
1114 fprintf(outfp, "%9.5f", blen*0.01);
1115 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1117 fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc));
1119 fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen));
1122 if (n == Numspc - 3) {
1124 } else if (n == Numspc - 2) {
1127 fprintf(outfp, " No convergence after %d iterations!\n", Numitc);
1129 fprintf(outfp, " %d iterations until convergence\n", Numitc);
1132 fprintf(outfp, " No convergence after %d iterations!\n", Numit);
1134 fprintf(outfp, " %d iterations until convergence\n", Numit);
1136 } else if (n == Numspc - 1) {
1137 fprintf(outfp, " log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd));
1144 fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n");
1148 /******************************************************************************/
1149 /* Neighbor-joining tree */
1150 /******************************************************************************/
1153 /* compute NJ tree and write to file */
1154 void njtree(FILE *fp)
1156 /* reserve memory for tree if necessary */
1157 if (mlmode != 3) { /* no tree */
1159 free_tree(Ctree, Numspc);
1160 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
1161 Numbrnch = 2*Maxspc-3;
1162 Numibrnch = Maxspc-3;
1167 /* construct NJ tree from distance matrix */
1168 njdistantree(Ctree);
1174 /* construct NJ tree from distance matrix */
1175 void njdistantree(Tree *tr)
1177 int i, j, otui=0, otuj=0, otuk, nsp2, cinode, step, restsp, k;
1178 double dij, bix, bjx, bkx, sij, smax, dnsp2;
1181 Node **psotu, *cp, *ip, *jp, *kp;
1183 distan = new_dmatrix(Maxspc,Maxspc);
1184 for (i = 0; i < Maxspc; i++)
1185 for (j = 0; j < Maxspc; j++)
1186 distan[i][j] = Distanmat[i][j];
1191 r = new_dvector(Maxspc);
1193 psotu = (Node **) malloc((unsigned)Maxspc * sizeof(Node *));
1194 if (psotu == NULL) maerror("psotu in njdistantree");
1196 /* external branches are start OTUs */
1197 for (i = 0; i < Maxspc; i++)
1198 psotu[i] = tr->ebrnchp[i]->kinp;
1201 cinode = 0; /* counter for internal nodes */
1203 for (step = 0; restsp > 3; step++) { /* NJ clustering steps */
1205 for (i = 0; i < Maxspc; i++) {
1206 if (psotu[i] != NULL) {
1207 for (j = 0, r[i] = 0.0; j < Maxspc; j++)
1208 if (psotu[j] != NULL)
1209 r[i] += distan[i][j];
1214 for (i = 0; i < Maxspc-1; i++) {
1215 if (psotu[i] != NULL) {
1217 for (j = i+1; j < Maxspc; j++) {
1218 if (psotu[j] != NULL)
1220 sij = ( r[i] + r[j] ) * dnsp2 - distan[i][j];
1232 /* new pair: otui and otuj */
1234 dij = distan[otui][otuj];
1235 bix = (dij + r[otui]/nsp2 - r[otuj]/nsp2) * 0.5;
1238 cp = tr->ibrnchp[cinode];
1247 ip->kinp->length = ip->length;
1248 jp->kinp->length = jp->length;
1252 for (k = 0; k < Maxspc; k++)
1254 if (psotu[k] != NULL && k != otui && k != otuj)
1256 dij = (distan[otui][k] + distan[otuj][k] - distan[otui][otuj]) * 0.5;
1257 distan[otui][k] = dij;
1258 distan[k][otui] = dij;
1261 distan[otui][otui] = 0.0;
1273 otui = otuj = otuk = -1;
1274 for (i = 0; i < Maxspc; i++)
1276 if (psotu[i] != NULL) {
1277 if (otui == -1) otui = i;
1278 else if (otuj == -1) otuj = i;
1282 bix = (distan[otui][otuj] + distan[otui][otuk] - distan[otuj][otuk]) * 0.5;
1283 bjx = distan[otui][otuj] - bix;
1284 bkx = distan[otui][otuk] - bix;
1294 ip->kinp->length = ip->length;
1295 jp->kinp->length = jp->length;
1296 kp->kinp->length = kp->length;
1301 free_dmatrix(distan);
1302 free((Node *) psotu);
1305 /******************************************************************************/
1306 /* find best assignment of rate categories */
1307 /******************************************************************************/
1309 /* find best assignment of rate categories */
1310 void findbestratecombination()
1313 double bestvalue, fv2;
1317 cdl = Ctree->condlkl;
1318 catprob = new_dvector(numcats+1);
1319 fv2 = (1.0-fracinv)/(double) numcats;
1321 for (k = 0; k < Numptrn; k++) {
1323 if (constpat[k] == TRUE)
1324 catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]];
1327 /* non-zero-rates */
1328 for (u = 1; u < numcats+1; u++)
1329 catprob[u] = fv2*cdl[u-1][k];
1331 bestvalue = catprob[0];
1333 for (u = 1; u < numcats+1; u++)
1334 if (catprob[u] >= bestvalue) {
1335 bestvalue = catprob[u];
1339 free_dvector(catprob);
1343 /* print best assignment of rate categories */
1344 void printbestratecombination(FILE *fp)
1348 for (s = 0; s < Maxsite; s++) {
1350 fprintf(fp, "%2d", bestrate[k]);
1351 if ((s+1) % 30 == 0)
1353 else if ((s+1) % 10 == 0)
1361 /******************************************************************************/
1362 /* computation of clocklike branch lengths */
1363 /******************************************************************************/
1365 /* checks wether e is a valid edge specification */
1366 int checkedge(int e)
1368 /* there are Numspc external branches:
1370 there are Numibrnch internal branches:
1371 Numspc - Numspc+Numibrnch-1
1374 if (e < 0) return FALSE;
1375 if (e < Numspc+Numibrnch) return TRUE;
1379 /* print topology of subtree */
1380 void fputsubstree(FILE *fp, Node *ip)
1384 if (ip->isop == NULL) { /* terminal nodes */
1385 numtc += fputid(fp, ip->number);
1391 cp = cp->isop->kinp;
1392 if (cp->isop == NULL) { /* external node */
1393 numtc += fputid(fp, cp->number);
1394 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1397 } else { /* internal node */
1398 if (cp->height > 0.0) {
1401 } else if (cp->height < 0.0) {
1408 /* internal label */
1409 if (cp->kinp->label != NULL) {
1410 fprintf(fp, "%s", cp->kinp->label);
1411 numtc += strlen(cp->kinp->label);
1417 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1425 if (cp->height <= 0.0 && cp->isop->height <= 0.0 &&
1427 putc(',', fp); /* not last subtree */
1434 } while (cp->isop != ip);
1445 /* print rooted tree file */
1446 void fputrooted(FILE *fp, int e)
1450 /* to be called only after clocklike branch
1451 lengths have been computed */
1453 /* pointer to root branch */
1454 if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1455 else rootbr = Ctree->ibrnchp[e - Numspc];
1459 fputsubstree(fp, rootbr);
1460 /* internal label */
1461 if (rootbr->label != NULL) {
1462 fprintf(fp, "%s", rootbr->label);
1463 numtc += strlen(rootbr->label);
1469 fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01);
1475 fputsubstree(fp, rootbr->kinp);
1476 /* internal label */
1477 if (rootbr->kinp->label != NULL) {
1478 fprintf(fp, "%s", rootbr->kinp->label);
1479 numtc += strlen(rootbr->kinp->label);
1485 fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01);
1488 /* finds heights in subtree */
1489 void findheights(Node *ip)
1493 if (ip->isop != NULL) { /* forget terminal nodes */
1497 /* initialise node */
1498 cp->height = 1.0; /* up */
1500 while (rp->isop != cp) {
1502 rp->height = -1.0; /* down */
1506 cp = cp->isop->kinp;
1507 if (cp->isop == NULL) { /* external node */
1509 } else { /* internal node */
1510 if (cp->height == 0.0) { /* node not yet visited */
1511 cp->height = 1.0; /* up */
1513 while (rp->isop != cp) {
1515 rp->height = -1.0; /* down */
1517 } else if (cp->kinp->height == 1.0) {
1518 /* cp->kinp is next height pointer */
1519 heights[Numhts] = cp->kinp;
1523 } while (cp->isop != ip);
1524 /* ip is last height pointer */
1525 heights[Numhts] = ip;
1531 /* initialise clocklike branch lengths (with root on edge e) */
1532 void initclock(int e)
1536 double sum, minh, aveh, len;
1538 /* be sure to have a Ctree in memory and
1539 to have pairwise distances computed */
1541 clockmode = 1; /* clocklike branch lengths */
1543 /* least square estimate for branch length */
1544 changedistan(Distanmat, Distanvec, Numspc);
1545 lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
1547 /* pointer to root branch */
1548 if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1549 else rootbr = Ctree->ibrnchp[e - Numspc];
1551 /* clear all heights */
1552 for (n = 0; n < Numspc; n++) {
1553 Ctree->ebrnchp[n]->height = 0.0;
1554 Ctree->ebrnchp[n]->kinp->height = 0.0;
1555 Ctree->ebrnchp[n]->varheight = 0.0;
1556 Ctree->ebrnchp[n]->kinp->varheight = 0.0;
1557 if (n < Numibrnch) {
1558 Ctree->ibrnchp[n]->height = 0.0;
1559 Ctree->ibrnchp[n]->kinp->height = 0.0;
1560 Ctree->ibrnchp[n]->varheight = 0.0;
1561 Ctree->ibrnchp[n]->kinp->varheight = 0.0;
1565 /* collect pointers to height nodes */
1567 findheights(rootbr); /* one side */
1568 findheights(rootbr->kinp); /* other side */
1570 /* assign preliminary approximate heights and
1571 corresponding branch lengths */
1572 for (h = 0; h < Numhts; h++) {
1574 cp = rp = heights[h];
1578 while (rp->isop != cp) {
1581 sum += rp->lengthc + rp->kinp->height;
1582 if (rp->kinp->height > minh) minh = rp->kinp->height;
1584 aveh = sum / (double) count;
1585 if (aveh < minh + MINARC) aveh = minh + MINARC;
1588 while (rp->isop != cp) {
1590 len = aveh - rp->kinp->height;
1591 rp->kinp->lengthc = len;
1596 if (rootbr->height > rootbr->kinp->height) minh = rootbr->height;
1597 else minh = rootbr->kinp->height;
1598 aveh = 0.5*(rootbr->lengthc + rootbr->height + rootbr->kinp->height);
1599 if (aveh < minh + MINARC) aveh = minh + MINARC;
1601 maxhroot = RMHROOT*hroot; /* maximal possible hroot */
1602 len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1603 rootbr->lengthc = len;
1604 rootbr->kinp->lengthc = len;
1607 /* approximate likelihood under the constaining assumption of
1608 clocklike branch lengths (with root on edge e) */
1609 double clock_alklhd(int e)
1612 Ctree->lklhdc = treelkl(Ctree);
1614 return Ctree->lklhdc;
1617 /* log-likelihood given height ht at node pointed to by chep */
1618 double heightlkl(double ht)
1623 /* adjust branch lengths */
1625 /* descendent branches */
1627 while (rp->isop != chep) {
1629 len = chep->height - rp->kinp->height;
1630 rp->kinp->lengthc = len;
1634 if (chep == rootbr || chep->kinp == rootbr) {
1635 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1636 chep->lengthc = len;
1637 chep->kinp->lengthc = len;
1640 while (rp->isop->height <= 0.0)
1642 chep->lengthc = rp->isop->height - chep->height;
1643 chep->kinp->lengthc = rp->isop->height - chep->height;
1646 /* compute likelihood */
1647 Ctree->lklhdc = treelkl(Ctree);
1649 return -(Ctree->lklhdc); /* we use a minimizing procedure */
1652 /* optimize current height */
1653 void optheight(void)
1655 double he, fx, f2x, minh, maxh, len;
1658 /* current height */
1664 while (rp->isop != chep) {
1666 if (rp->kinp->height > minh)
1667 minh = rp->kinp->height;
1672 if (chep == rootbr || chep->kinp == rootbr) {
1676 while (rp->isop->height <= 0.0)
1678 maxh = rp->isop->height;
1682 /* check borders for height */
1683 if (he < minh) he = minh;
1684 if (he > maxh) he = maxh;
1687 if (!(he == minh && he == maxh))
1688 he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x);
1690 /* variance of height */
1692 if (1.0/(maxhroot*maxhroot) < f2x)
1693 chep->varheight = 1.0/f2x;
1695 chep->varheight = maxhroot*maxhroot;
1697 /* adjust branch lengths */
1699 /* descendent branches */
1701 while (rp->isop != chep) {
1703 len = chep->height - rp->kinp->height;
1704 rp->kinp->lengthc = len;
1708 if (chep == rootbr || chep->kinp == rootbr) {
1709 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1710 chep->lengthc = len;
1711 chep->kinp->lengthc = len;
1714 while (rp->isop->height <= 0.0)
1716 chep->lengthc = rp->isop->height - chep->height;
1717 chep->kinp->lengthc = rp->isop->height - chep->height;
1721 /* log-likelihood given height ht at root */
1722 double rheightlkl(double ht)
1726 /* adjust branch lengths */
1728 len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1729 rootbr->lengthc = len;
1730 rootbr->kinp->lengthc = len;
1732 /* compute likelihood */
1733 Ctree->lklhdc = treelkl(Ctree);
1735 return -(Ctree->lklhdc); /* we use a minimizing procedure */
1738 /* optimize height of root */
1739 void optrheight(void)
1741 double he, fx, f2x, minh, len;
1743 /* current height */
1747 if (rootbr->height > rootbr->kinp->height)
1748 minh = rootbr->height;
1750 minh = rootbr->kinp->height;
1753 /* check borders for height */
1754 if (he < minh) he = minh;
1755 if (he > maxhroot) he = maxhroot;
1758 he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x);
1760 /* variance of height of root */
1762 if (1.0/(maxhroot*maxhroot) < f2x)
1765 varhroot = maxhroot*maxhroot;
1767 /* adjust branch lengths */
1769 len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1770 rootbr->lengthc = len;
1771 rootbr->kinp->lengthc = len;
1774 /* exact likelihood under the constaining assumption of
1775 clocklike branch lengths (with root on edge e) */
1776 double clock_lklhd(int e)
1791 /* optimize height of root */
1794 if (fabs(old - hroot) < HEPSILON) nconv++;
1796 /* optimize height of nodes */
1797 for (h = Numhts-1; h >= 0; h--) {
1799 /* pointer chep to current height node */
1802 /* store old value */
1805 /* find better height */
1809 if (fabs(old - chep->height) < HEPSILON) nconv++;
1812 if (nconv == Numhts+1) Convergc = TRUE;
1814 } while (Numitc < MAXIT && !Convergc);
1816 /* compute final likelihood */
1817 Ctree->lklhdc = treelkl(Ctree);
1819 return Ctree->lklhdc;
1822 /* find out the edge containing the root */
1826 double logbest, logtest;
1828 /* compute the likelihood for all edges and take the edge with
1829 best likelihood (using approximate ML) */
1832 logbest = clock_alklhd(0);
1834 for (e = 1; e < Numspc+Numibrnch; e++) {
1835 logtest = clock_alklhd(e);
1836 if (logtest > logbest) {
1840 } else if (logtest == logbest) {
1848 /* show heights and corresponding standard errors */
1849 void resultheights(FILE *fp)
1854 fprintf(fp, " height S.E. of node common to branches\n");
1855 for (h = 0; h < Numhts; h++) {
1856 fprintf(fp, "%.5f %.5f ", (heights[h]->height)*0.01,
1857 sqrt(heights[h]->varheight)*0.01);
1860 num = (cp->number) + 1;
1861 if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */
1862 fprintf(fp, "%d ", num);
1864 } while (cp != heights[h]);
1868 fprintf(fp, "%.5f %.5f of root at branch %d\n",
1869 hroot*0.01, sqrt(varhroot)*0.01, locroot+1);