initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_mod / src / ml2.c
1 /*
2  * ml2.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 /* Modified by Christian Zmasek to:
17    - Names of 26 chars.
18
19    !WARNING: Use ONLY together with FORESTER/RIO!
20    !For all other puposes download the excellent original!
21    
22    last modification: 05/22/01
23
24
25    Node *internalnode(Tree *tr, char **chpp, int *ninode):
26
27    char ident[100], idcomp[11];           -> char ident[100], idcomp[27];
28
29    idcomp[10] = '\0';                     -> idcomp[26] = '\0';
30
31    } while (!stop && (ff != 10));         -> } while (!stop && (ff != 26)); 
32
33
34
35 */
36
37
38
39 #define EXTERN extern
40
41 /* prototypes */
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <math.h>
45 #include <ctype.h>
46 #include <string.h>
47 #include "util.h"
48 #include "ml.h"
49
50 #define STDOUT     stdout
51 #ifndef PARALLEL             /* because printf() runs significantly faster */
52                              /* than fprintf(stdout) on an Apple McIntosh  */
53                              /* (HS) */
54 #       define FPRINTF    printf
55 #       define STDOUTFILE
56 #else
57 #       define FPRINTF    fprintf
58 #       define STDOUTFILE STDOUT,
59 #endif
60
61 /* prototypes for two functions of puzzle2.c */
62 void fputid10(FILE *, int);
63 int fputid(FILE *, int);
64
65
66 /******************************************************************************/
67 /* user tree input                                                            */
68 /******************************************************************************/
69
70 /* read user tree, drop all blanks, tabs, and newlines.
71    Drop edgelengths (after :) but keep internal
72    labels. Check whether all pairs of brackets match. */
73 void getusertree(FILE *itfp, cvector tr, int maxlen)
74 {
75         int n, brac, ci;
76         int comment = 0;
77
78         /* look for opening bracket */
79         n = 0;
80         brac = 0;
81         do {
82                 ci = fgetc(itfp);
83                 if (ci == EOF) {
84                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n");
85                         exit(1);
86                 }
87                 if (ci == '[') comment = 1;
88                 if ((ci == ']') && comment) {
89                         comment = 0;
90                         ci = fgetc(itfp);
91                 }
92         } while (comment || ((char) ci != '('));
93         tr[n] = (char) ci;
94         brac++;
95         
96         do {
97                 /* get next character (skip blanks, newlines, and tabs) */
98                 do {
99                         ci = fgetc(itfp);
100                         if (ci == EOF) {
101                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n");
102                                 exit(1);
103                         }
104                         if (ci == '[') comment = 1;
105                         if ((ci == ']') && comment) {
106                                 comment = 0;
107                                 ci = fgetc(itfp);
108                         }
109                 } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t');
110         
111                 if ((char) ci == ':') { /* skip characters until a ,) appears  */
112                         do {
113                                 ci = fgetc(itfp);
114                                 if (ci == EOF) {
115                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n");
116                                         exit(1);
117                                 }
118                                 if (ci == '[') comment = 1;
119                                 if ((ci == ']') && comment) {
120                                         comment = 0;
121                                         ci = fgetc(itfp);
122                                 }
123                         } while (comment || ((char) ci != ',' && (char) ci != ')') );
124                 }
125                 
126                 if ((char) ci == '(') {
127                         brac++;
128                 }
129                 if ((char) ci == ')') {
130                         brac--;
131                 }
132
133                 n++;
134                 tr[n] = (char) ci;
135         
136         } while (((char) ci != ';') && (n != maxlen-2));
137         
138         if (n == maxlen-2) {
139                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n");
140                 exit(1);
141         }
142         
143         if (brac != 0) {
144                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n");
145                 exit(1);
146         }
147         
148         n++;
149         tr[n] = '\0';
150 }
151
152
153 Node *internalnode(Tree *tr, char **chpp, int *ninode)
154 {
155         Node *xp, *np, *rp;
156         int i, j, dvg, ff, stop, numc;
157         char ident[100], idcomp[27];  /* CZ 05/22/01 */
158         char *idp;
159
160         (*chpp)++;
161         if (**chpp == '(') { /* process subgroup */
162                 
163                 xp = internalnode(tr, chpp, ninode);
164                 xp->isop = xp;
165                 dvg = 1;
166                 while (**chpp != ')') {
167                         if (**chpp == '\0') {
168                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
169                                 exit(1);
170                         }
171                         dvg++;
172                         /* insert edges around node */
173                         np = internalnode(tr, chpp, ninode);
174                         np->isop = xp->isop;
175                         xp->isop = np; 
176                         xp = np;
177                 }
178                 /* closing bracket reached */
179                 
180                 (*chpp)++;
181                 if (dvg < 2) {
182                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n");
183                         exit(1);
184                 }
185                 
186                 if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */
187                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
188                         exit(1);
189                 }
190
191                 rp = tr->ibrnchp[*ninode];
192                 rp->isop = xp->isop;
193                 xp->isop = rp;
194
195                 for (j = 0; j < Numspc; j++)
196                         rp->paths[j] = 0;
197                 xp = rp->isop;
198                 while (xp != rp) {
199                         for (j = 0; j < Numspc; j++) {
200                                 if (xp->paths[j] == 1)
201                                         rp->paths[j] = 1;
202                         }
203                         xp = xp->isop;
204                 }
205                 (*ninode)++;
206
207                 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
208                 if ((**chpp) == '\0')  {
209                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
210                         exit(1);
211                 }
212
213                 /* read internal label into rp->label (max. 20 characters) */
214                 rp->label = new_cvector(21);
215                 (rp->label)[0] = **chpp;
216                 (rp->label)[1] = '\0';
217                 for (numc = 1; numc < 20; numc++) {
218                         (*chpp)++;
219                         if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
220                         if ((**chpp) == '\0')  {
221                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
222                                 exit(1);
223                         }
224                         (rp->label)[numc] = **chpp;
225                         (rp->label)[numc+1] = '\0';
226                 }       
227                 do { /* skip the rest of the internal label */
228                         (*chpp)++;
229                         if ((**chpp) == '\0')  {
230                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
231                                 exit(1);
232                         }
233                 } while (((**chpp) != ',' && (**chpp) != ')'));
234                         
235                 return rp->kinp;
236                 
237         } else { /* process species names */
238                 /* read species name */
239                 for (idp = ident; **chpp != ',' &&
240                         **chpp != ')' && **chpp != '\0'; (*chpp)++) {
241                         *idp++ = **chpp;        
242                 }
243                 *idp = '\0';
244                 /* look for internal number */
245                 idcomp[26] = '\0'; /* CZ 05/22/01 */
246                 
247                 for (i = 0; i < Maxspc; i++) {
248                         ff = 0;
249                         stop = FALSE;
250                         do {
251                                 idcomp[ff] = Identif[i][ff];
252                                 ff++;
253                                 if (idcomp[ff-1] == ' ') stop = TRUE;
254                         } while (!stop && (ff != 26)); /* CZ 05/22/01 */
255                         if (stop) idcomp[ff-1] = '\0';
256                         
257                         if (!strcmp(ident, idcomp)) {
258                                 if (usedtaxa[i]) {
259                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '");
260                                         FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident);
261                                         exit(1);
262                                 }
263                                 usedtaxa[i] = TRUE;
264                                 return tr->ebrnchp[i]->kinp;
265                         }
266                 }
267                 
268                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident);
269                 exit(1);
270         }
271         return NULL; /* never returned but without some compilers complain */
272 }
273
274 /* make tree structure, the tree description may contain internal
275     labels but no edge lengths */
276 void constructtree(Tree *tr, cvector strtree)
277 {
278         char *chp;
279         int ninode, i;
280         int dvg, numc;
281         Node *xp, *np;
282
283         ninode = 0;
284         chp = strtree;
285         usedtaxa = new_ivector(Maxspc);
286         for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE;
287
288         xp = internalnode(tr, &chp, &ninode);
289         xp->isop = xp;
290         dvg = 1;
291         while (*chp != ')') { /* look for closing bracket */
292                 if (*chp == '\0') {
293                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
294                         exit(1);
295                 }
296                 dvg++;
297                 /* insert edges around node */
298                 np = internalnode(tr, &chp, &ninode);
299                 np->isop = xp->isop;
300                 xp->isop = np; 
301                 xp = np;
302         }
303         
304         for (i = 0; i < Maxspc; i++)
305                 if (usedtaxa[i] == FALSE) {
306                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n");
307                         exit(1);        
308                 }
309         
310         /* closing bracket reached */
311         if (dvg < 3) {
312                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
313                 exit(1);
314         }
315         tr->rootp = xp;
316         Numibrnch = ninode;
317         Numbrnch = Numspc + ninode;
318
319         chp++;
320         if (*chp == ';' || *chp == '\0') {
321                 free_ivector(usedtaxa);
322                 return;
323         }
324
325         /* copy last internal label (max. 20 characters) */
326         xp->label = new_cvector(21);
327         (xp->label)[0] = *chp;
328         (xp->label)[1] = '\0';          
329         for (numc = 1; numc < 20; numc++) {
330                 chp++;
331                 if (*chp == ';' || *chp == '\0') {
332                         free_ivector(usedtaxa);
333                         return;
334                 } else {
335                         (xp->label)[numc] = *chp;
336                         (xp->label)[numc+1] = '\0';
337                 }
338         }
339         free_ivector(usedtaxa); 
340         return;
341 }
342
343
344 /* remove possible basal bifurcation */
345 void removebasalbif(cvector strtree)
346 {
347         int n, c, brak, cutflag, h;
348         
349         /* check how many OTUs on basal level */
350         n = 0;
351         c = 0;
352         brak = 0;
353         do {
354                 if (strtree[n] == '(') brak++;
355                 if (strtree[n] == ')') brak--;
356                 
357                 if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */
358                 
359                 n++;
360         } while (strtree[n] != '\0');
361         
362         /* if only 1 OTU inside outer bracket stop now */
363         if (c == 0) {
364                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n");
365                 exit(1);
366         }       
367         
368         /* if only 2 OTUs inside outer bracket delete second pair of
369            brackets from the right to remove basal bifurcation */
370
371         if (c == 1) {
372                         
373                 n = 0;
374                 brak = 0;
375                 cutflag = 0; /* not yet cutted */
376                 h = 0;
377                 do {
378                         if (strtree[n] == '(') brak++;
379                         if (strtree[n] == ')') brak--;
380                         
381                         if (brak == 2 && cutflag == 0) cutflag = 1; /* cutting */
382                         if (brak == 1 && cutflag == 1) {
383                                 cutflag = 2; /* cutted */
384                                 /* leave out internal label */
385                                 do {
386                                         h++;    
387                                 } while (strtree[n+h] != ')' && strtree[n+h] != ',');
388
389                         }
390                         
391                         if (cutflag == 1) strtree[n] = strtree[n+1];
392                         if (cutflag == 2) strtree[n-1] = strtree[n+h];
393                 
394                         n++;
395                 } while (strtree[n] != '\0');
396         }
397 }
398
399
400 void makeusertree(FILE *itfp)
401 {
402         cvector strtree;
403         
404         strtree = new_cvector(23*Maxspc); /* for treefile */ 
405         getusertree(itfp, strtree, 23*Maxspc);
406         removebasalbif(strtree);
407         constructtree(Ctree, strtree);
408         free_cvector(strtree);
409 }
410
411
412 /******************************************************************************/
413 /* memory organisation for maximum likelihood tree                            */
414 /******************************************************************************/
415
416 /* initialise new tree */
417 Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint)
418 {
419         int n, i, maxibrnch;
420         Tree *tr;
421         Node *dp, *up;
422         
423         maxibrnch = maxspc - 3;
424         heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *));
425         if (heights == NULL) maerror("heights in new_tree");
426         tr = (Tree *) malloc(sizeof(Tree));
427         if (tr == NULL) maerror("tr in new_tree");
428         tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *));
429         if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree");
430         tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *));
431         if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree");
432         tr->condlkl = new_dmatrix(numcats, numptrn);    
433         for (n = 0; n < maxspc; n++) {
434                 dp = (Node *) malloc(sizeof(Node));
435                 if (dp == NULL) maerror("dp in new_tree");
436                 up = (Node *) malloc(sizeof(Node));
437                 if (up == NULL) maerror("up in new_tree");
438                 dp->isop = NULL;
439                 up->isop = NULL;
440                 dp->kinp = up;
441                 up->kinp = dp;
442                 dp->descen = TRUE;
443                 up->descen = FALSE;
444                 dp->number = n;
445                 up->number = n;
446                 dp->length = 0.0;
447                 up->length = 0.0;
448                 dp->lengthc = 0.0;
449                 up->lengthc = 0.0;
450                 dp->varlen = 0.0;
451                 up->varlen = 0.0;
452                 dp->paths = new_ivector(maxspc);
453                 up->paths = dp->paths;
454                 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
455                 dp->paths[n] = 1;
456                 dp->eprob = seqconint[n];
457                 up->eprob = NULL;
458                 dp->partials = NULL;
459                 up->partials = new_dcube(numcats, numptrn, tpmradix);
460                 tr->ebrnchp[n] = dp;
461                 up->label = NULL;
462                 dp->label = NULL;
463         }
464         for (n = 0; n < maxibrnch; n++) {
465                 dp = (Node *) malloc(sizeof(Node));
466                 if (dp == NULL) maerror("dp in new_tree");
467                 up = (Node *) malloc(sizeof(Node));
468                 if (up == NULL) maerror("up in new_tree");
469                 dp->isop = NULL;
470                 up->isop = NULL;
471                 dp->kinp = up;
472                 up->kinp = dp;
473                 dp->descen = TRUE;
474                 up->descen = FALSE;
475                 dp->number = n;
476                 up->number = n;
477                 dp->length = 0.0;
478                 up->length = 0.0;
479                 dp->lengthc = 0.0;
480                 up->lengthc = 0.0;
481                 dp->varlen = 0.0;
482                 up->varlen = 0.0;
483                 dp->paths = new_ivector(maxspc);
484                 up->paths = dp->paths;
485                 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
486                 dp->eprob = NULL;
487                 up->eprob = NULL;
488                 dp->partials = new_dcube(numcats, numptrn, tpmradix);
489                 up->partials = new_dcube(numcats, numptrn, tpmradix);
490                 tr->ibrnchp[n] = dp;
491                 up->label = NULL;
492                 dp->label = NULL;
493         }
494         tr->rootp = NULL;
495         
496         /*
497          * reserve memory for lengths of the tree branches
498          * and for the distance matrix as a vector
499          * (needed for LS estimation of tree branch lengths)
500          */ 
501          
502         Brnlength = new_dvector(2 * maxspc - 3); 
503         Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2);
504
505         return tr;
506 }
507
508
509 /* initialise quartet tree */
510 Tree *new_quartet(int numptrn, cmatrix seqconint)
511 {
512         int n, i;
513         Tree *tr;
514         Node *dp, *up;
515
516         heights = (Node **) malloc((unsigned)2 * sizeof(Node *));
517         if (heights == NULL) maerror("heights in new_quartet");
518         /* reserve memory for tree */
519         tr = (Tree *) malloc(sizeof(Tree));
520         if (tr == NULL) maerror("tr in new_quartet");
521         tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *));
522         if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet");
523         tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *));
524         if (tr->ibrnchp == NULL) maerror("ibrnchp in new_quartet");
525         tr->condlkl = new_dmatrix(numcats, numptrn);
526         /* reserve memory for nodes */
527         for (n = 0; n < 4; n++) {
528                 dp = (Node *) malloc(sizeof(Node));
529                 if (dp == NULL) maerror("dp in new_quartet");
530                 up = (Node *) malloc(sizeof(Node));
531                 if (up == NULL) maerror("dp in new_quartet");
532                 dp->isop = NULL;
533                 dp->kinp = up;
534                 up->kinp = dp;
535                 dp->descen = TRUE;
536                 up->descen = FALSE;
537                 dp->number = n;
538                 up->number = n;
539                 dp->length = 0.0;
540                 up->length = 0.0;
541                 dp->lengthc = 0.0;
542                 up->lengthc = 0.0;
543                 dp->varlen = 0.0;
544                 up->varlen = 0.0;
545                 dp->paths = new_ivector(4);
546                 up->paths = dp->paths;
547                 for (i = 0; i < 4; i++) dp->paths[i] = 0;
548                 dp->paths[n] = 1;
549                 dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */
550                 up->eprob = NULL;               
551                 dp->partials = NULL;
552                 up->partials = new_dcube(numcats, numptrn, tpmradix);
553                 tr->ebrnchp[n] = dp;
554         }
555
556         /* reserve memory for internal branch */        
557         dp = (Node *) malloc(sizeof(Node));
558         if (dp == NULL) maerror("dp in new_quartet");   
559         up = (Node *) malloc(sizeof(Node));
560         if (up == NULL) maerror("dp in new_quartet");   
561         dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */
562         up->isop = tr->ebrnchp[0]->kinp;
563         dp->kinp = up;
564         up->kinp = dp;
565         dp->descen = TRUE;
566         up->descen = FALSE;
567         dp->number = 0;
568         up->number = 0;
569         dp->length = 0.0;
570         up->length = 0.0;
571         dp->lengthc = 0.0;
572         up->lengthc = 0.0;
573         dp->varlen = 0.0;
574         up->varlen = 0.0;
575         dp->paths = new_ivector(4);
576         up->paths = dp->paths;
577         up->paths[0] = 0;
578         up->paths[1] = 0;
579         up->paths[2] = 1;
580         up->paths[3] = 1;
581         dp->eprob = NULL;
582         up->eprob = NULL;       
583         dp->partials = new_dcube(numcats, numptrn, tpmradix);
584         up->partials = new_dcube(numcats, numptrn, tpmradix);   
585         tr->ibrnchp[0] = dp;
586         
587         /* place root */
588         tr->rootp = up;
589
590         /* connect external branches */ 
591         tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp;
592         tr->ebrnchp[1]->kinp->isop = tr->rootp;
593         tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp;
594         tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp;
595         
596         /*
597          * reserve memory for lengths of the five branches
598          * of a quartet and for the six possible distances
599          * (needed for LS estimation of branch lengths)
600          */
601         Brnlength = new_dvector(NUMQBRNCH); 
602         Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2);
603
604         return tr;
605 }
606
607
608 /* free tree memory */
609 void free_tree(Tree *tr, int taxa)
610 {       
611         int n;
612         Node *dp, *up;
613
614         free(heights);
615         free_dmatrix(tr->condlkl);
616         for (n = 0; n < taxa; n++) {
617                 dp = tr->ebrnchp[n];
618                 up = dp->kinp;
619                 free_ivector(dp->paths);                
620                 free_dcube(up->partials);               
621                 free(dp);
622                 free(up);
623         }
624         free(tr->ebrnchp);
625         for (n = 0; n < (taxa-3); n++) {
626                 dp = tr->ibrnchp[n];
627                 up = dp->kinp;
628                 free_dcube(dp->partials);
629                 free_dcube(up->partials);
630                 free_ivector(dp->paths);
631                 free(dp);
632                 free(up);
633         }
634         free(tr->ibrnchp);
635         free(tr);
636         free_dvector(Brnlength); /* branch lengths (for LS estimation) */
637         free_dvector(Distanvec); /* distances (for LS estimation) */
638 }
639
640
641 /* make (a,b)-(c,d) quartet
642
643         a ---+     +--- c
644              +-----+
645         b ---+     +--- d
646
647         species numbers range from 0 to Maxspc - 1  */
648
649 void make_quartet(int a, int b, int c, int d)
650 {
651         /* place sequences */
652         Ctree->ebrnchp[0]->eprob = Seqpat[a];
653         Ctree->ebrnchp[1]->eprob = Seqpat[b];
654         Ctree->ebrnchp[2]->eprob = Seqpat[c];
655         Ctree->ebrnchp[3]->eprob = Seqpat[d];
656         
657         /* make distance vector */
658         Distanvec[0] = Distanmat[b][a];
659         Distanvec[1] = Distanmat[c][a];
660         Distanvec[2] = Distanmat[c][b];
661         Distanvec[3] = Distanmat[d][a];
662         Distanvec[4] = Distanmat[d][b];
663         Distanvec[5] = Distanmat[d][c];
664 }
665
666 /* write distance matrix as vector */
667 void changedistan(dmatrix distanmat, dvector distanvec, int numspc)
668 {
669         int i, j, k;
670
671         for (k = 0, i = 1; i < numspc; i++) {
672                 for (j = 0; j < i; j++, k++)
673                         distanvec[k] = distanmat[i][j];
674         }
675 }
676
677
678 /******************************************************************************/
679 /* computation of maximum likelihood tree                                     */
680 /******************************************************************************/
681
682
683 /* compute the likelihood for (a,b)-(c,d) quartet */
684 double quartet_lklhd(int a, int b, int c, int d)
685 {
686         /* reserve memory for quartet if necessary */
687         if (mlmode != 1) { /* no quartet tree */
688                 if (Ctree != NULL)
689                         free_tree(Ctree, Numspc);
690                 Ctree = new_quartet(Numptrn, Seqpat);
691                 Numbrnch = NUMQBRNCH;
692                 Numibrnch = NUMQIBRNCH;
693                 Numspc = NUMQSPC;
694                 mlmode = 1;
695         }
696         
697         /* make (a,b)-(c,d) quartet */
698         make_quartet(a,b,c,d);
699
700         clockmode = 0; /* nonclocklike branch lengths */
701         
702         /* least square estimate for branch length */   
703         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
704
705         /* compute likelihood */
706         Ctree->lklhd = optlkl(Ctree);
707
708         return Ctree->lklhd;
709 }
710
711
712 /* compute the approximate likelihood for (a,b)-(c,d) quartet */
713 double quartet_alklhd(int a, int b, int c, int d)
714 {
715         /* reserve memory for quartet if necessary */
716         if (mlmode != 1) { /* no quartet tree */
717                 if (Ctree != NULL)
718                         free_tree(Ctree, Numspc);
719                 Ctree = new_quartet(Numptrn, Seqpat);
720                 Numbrnch = NUMQBRNCH;
721                 Numibrnch = NUMQIBRNCH;
722                 Numspc = NUMQSPC;
723                 mlmode = 1; 
724         }
725         
726         /* make (a,b)-(c,d) quartet */
727         make_quartet(a,b,c,d);
728
729         clockmode = 0; /* nonclocklike branch lengths */
730         
731         /* least square estimate for branch length */   
732         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
733
734         /* compute likelihood */
735         Ctree->lklhd = treelkl(Ctree);
736
737         return Ctree->lklhd;
738 }
739
740
741 /* read usertree from file to memory */
742 void readusertree(FILE *ifp)
743 {
744         /* reserve memory for tree if necessary */
745         if (mlmode != 2) { /* no tree */
746                 if (Ctree != NULL)
747                         free_tree(Ctree, Numspc);
748                 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
749                 Numbrnch = 2*Maxspc-3;
750                 Numibrnch = Maxspc-3;
751                 Numspc = Maxspc;
752                 mlmode = 2;
753         }
754
755         /* read tree */
756         makeusertree(ifp);
757 }
758
759
760 /* compute the likelihood of a usertree */
761 double usertree_lklhd()
762 {
763         /* be sure to have a usertree in memory and
764            to have pairwise distances computed */
765
766         clockmode = 0; /* nonclocklike branch lengths */
767
768         /* least square estimate for branch length */
769         changedistan(Distanmat, Distanvec, Numspc);     
770         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
771
772         /* compute likelihood */
773         Ctree->lklhd = optlkl(Ctree);
774
775         return Ctree->lklhd;
776 }
777
778
779 /* compute the approximate likelihood of a usertree */
780 double usertree_alklhd()
781 {
782         /* be sure to have a usertree in memory and
783            to have pairwise distances computed */
784
785         clockmode = 0; /* nonclocklike branch lengths */
786
787         /* least square estimate for branch length */
788         changedistan(Distanmat, Distanvec, Numspc);
789         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
790
791         /* compute likelihood */
792         Ctree->lklhd = treelkl(Ctree);
793
794         return Ctree->lklhd;
795 }
796
797
798 /* preparation for ML analysis */
799 void mlstart()
800 {
801         /* number of states and code length */
802         tpmradix = gettpmradix();
803         
804         /* declare variables */
805         Eval = new_dvector(tpmradix);
806         Evec = new_dmatrix(tpmradix,tpmradix);
807         Ievc = new_dmatrix(tpmradix,tpmradix);
808         iexp = new_dmatrix(tpmradix,tpmradix);
809         Alias = new_ivector(Maxsite);
810         
811         /* process sequence information */
812         evaluateseqs();
813         bestrate = new_ivector(Numptrn);
814
815         /* compute transition probability matrix */     
816         tranprobmat();
817
818         /* non-zero rate categories */
819         Rates = new_dvector(numcats);
820         updaterates();
821         ltprobr = new_dcube(numcats, tpmradix,tpmradix);
822
823         /* compute distance matrix */
824         Distanmat = new_dmatrix(Maxspc, Maxspc);
825         initdistan();
826                 
827         /* initialize tree pointer for quartet tree */
828         mlmode = 1;
829         Ctree = new_quartet(Numptrn, Seqpat);
830         Numbrnch = NUMQBRNCH;
831         Numibrnch = NUMQIBRNCH;
832         Numspc = NUMQSPC;
833
834         /* computing ML distances */
835         computedistan();
836 }
837
838
839 /* recompute ml distances for quartet only */
840 void distupdate(int a, int b, int c, int d)
841 {
842         /* update distance matrix */
843         /* consider only entries relevant to quartet */
844         Distanmat[a][b] = mldistance(a, b);
845         Distanmat[b][a] = Distanmat[a][b];
846         Distanmat[a][c] = mldistance(a, c);
847         Distanmat[c][a] = Distanmat[a][c];
848         Distanmat[a][d] = mldistance(a, d);
849         Distanmat[d][a] = Distanmat[a][d];
850         Distanmat[b][c] = mldistance(b, c);
851         Distanmat[c][b] = Distanmat[b][c];
852         Distanmat[b][d] = mldistance(b, d);
853         Distanmat[d][b] = Distanmat[b][d];
854         Distanmat[c][d] = mldistance(c, d);
855         Distanmat[d][c] = Distanmat[c][d];
856 }
857
858
859 /* cleanup after ML analysis */
860 void mlfinish()
861 {
862         if (Ctree != NULL)
863                 free_tree(Ctree, Numspc);
864         free_ivector(bestrate);
865         free_ivector(Alias);
866         free_cmatrix(Seqpat);
867         free_ivector(constpat);
868         free_ivector(Weight);
869         free_dmatrix(Distanmat);
870         free_dvector(Eval);
871         free_dmatrix(Evec);
872         free_dmatrix(Ievc);
873         free_dvector(Rates);
874         free_dcube(ltprobr);
875         free_dmatrix(iexp);
876 }
877
878
879 /******************************************************************************/
880 /* tree output                                                                */
881 /******************************************************************************/
882
883
884 #define MAXOVER    50
885 #define MAXLENG    30
886 #define MAXCOLUMN  80
887
888
889 void prbranch(Node *up, int depth, int m, int maxm,
890         ivector umbrella, ivector column, FILE *outfp)
891 {
892         int i, num, n, maxn, lim;
893         Node *cp;
894         char bch;
895
896         if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) {
897                 column[depth] = MAXLENG;
898                 bch = '+';
899         } else {
900                 column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3;
901                 bch = '-';
902         }
903
904         if (up->isop == NULL) { /* external branch */
905                 num = up->number + 1; /* offset */
906                 if (m == 1) umbrella[depth - 1] = TRUE;
907                 for (i = 0; i < depth; i++) {
908                         if (umbrella[i])
909                                 fprintf(outfp, "%*c", column[i], ':');
910                         else
911                                 fprintf(outfp, "%*c", column[i], ' ');
912                 }
913                 if (m == maxm)
914                         umbrella[depth - 1] = FALSE;
915                 for (i = 0, lim = column[depth] - 3; i < lim; i++)
916                         fputc(bch, outfp);
917                 fprintf(outfp, "-%d ", num);
918                                 
919                 fputid(outfp, up->number);
920                 
921                 
922                 fputc('\n', outfp);
923                 fputc(' ', outfp);
924                 return;
925         }
926
927         num = up->number + 1 + Numspc; /* offset, internal branch */
928         for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++)
929                 ;
930         for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) {
931                 prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp);
932                 if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE;
933                 if (n != maxn) {
934                         for (i = 0; i < depth; i++) {
935                                 if (umbrella[i])
936                                         fprintf(outfp, "%*c", column[i], ':');
937                                 else
938                                         fprintf(outfp, "%*c", column[i], ' ');
939                         }
940                         if (n == maxn / 2) { /* internal branch */
941                                 for (i = 0, lim = column[depth] - 3; i < lim; i++)
942                                         fputc(bch, outfp);
943                                 if (num < 10)
944                                         fprintf(outfp, "--%d", num);
945                                 else if (num < 100)
946                                         fprintf(outfp, "-%2d", num);
947                                 else
948                                         fprintf(outfp, "%3d", num);
949                         } else {
950                                 if (umbrella[depth])
951                                         fprintf(outfp, "%*c", column[depth], ':');
952                                 else
953                                         fprintf(outfp, "%*c", column[depth], ' ');
954                         }
955                         fputc('\n', outfp);
956                         fputc(' ', outfp);
957                 }
958                 if (m == maxm) umbrella[depth - 1] = FALSE;
959         }
960         return;
961 }
962
963
964 void getproportion(double *proportion, dvector distanvec, int numspc)
965 {
966         int i, maxpair;
967         double maxdis;
968         
969         maxpair = (numspc*(numspc-1))/2;
970
971         maxdis = 0.0;
972         for (i = 0; i < maxpair; i++) {
973                 if (distanvec[i] > maxdis) {
974                         maxdis = distanvec[i];
975                 }
976         }
977         *proportion = (double) MAXCOLUMN / (maxdis * 3.0);
978         if (*proportion > 1.0) *proportion = 1.0;
979 }
980
981
982 void prtopology(FILE *outfp)
983 {
984         int n, maxn, depth;
985         ivector umbrella;
986         ivector column;
987         Node *cp, *rp;
988         
989         getproportion(&Proportion, Distanvec, Numspc);
990
991         umbrella = new_ivector(Numspc);
992         column = new_ivector(Numspc);
993
994         for (n = 0; n < Numspc; n++) {
995                 umbrella[n] = FALSE;
996                 column[n] = 3;
997         }
998         column[0] = 1;
999         
1000         fputc(' ', outfp);
1001
1002         /* original code: rp = Ctree->rootp */
1003         /* but we want to print the first group in the 
1004            trichotomy as outgroup at the bottom! */
1005         rp = Ctree->rootp->isop;
1006         
1007         for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++)
1008                 ;
1009         depth = 1;
1010         n = 0;
1011
1012         cp = rp;
1013         do {
1014                 cp = cp->isop;
1015                 n++;
1016                 prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp);
1017                 if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':');
1018         } while (cp != rp);
1019
1020         free_ivector(umbrella);
1021         free_ivector(column);
1022 }
1023
1024
1025 /* print unrooted tree file with branch lengths */
1026 void fputphylogeny(FILE *fp)
1027 {
1028         Node *cp, *rp;
1029         int n;
1030
1031         cp = rp = Ctree->rootp;
1032         putc('(', fp);
1033         n = 1;
1034         do {
1035                 cp = cp->isop->kinp;
1036                 if (cp->isop == NULL) { /* external node */
1037                         if (n > 60) {
1038                                 fprintf(fp, "\n");
1039                                 n = 2;
1040                         }
1041                         n += fputid(fp, cp->number);
1042                         fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1043                         n += 7;
1044                         cp = cp->kinp;
1045                 } else { /* internal node */
1046                         if (cp->descen) {
1047                                 if (n > 60) {
1048                                         fprintf(fp, "\n");
1049                                         n = 1;
1050                                 }
1051                                 putc('(', fp);
1052                                 n++;
1053                         } else {
1054                                 putc(')', fp);
1055                                 n++;
1056                                 if (n > 60) {
1057                                         fprintf(fp, "\n");
1058                                         n = 1;
1059                                 }
1060                                 /* internal label */
1061                                 if (cp->kinp->label != NULL) {
1062                                         fprintf(fp, "%s", cp->kinp->label);
1063                                         n += strlen(cp->kinp->label);
1064                                 }
1065                                 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1066                                 n += 7;
1067                         }
1068                 }
1069                 if (!cp->descen && !cp->isop->descen && cp != rp) {
1070                         putc(',', fp); /* not last subtree */
1071                         n++;
1072                 }
1073         } while (cp != rp);
1074         fprintf(fp, ")");
1075         /* internal label */
1076         if (cp->label != NULL)
1077                 fprintf(fp, "%s", cp->label);
1078         fprintf(fp, ";\n");
1079 }
1080
1081
1082 void resulttree(FILE *outfp)
1083 {
1084         int n, ne, closeflag;
1085         Node *ep, *ip;
1086         double blen;
1087         
1088         closeflag = FALSE;
1089         
1090         if (clockmode) {
1091                 fprintf(outfp, "\n         branch  length     nc/c");
1092                 fprintf(outfp, "   branch  length     nc/c (= non-clock/clock)\n");
1093         } else {
1094                 fprintf(outfp, "\n         branch  length     S.E.");
1095                 fprintf(outfp, "   branch  length     S.E.\n");
1096         }
1097         for (n = 0; n < Numspc; n++) {
1098                 ep = Ctree->ebrnchp[n];
1099                 ne = ep->number;
1100                 fputid10(outfp, ne);
1101                 fputs("  ", outfp);
1102                 fprintf(outfp, "%3d", ne + 1);
1103                 blen = (clockmode ? ep->lengthc : ep->length);
1104                 fprintf(outfp, "%9.5f", blen*0.01);
1105                 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1106                 if (clockmode)
1107                         fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc));
1108                 else
1109                         fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen));   
1110                 if (n < Numibrnch) {
1111                         ip = Ctree->ibrnchp[n];
1112                         fprintf(outfp, "%8d", n + 1 + Numspc);
1113                         blen = (clockmode ? ip->lengthc : ip->length);
1114                         fprintf(outfp, "%9.5f", blen*0.01);
1115                         if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1116                         if (clockmode)
1117                                 fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc));
1118                         else
1119                                 fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen));   
1120                         fputc('\n', outfp);
1121                 } else {
1122                         if (n == Numspc - 3) {
1123                                 fputc('\n', outfp);
1124                         } else if (n == Numspc - 2) {
1125                                 if (clockmode) {
1126                                         if (!Convergc) 
1127                                                 fprintf(outfp, "     No convergence after %d iterations!\n", Numitc);
1128                                         else
1129                                                 fprintf(outfp, "     %d iterations until convergence\n", Numitc);
1130                                 } else {
1131                                         if (!Converg) 
1132                                                 fprintf(outfp, "     No convergence after %d iterations!\n", Numit);
1133                                         else
1134                                                 fprintf(outfp, "     %d iterations until convergence\n", Numit);                                
1135                                 }               
1136                         } else if (n == Numspc - 1) {
1137                                 fprintf(outfp, "     log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd));
1138                         } else {
1139                                 fputc('\n', outfp);
1140                         }
1141                 }
1142         }
1143         if(closeflag)
1144                 fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n");
1145 }
1146
1147
1148 /******************************************************************************/
1149 /* Neighbor-joining tree                                                      */
1150 /******************************************************************************/
1151
1152
1153 /* compute NJ tree and write to file */
1154 void njtree(FILE *fp)
1155 {
1156         /* reserve memory for tree if necessary */
1157         if (mlmode != 3) { /* no tree */
1158                 if (Ctree != NULL)
1159                         free_tree(Ctree, Numspc);
1160                 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
1161                 Numbrnch = 2*Maxspc-3;
1162                 Numibrnch = Maxspc-3;
1163                 Numspc = Maxspc;
1164                 mlmode = 3;
1165         }
1166
1167         /* construct NJ tree from distance matrix */
1168         njdistantree(Ctree);
1169         
1170         fputphylogeny(fp);
1171 }
1172
1173
1174 /* construct  NJ tree from distance matrix */
1175 void njdistantree(Tree *tr)
1176 {
1177         int i, j, otui=0, otuj=0, otuk, nsp2, cinode, step, restsp, k;
1178         double dij, bix, bjx, bkx, sij, smax, dnsp2;
1179         dvector r;
1180         dmatrix distan;
1181         Node **psotu, *cp, *ip, *jp, *kp;
1182
1183         distan = new_dmatrix(Maxspc,Maxspc);
1184         for (i = 0; i < Maxspc; i++)
1185                 for (j = 0; j < Maxspc; j++)
1186                         distan[i][j] = Distanmat[i][j];
1187
1188         nsp2 = Maxspc - 2;
1189         dnsp2 = 1.0 / nsp2;
1190         
1191         r = new_dvector(Maxspc);
1192         
1193         psotu = (Node **) malloc((unsigned)Maxspc * sizeof(Node *));
1194         if (psotu == NULL) maerror("psotu in njdistantree");
1195
1196         /* external branches are start OTUs */
1197         for (i = 0; i < Maxspc; i++)
1198                 psotu[i] = tr->ebrnchp[i]->kinp;
1199         
1200         restsp = Maxspc;
1201         cinode = 0; /* counter for internal nodes */
1202         
1203         for (step = 0; restsp > 3; step++) { /* NJ clustering steps */
1204         
1205                 for (i = 0; i < Maxspc; i++) {
1206                         if (psotu[i] != NULL) {
1207                                 for (j = 0, r[i] = 0.0; j < Maxspc; j++)
1208                                         if (psotu[j] != NULL)
1209                                                 r[i] += distan[i][j];
1210                         }
1211                 }
1212                 
1213                 smax = -1.0;
1214                 for (i = 0; i < Maxspc-1; i++) {
1215                         if (psotu[i] != NULL) {
1216                                 
1217                                 for (j = i+1; j < Maxspc; j++) {
1218                                         if (psotu[j] != NULL)
1219                                         {
1220                                                 sij = ( r[i] + r[j] ) * dnsp2 - distan[i][j];
1221                         
1222                                                 if (sij > smax) {
1223                                                         smax = sij;
1224                                                         otui = i;
1225                                                         otuj = j;
1226                                                 }
1227                                         }
1228                                 }
1229                         }
1230                 }
1231
1232                 /* new pair: otui and otuj */
1233
1234                 dij = distan[otui][otuj];
1235                 bix = (dij + r[otui]/nsp2 - r[otuj]/nsp2) * 0.5;
1236                 bjx = dij - bix;
1237                 
1238                 cp = tr->ibrnchp[cinode];
1239                 
1240                 ip = psotu[otui];
1241                 jp = psotu[otuj];
1242                 cp->isop = ip;
1243                 ip->isop = jp;
1244                 jp->isop = cp;
1245                 ip->length = bix;
1246                 jp->length = bjx;
1247                 ip->kinp->length = ip->length;
1248                 jp->kinp->length = jp->length;
1249                 
1250                 cp = cp->kinp;
1251                 
1252                 for (k = 0; k < Maxspc; k++)
1253                 {
1254                         if (psotu[k] != NULL && k != otui && k != otuj)
1255                         {
1256                                 dij = (distan[otui][k] + distan[otuj][k] - distan[otui][otuj]) * 0.5;
1257                                 distan[otui][k] = dij;
1258                                 distan[k][otui] = dij;
1259                         }
1260                 }
1261                 distan[otui][otui] = 0.0;
1262
1263                 psotu[otui] = cp;
1264                 psotu[otuj] = NULL;
1265                 
1266                 cinode++;
1267                 
1268                 restsp--;
1269                 nsp2--;
1270                 dnsp2 = 1.0 / nsp2;
1271         }
1272  
1273         otui = otuj = otuk = -1;
1274         for (i = 0; i < Maxspc; i++)
1275         {
1276                 if (psotu[i] != NULL) {
1277                         if (otui == -1) otui = i;
1278                         else if (otuj == -1) otuj = i;
1279                         else otuk = i;
1280                 }
1281         }
1282         bix = (distan[otui][otuj] + distan[otui][otuk] - distan[otuj][otuk]) * 0.5;
1283         bjx = distan[otui][otuj] - bix;
1284         bkx = distan[otui][otuk] - bix;
1285         ip = psotu[otui];
1286         jp = psotu[otuj];
1287         kp = psotu[otuk];
1288         ip->isop = jp;
1289         jp->isop = kp;
1290         kp->isop = ip;
1291         ip->length = bix;
1292         jp->length = bjx;
1293         kp->length = bkx;
1294         ip->kinp->length = ip->length;
1295         jp->kinp->length = jp->length;
1296         kp->kinp->length = kp->length;
1297
1298         tr->rootp = kp;
1299
1300         free_dvector(r);
1301         free_dmatrix(distan);
1302         free((Node *) psotu);
1303 }
1304
1305 /******************************************************************************/
1306 /* find best assignment of rate categories                                    */
1307 /******************************************************************************/
1308
1309 /* find best assignment of rate categories */
1310 void findbestratecombination()
1311 {
1312         int k, u;
1313         double bestvalue, fv2;
1314         dvector catprob;
1315         dmatrix cdl;
1316
1317         cdl = Ctree->condlkl;
1318         catprob = new_dvector(numcats+1);
1319         fv2 = (1.0-fracinv)/(double) numcats;
1320
1321         for (k = 0; k < Numptrn; k++) {
1322                 /* zero rate */
1323                 if (constpat[k] == TRUE)
1324                         catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]];
1325                 else 
1326                         catprob[0] = 0.0;
1327                 /* non-zero-rates */
1328                 for (u = 1; u < numcats+1; u++)
1329                         catprob[u] = fv2*cdl[u-1][k];
1330                 /* find best */
1331                 bestvalue = catprob[0];
1332                 bestrate[k] = 0;
1333                 for (u = 1; u < numcats+1; u++)
1334                         if (catprob[u] >= bestvalue) {
1335                                 bestvalue = catprob[u];
1336                                 bestrate[k] = u;
1337                         }
1338         }
1339         free_dvector(catprob);
1340         bestratefound = 1;
1341 }
1342
1343 /* print best assignment of rate categories */
1344 void printbestratecombination(FILE *fp)
1345 {
1346         int s, k;
1347
1348         for (s = 0; s < Maxsite; s++) {
1349                 k = Alias[s];
1350                 fprintf(fp, "%2d", bestrate[k]);
1351                 if ((s+1) % 30 == 0)
1352                         fprintf(fp, "\n");
1353                 else if ((s+1) % 10 == 0)
1354                         fprintf(fp, "  ");
1355         }
1356         if (s % 70 != 0)
1357                 fprintf(fp, "\n");
1358 }
1359
1360
1361 /******************************************************************************/
1362 /* computation of clocklike branch lengths                                    */
1363 /******************************************************************************/
1364
1365 /* checks wether e is a valid edge specification */
1366 int checkedge(int e)
1367 {
1368         /* there are Numspc external branches:
1369              0 - Numspc-1
1370            there are Numibrnch internal branches:
1371              Numspc - Numspc+Numibrnch-1
1372         */
1373         
1374         if (e < 0) return FALSE;
1375         if (e < Numspc+Numibrnch) return TRUE;
1376         else return FALSE;  
1377 }
1378
1379 /* print topology of subtree */
1380 void fputsubstree(FILE *fp, Node *ip)
1381 {
1382         Node *cp;
1383         
1384         if (ip->isop == NULL) { /* terminal nodes */
1385                 numtc += fputid(fp, ip->number);
1386         } else {        
1387                 cp = ip;
1388                 fprintf(fp, "(");
1389                 numtc += 1;
1390                 do {            
1391                         cp = cp->isop->kinp;
1392                         if (cp->isop == NULL) { /* external node */
1393                                 numtc += fputid(fp, cp->number);
1394                                 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1395                                 numtc += 7;
1396                                 cp = cp->kinp;
1397                         } else { /* internal node */
1398                                 if (cp->height > 0.0) {
1399                                         fprintf(fp, "(");
1400                                         numtc += 1;
1401                                 } else if (cp->height < 0.0) {
1402                                         fprintf(fp, ")");
1403                                         numtc += 1;
1404                                         if (numtc > 60) {
1405                                                 fprintf(fp, "\n");
1406                                                 numtc = 1;
1407                                         }
1408                                         /* internal label */
1409                                         if (cp->kinp->label != NULL) {
1410                                                 fprintf(fp, "%s", cp->kinp->label);
1411                                                 numtc += strlen(cp->kinp->label);
1412                                         }
1413                                         if (numtc > 60) {
1414                                                 fprintf(fp, "\n");
1415                                                 numtc = 1;
1416                                         }
1417                                         fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1418                                         numtc += 6;
1419                                         if (numtc > 60) {
1420                                                 fprintf(fp, "\n");
1421                                                 numtc = 1;
1422                                         }
1423                                 }
1424                         }
1425                         if (cp->height <= 0.0 && cp->isop->height <= 0.0 &&
1426                                 cp->isop != ip) {
1427                                 putc(',', fp); /* not last subtree */
1428                                 numtc += 1;
1429                                 if (numtc > 60) {
1430                                         fprintf(fp, "\n");
1431                                         numtc = 1;
1432                                 }
1433                         }
1434                 } while (cp->isop != ip);
1435                 fprintf(fp, ")");
1436                 numtc += 1;
1437         }
1438         if (numtc > 60) {
1439                 fprintf(fp, "\n");
1440                 numtc = 1;
1441         }
1442
1443 }
1444
1445 /* print rooted tree file  */
1446 void fputrooted(FILE *fp, int e)
1447 {
1448         Node *rootbr;
1449         
1450         /* to be called only after clocklike branch
1451            lengths have been computed */
1452          
1453          /* pointer to root branch */
1454         if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1455         else rootbr = Ctree->ibrnchp[e - Numspc];
1456
1457         fprintf(fp, "(");
1458         numtc = 2;
1459         fputsubstree(fp, rootbr);
1460         /* internal label */
1461         if (rootbr->label != NULL) {
1462                 fprintf(fp, "%s", rootbr->label);
1463                 numtc += strlen(rootbr->label);
1464         }
1465         if (numtc > 60) {
1466                 fprintf(fp, "\n");
1467                 numtc = 1;
1468         }
1469         fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01);
1470         numtc += 7;
1471         if (numtc > 60) {
1472                 fprintf(fp, "\n");
1473                 numtc = 1;
1474         }
1475         fputsubstree(fp, rootbr->kinp);
1476         /* internal label */
1477         if (rootbr->kinp->label != NULL) {
1478                 fprintf(fp, "%s", rootbr->kinp->label);
1479                 numtc += strlen(rootbr->kinp->label);
1480         }
1481         if (numtc > 60) {
1482                 fprintf(fp, "\n");
1483                 numtc = 1;
1484         }
1485         fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01);
1486 }
1487
1488 /* finds heights in subtree */
1489 void findheights(Node *ip)
1490 {
1491         Node *cp, *rp;
1492         
1493         if (ip->isop != NULL) { /* forget terminal nodes */
1494                 
1495                 cp = ip;
1496                 
1497                 /* initialise node  */
1498                 cp->height = 1.0; /* up */
1499                 rp = cp;
1500                 while (rp->isop != cp) {
1501                         rp = rp->isop;
1502                         rp->height = -1.0; /* down */
1503                 }
1504
1505                 do {            
1506                         cp = cp->isop->kinp;
1507                         if (cp->isop == NULL) { /* external node */
1508                                 cp = cp->kinp;
1509                         } else { /* internal node */
1510                                 if (cp->height == 0.0) { /* node not yet visited */
1511                                         cp->height = 1.0; /* up */
1512                                         rp = cp;
1513                                         while (rp->isop != cp) {
1514                                                 rp = rp->isop;
1515                                                 rp->height = -1.0; /* down */
1516                                         }
1517                                 } else if (cp->kinp->height == 1.0) {
1518                                         /* cp->kinp is next height pointer  */
1519                                         heights[Numhts] = cp->kinp;
1520                                         Numhts++;
1521                                 }
1522                         }
1523                 } while (cp->isop != ip);
1524                 /* ip is last height pointer */
1525                 heights[Numhts] = ip;
1526                 Numhts++;
1527         }       
1528 }
1529
1530
1531 /* initialise clocklike branch lengths (with root on edge e) */
1532 void initclock(int e)
1533 {
1534         int n, h, count;
1535         Node *cp, *rp;
1536         double sum, minh, aveh, len;
1537
1538         /* be sure to have a Ctree in memory and
1539            to have pairwise distances computed */
1540
1541         clockmode = 1; /* clocklike branch lengths */
1542
1543         /* least square estimate for branch length */
1544         changedistan(Distanmat, Distanvec, Numspc);
1545         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
1546
1547         /* pointer to root branch */
1548         if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1549         else rootbr = Ctree->ibrnchp[e - Numspc];
1550            
1551         /* clear all heights */
1552         for (n = 0; n < Numspc; n++) {
1553                 Ctree->ebrnchp[n]->height = 0.0;
1554                 Ctree->ebrnchp[n]->kinp->height = 0.0;
1555                 Ctree->ebrnchp[n]->varheight = 0.0;
1556                 Ctree->ebrnchp[n]->kinp->varheight = 0.0;
1557                 if (n < Numibrnch) {
1558                         Ctree->ibrnchp[n]->height = 0.0;
1559                         Ctree->ibrnchp[n]->kinp->height = 0.0;
1560                         Ctree->ibrnchp[n]->varheight = 0.0;
1561                         Ctree->ibrnchp[n]->kinp->varheight = 0.0;
1562                 }
1563         }
1564         
1565         /* collect pointers to height nodes */
1566         Numhts = 0;
1567         findheights(rootbr); /* one side */
1568         findheights(rootbr->kinp); /* other side */
1569
1570         /* assign preliminary approximate heights and
1571            corresponding branch lengths */ 
1572         for (h = 0; h < Numhts; h++) {
1573                 
1574                 cp = rp = heights[h];
1575                 sum = 0;
1576                 count = 0;
1577                 minh = 0.0;
1578                 while (rp->isop != cp) {
1579                         count++;
1580                         rp = rp->isop;
1581                         sum += rp->lengthc + rp->kinp->height;
1582                         if (rp->kinp->height > minh) minh = rp->kinp->height;
1583                 }
1584                 aveh = sum / (double) count;
1585                 if (aveh < minh + MINARC) aveh = minh + MINARC;
1586                 cp->height = aveh;
1587                 rp = cp;
1588                 while (rp->isop != cp) {
1589                         rp = rp->isop;
1590                         len = aveh - rp->kinp->height;
1591                         rp->kinp->lengthc = len;
1592                         rp->lengthc = len;
1593                 }
1594                 
1595         }
1596         if (rootbr->height > rootbr->kinp->height) minh = rootbr->height;
1597         else minh = rootbr->kinp->height;
1598         aveh = 0.5*(rootbr->lengthc + rootbr->height + rootbr->kinp->height);
1599         if (aveh < minh + MINARC) aveh = minh + MINARC; 
1600         hroot = aveh;
1601         maxhroot = RMHROOT*hroot; /* maximal possible hroot */
1602         len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1603         rootbr->lengthc = len;
1604         rootbr->kinp->lengthc = len;
1605 }
1606
1607 /* approximate likelihood under the constaining assumption of
1608    clocklike branch lengths (with root on edge e) */
1609 double clock_alklhd(int e)
1610 {
1611         initclock(e);
1612         Ctree->lklhdc = treelkl(Ctree);
1613
1614         return Ctree->lklhdc;   
1615 }
1616
1617 /* log-likelihood given height ht at node pointed to by chep */
1618 double heightlkl(double ht)
1619 {
1620         Node *rp;
1621         double len;     
1622         
1623         /* adjust branch lengths */
1624         chep->height = ht;
1625         /* descendent branches */
1626         rp = chep;
1627         while (rp->isop != chep) {
1628                 rp = rp->isop;
1629                 len = chep->height - rp->kinp->height;
1630                 rp->kinp->lengthc = len;
1631                 rp->lengthc = len;
1632         }
1633         /* upward branch */
1634         if (chep == rootbr || chep->kinp == rootbr) {
1635                 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1636                 chep->lengthc = len;
1637                 chep->kinp->lengthc = len;
1638         } else {
1639                 rp = chep->kinp;
1640                 while (rp->isop->height <= 0.0)
1641                         rp = rp->isop;
1642                 chep->lengthc = rp->isop->height - chep->height;
1643                 chep->kinp->lengthc = rp->isop->height - chep->height;
1644         }
1645
1646         /* compute likelihood */
1647         Ctree->lklhdc = treelkl(Ctree);
1648
1649         return -(Ctree->lklhdc); /* we use a minimizing procedure */
1650 }
1651
1652 /* optimize current height */
1653 void optheight(void)
1654 {
1655         double he, fx, f2x, minh, maxh, len;
1656         Node *rp;
1657
1658         /* current height */
1659         he = chep->height;
1660         
1661         /* minimum */
1662         minh = 0.0;
1663         rp = chep;
1664         while (rp->isop != chep) {
1665                 rp = rp->isop;
1666                 if (rp->kinp->height > minh)
1667                         minh = rp->kinp->height;
1668         }
1669         minh += MINARC;
1670         
1671         /* maximum */
1672         if (chep == rootbr || chep->kinp == rootbr) {
1673                 maxh = hroot;
1674         } else {
1675                 rp = chep->kinp;
1676                 while (rp->isop->height <= 0.0)
1677                         rp = rp->isop;
1678                 maxh = rp->isop->height;
1679         }
1680         maxh -= MINARC;
1681
1682         /* check borders for height */
1683         if (he < minh) he = minh;
1684         if (he > maxh) he = maxh;
1685
1686         /* optimization */
1687         if (!(he == minh && he == maxh))
1688                 he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x);
1689         
1690         /* variance of height */
1691         f2x = fabs(f2x);
1692         if (1.0/(maxhroot*maxhroot) < f2x)
1693                 chep->varheight = 1.0/f2x;
1694         else
1695                 chep->varheight = maxhroot*maxhroot;
1696         
1697         /* adjust branch lengths */
1698         chep->height = he;
1699         /* descendent branches */
1700         rp = chep;
1701         while (rp->isop != chep) {
1702                 rp = rp->isop;
1703                 len = chep->height - rp->kinp->height;
1704                 rp->kinp->lengthc = len;
1705                 rp->lengthc = len;
1706         }
1707         /* upward branch */
1708         if (chep == rootbr || chep->kinp == rootbr) {
1709                 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1710                 chep->lengthc = len;
1711                 chep->kinp->lengthc = len;
1712         } else {
1713                 rp = chep->kinp;
1714                 while (rp->isop->height <= 0.0)
1715                         rp = rp->isop;
1716                 chep->lengthc = rp->isop->height - chep->height;
1717                 chep->kinp->lengthc = rp->isop->height - chep->height;
1718         }
1719 }
1720
1721 /* log-likelihood given height ht at root */
1722 double rheightlkl(double ht)
1723 {
1724         double len;     
1725         
1726         /* adjust branch lengths */
1727         hroot = ht;
1728         len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1729         rootbr->lengthc = len;
1730         rootbr->kinp->lengthc = len;
1731
1732         /* compute likelihood */
1733         Ctree->lklhdc = treelkl(Ctree);
1734
1735         return -(Ctree->lklhdc); /* we use a minimizing procedure */
1736 }
1737
1738 /* optimize height of root */
1739 void optrheight(void)
1740 {
1741         double he, fx, f2x, minh, len;
1742
1743         /* current height */
1744         he = hroot;
1745         
1746         /* minimum */
1747         if (rootbr->height > rootbr->kinp->height)
1748                 minh = rootbr->height;
1749         else
1750                 minh = rootbr->kinp->height;
1751         minh += MINARC;
1752                 
1753         /* check borders for height */
1754         if (he < minh) he = minh;
1755         if (he > maxhroot) he = maxhroot;
1756
1757         /* optimization */
1758         he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x);
1759         
1760         /* variance of height of root */
1761         f2x = fabs(f2x);
1762         if (1.0/(maxhroot*maxhroot) < f2x)
1763                 varhroot = 1.0/f2x;
1764         else
1765                 varhroot = maxhroot*maxhroot;
1766
1767         /* adjust branch lengths */
1768         hroot = he;
1769         len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1770         rootbr->lengthc = len;
1771         rootbr->kinp->lengthc = len;
1772 }
1773
1774 /* exact likelihood under the constaining assumption of
1775    clocklike branch lengths (with root on edge e) */
1776 double clock_lklhd(int e)
1777 {
1778         int h, nconv;
1779         double old;
1780         
1781         Numitc = 0;
1782         Convergc = FALSE;
1783         
1784         initclock(e);
1785         
1786         do {
1787         
1788                 Numitc++;
1789                 nconv = 0;
1790
1791                 /* optimize height of root */
1792                 old = hroot;
1793                 optrheight();
1794                 if (fabs(old - hroot) < HEPSILON) nconv++;
1795
1796                 /* optimize height of nodes */
1797                 for (h = Numhts-1; h >= 0; h--) {
1798                 
1799                         /* pointer chep to current height node */
1800                         chep = heights[h];
1801                         
1802                         /* store old value */
1803                         old = chep->height;
1804
1805                         /* find better height */
1806                         optheight();
1807                         
1808                         /* converged ? */
1809                         if (fabs(old - chep->height) < HEPSILON) nconv++;
1810                 }
1811
1812                 if (nconv == Numhts+1) Convergc = TRUE;
1813                         
1814         } while (Numitc < MAXIT && !Convergc);
1815                 
1816         /* compute final likelihood */
1817         Ctree->lklhdc = treelkl(Ctree);
1818
1819         return Ctree->lklhdc;
1820 }
1821
1822 /* find out the edge containing the root */
1823 int findrootedge()
1824 {
1825         int e, ebest;
1826         double logbest, logtest;
1827         
1828         /* compute the likelihood for all edges and take the edge with
1829            best likelihood (using approximate ML) */
1830
1831         ebest = 0;
1832         logbest = clock_alklhd(0);
1833         numbestroot = 1;
1834         for (e = 1; e < Numspc+Numibrnch; e++) {
1835                 logtest = clock_alklhd(e);
1836                 if (logtest > logbest) {
1837                         ebest = e;
1838                         logbest = logtest;
1839                         numbestroot = 1;
1840                 } else if (logtest == logbest) {
1841                         numbestroot++;
1842                 }
1843         }
1844
1845         return ebest;
1846 }
1847
1848 /* show heights and corresponding standard errors */
1849 void resultheights(FILE *fp)
1850 {
1851         int h, num;
1852         Node *cp;
1853         
1854         fprintf(fp, " height    S.E.    of node common to branches\n");
1855         for (h = 0; h < Numhts; h++) {
1856                 fprintf(fp, "%.5f  %.5f    ", (heights[h]->height)*0.01,
1857                         sqrt(heights[h]->varheight)*0.01);
1858                 cp = heights[h];
1859                 do {
1860                         num = (cp->number) + 1;
1861                         if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */                        
1862                         fprintf(fp, "%d  ", num);
1863                         cp = cp->isop;
1864                 } while (cp != heights[h]);     
1865                 fprintf(fp, "\n");
1866                 
1867         }
1868         fprintf(fp, "%.5f  %.5f   of root at branch %d\n",
1869                 hroot*0.01, sqrt(varhroot)*0.01, locroot+1);
1870 }
1871