initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_dqo / src / ml2.c
1 /*
2  * ml2.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 #define EXTERN extern
17
18 /* prototypes */
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <ctype.h>
23 #include <string.h>
24 #include "util.h"
25 #include "ml.h"
26
27 #define STDOUT     stdout
28 #ifndef PARALLEL             /* because printf() runs significantly faster */
29                              /* than fprintf(stdout) on an Apple McIntosh  */
30                              /* (HS) */
31 #       define FPRINTF    printf
32 #       define STDOUTFILE
33 #else
34 #       define FPRINTF    fprintf
35 #       define STDOUTFILE STDOUT,
36 #endif
37
38 /* prototypes for two functions of puzzle2.c */
39 void fputid10(FILE *, int);
40 int fputid(FILE *, int);
41
42
43 /******************************************************************************/
44 /* user tree input                                                            */
45 /******************************************************************************/
46
47 /* read user tree, drop all blanks, tabs, and newlines.
48    Drop edgelengths (after :) but keep internal
49    labels. Check whether all pairs of brackets match. */
50 void getusertree(FILE *itfp, cvector tr, int maxlen)
51 {
52         int n, brac, ci;
53         int comment = 0;
54
55         /* look for opening bracket */
56         n = 0;
57         brac = 0;
58         do {
59                 ci = fgetc(itfp);
60                 if (ci == EOF) {
61                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n");
62                         exit(1);
63                 }
64                 if (ci == '[') comment = 1;
65                 if ((ci == ']') && comment) {
66                         comment = 0;
67                         ci = fgetc(itfp);
68                 }
69         } while (comment || ((char) ci != '('));
70         tr[n] = (char) ci;
71         brac++;
72         
73         do {
74                 /* get next character (skip blanks, newlines, and tabs) */
75                 do {
76                         ci = fgetc(itfp);
77                         if (ci == EOF) {
78                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n");
79                                 exit(1);
80                         }
81                         if (ci == '[') comment = 1;
82                         if ((ci == ']') && comment) {
83                                 comment = 0;
84                                 ci = fgetc(itfp);
85                         }
86                 } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t');
87         
88                 if ((char) ci == ':') { /* skip characters until a ,) appears  */
89                         do {
90                                 ci = fgetc(itfp);
91                                 if (ci == EOF) {
92                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n");
93                                         exit(1);
94                                 }
95                                 if (ci == '[') comment = 1;
96                                 if ((ci == ']') && comment) {
97                                         comment = 0;
98                                         ci = fgetc(itfp);
99                                 }
100                         } while (comment || ((char) ci != ',' && (char) ci != ')') );
101                 }
102                 
103                 if ((char) ci == '(') {
104                         brac++;
105                 }
106                 if ((char) ci == ')') {
107                         brac--;
108                 }
109
110                 n++;
111                 tr[n] = (char) ci;
112         
113         } while (((char) ci != ';') && (n != maxlen-2));
114         
115         if (n == maxlen-2) {
116                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n");
117                 exit(1);
118         }
119         
120         if (brac != 0) {
121                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n");
122                 exit(1);
123         }
124         
125         n++;
126         tr[n] = '\0';
127 }
128
129
130 Node *internalnode(Tree *tr, char **chpp, int *ninode)
131 {
132         Node *xp, *np, *rp;
133         int i, j, dvg, ff, stop, numc;
134         char ident[100], idcomp[27]; /*CZ*/
135         char *idp;
136
137         (*chpp)++;
138         if (**chpp == '(') { /* process subgroup */
139                 
140                 xp = internalnode(tr, chpp, ninode);
141                 xp->isop = xp;
142                 dvg = 1;
143                 while (**chpp != ')') {
144                         if (**chpp == '\0') {
145                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
146                                 exit(1);
147                         }
148                         dvg++;
149                         /* insert edges around node */
150                         np = internalnode(tr, chpp, ninode);
151                         np->isop = xp->isop;
152                         xp->isop = np; 
153                         xp = np;
154                 }
155                 /* closing bracket reached */
156                 
157                 (*chpp)++;
158                 if (dvg < 2) {
159                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n");
160                         exit(1);
161                 }
162                 
163                 if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */
164                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
165                         exit(1);
166                 }
167
168                 rp = tr->ibrnchp[*ninode];
169                 rp->isop = xp->isop;
170                 xp->isop = rp;
171
172                 for (j = 0; j < Numspc; j++)
173                         rp->paths[j] = 0;
174                 xp = rp->isop;
175                 while (xp != rp) {
176                         for (j = 0; j < Numspc; j++) {
177                                 if (xp->paths[j] == 1)
178                                         rp->paths[j] = 1;
179                         }
180                         xp = xp->isop;
181                 }
182                 (*ninode)++;
183
184                 if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
185                 if ((**chpp) == '\0')  {
186                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
187                         exit(1);
188                 }
189
190                 /* read internal label into rp->label (max. 20 characters) */
191                 rp->label = new_cvector(21);
192                 (rp->label)[0] = **chpp;
193                 (rp->label)[1] = '\0';
194                 for (numc = 1; numc < 20; numc++) {
195                         (*chpp)++;
196                         if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp;
197                         if ((**chpp) == '\0')  {
198                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
199                                 exit(1);
200                         }
201                         (rp->label)[numc] = **chpp;
202                         (rp->label)[numc+1] = '\0';
203                 }       
204                 do { /* skip the rest of the internal label */
205                         (*chpp)++;
206                         if ((**chpp) == '\0')  {
207                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
208                                 exit(1);
209                         }
210                 } while (((**chpp) != ',' && (**chpp) != ')'));
211                         
212                 return rp->kinp;
213                 
214         } else { /* process species names */
215                 /* read species name */
216                 for (idp = ident; **chpp != ',' &&
217                         **chpp != ')' && **chpp != '\0'; (*chpp)++) {
218                         *idp++ = **chpp;        
219                 }
220                 *idp = '\0';
221                 /* look for internal number */
222                 idcomp[26] = '\0';  /*CZ*/
223                 
224                 for (i = 0; i < Maxspc; i++) {
225                         ff = 0;
226                         stop = FALSE;
227                         do {
228                                 idcomp[ff] = Identif[i][ff];
229                                 ff++;
230                                 if (idcomp[ff-1] == ' ') stop = TRUE;
231                         } while (!stop && (ff != 26)); /*CZ*/
232                         if (stop) idcomp[ff-1] = '\0';
233                         
234                         if (!strcmp(ident, idcomp)) {
235                                 if (usedtaxa[i]) {
236                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence '");
237                                         FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident);
238                                         exit(1);
239                                 }
240                                 usedtaxa[i] = TRUE;
241                                 return tr->ebrnchp[i]->kinp;
242                         }
243                 }
244                 
245                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident);
246                 exit(1);
247         }
248         return NULL; /* never returned but without some compilers complain */
249 }
250
251 /* make tree structure, the tree description may contain internal
252     labels but no edge lengths */
253 void constructtree(Tree *tr, cvector strtree)
254 {
255         char *chp;
256         int ninode, i;
257         int dvg, numc;
258         Node *xp, *np;
259
260         ninode = 0;
261         chp = strtree;
262         usedtaxa = new_ivector(Maxspc);
263         for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE;
264
265         xp = internalnode(tr, &chp, &ninode);
266         xp->isop = xp;
267         dvg = 1;
268         while (*chp != ')') { /* look for closing bracket */
269                 if (*chp == '\0') {
270                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n");
271                         exit(1);
272                 }
273                 dvg++;
274                 /* insert edges around node */
275                 np = internalnode(tr, &chp, &ninode);
276                 np->isop = xp->isop;
277                 xp->isop = np; 
278                 xp = np;
279         }
280         
281         for (i = 0; i < Maxspc; i++)
282                 if (usedtaxa[i] == FALSE) {
283                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n");
284                         exit(1);        
285                 }
286         
287         /* closing bracket reached */
288         if (dvg < 3) {
289                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n");
290                 exit(1);
291         }
292         tr->rootp = xp;
293         Numibrnch = ninode;
294         Numbrnch = Numspc + ninode;
295
296         chp++;
297         if (*chp == ';' || *chp == '\0') {
298                 free_ivector(usedtaxa);
299                 return;
300         }
301
302         /* copy last internal label (max. 20 characters) */
303         xp->label = new_cvector(21);
304         (xp->label)[0] = *chp;
305         (xp->label)[1] = '\0';          
306         for (numc = 1; numc < 20; numc++) {
307                 chp++;
308                 if (*chp == ';' || *chp == '\0') {
309                         free_ivector(usedtaxa);
310                         return;
311                 } else {
312                         (xp->label)[numc] = *chp;
313                         (xp->label)[numc+1] = '\0';
314                 }
315         }
316         free_ivector(usedtaxa); 
317         return;
318 }
319
320
321 /* remove possible basal bifurcation */
322 void removebasalbif(cvector strtree)
323 {
324         int n, c, brak, cutflag, h;
325         
326         /* check how many OTUs on basal level */
327         n = 0;
328         c = 0;
329         brak = 0;
330         do {
331                 if (strtree[n] == '(') brak++;
332                 if (strtree[n] == ')') brak--;
333                 
334                 if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */
335                 
336                 n++;
337         } while (strtree[n] != '\0');
338         
339         /* if only 1 OTU inside outer bracket stop now */
340         if (c == 0) {
341                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n");
342                 exit(1);
343         }       
344         
345         /* if only 2 OTUs inside outer bracket delete second pair of
346            brackets from the right to remove basal bifurcation */
347
348         if (c == 1) {
349                         
350                 n = 0;
351                 brak = 0;
352                 cutflag = 0; /* not yet cutted */
353                 h = 0;
354                 do {
355                         if (strtree[n] == '(') brak++;
356                         if (strtree[n] == ')') brak--;
357                         
358                         if (brak == 2 && cutflag == 0) cutflag = 1; /* cutting */
359                         if (brak == 1 && cutflag == 1) {
360                                 cutflag = 2; /* cutted */
361                                 /* leave out internal label */
362                                 do {
363                                         h++;    
364                                 } while (strtree[n+h] != ')' && strtree[n+h] != ',');
365
366                         }
367                         
368                         if (cutflag == 1) strtree[n] = strtree[n+1];
369                         if (cutflag == 2) strtree[n-1] = strtree[n+h];
370                 
371                         n++;
372                 } while (strtree[n] != '\0');
373         }
374 }
375
376
377 void makeusertree(FILE *itfp)
378 {
379         cvector strtree;
380         
381         strtree = new_cvector(23*Maxspc); /* for treefile */ 
382         getusertree(itfp, strtree, 23*Maxspc);
383         removebasalbif(strtree);
384         constructtree(Ctree, strtree);
385         free_cvector(strtree);
386 }
387
388
389 /******************************************************************************/
390 /* memory organisation for maximum likelihood tree                            */
391 /******************************************************************************/
392
393 /* initialise new tree */
394 Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint)
395 {
396         int n, i, maxibrnch;
397         Tree *tr;
398         Node *dp, *up;
399         
400         maxibrnch = maxspc - 3;
401         heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *));
402         if (heights == NULL) maerror("heights in new_tree");
403         tr = (Tree *) malloc(sizeof(Tree));
404         if (tr == NULL) maerror("tr in new_tree");
405         tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *));
406         if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree");
407         tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *));
408         if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree");
409         tr->condlkl = new_dmatrix(numcats, numptrn);    
410         for (n = 0; n < maxspc; n++) {
411                 dp = (Node *) malloc(sizeof(Node));
412                 if (dp == NULL) maerror("dp in new_tree");
413                 up = (Node *) malloc(sizeof(Node));
414                 if (up == NULL) maerror("up in new_tree");
415                 dp->isop = NULL;
416                 up->isop = NULL;
417                 dp->kinp = up;
418                 up->kinp = dp;
419                 dp->descen = TRUE;
420                 up->descen = FALSE;
421                 dp->number = n;
422                 up->number = n;
423                 dp->length = 0.0;
424                 up->length = 0.0;
425                 dp->lengthc = 0.0;
426                 up->lengthc = 0.0;
427                 dp->varlen = 0.0;
428                 up->varlen = 0.0;
429                 dp->paths = new_ivector(maxspc);
430                 up->paths = dp->paths;
431                 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
432                 dp->paths[n] = 1;
433                 dp->eprob = seqconint[n];
434                 up->eprob = NULL;
435                 dp->partials = NULL;
436                 up->partials = new_dcube(numcats, numptrn, tpmradix);
437                 tr->ebrnchp[n] = dp;
438                 up->label = NULL;
439                 dp->label = NULL;
440         }
441         for (n = 0; n < maxibrnch; n++) {
442                 dp = (Node *) malloc(sizeof(Node));
443                 if (dp == NULL) maerror("dp in new_tree");
444                 up = (Node *) malloc(sizeof(Node));
445                 if (up == NULL) maerror("up in new_tree");
446                 dp->isop = NULL;
447                 up->isop = NULL;
448                 dp->kinp = up;
449                 up->kinp = dp;
450                 dp->descen = TRUE;
451                 up->descen = FALSE;
452                 dp->number = n;
453                 up->number = n;
454                 dp->length = 0.0;
455                 up->length = 0.0;
456                 dp->lengthc = 0.0;
457                 up->lengthc = 0.0;
458                 dp->varlen = 0.0;
459                 up->varlen = 0.0;
460                 dp->paths = new_ivector(maxspc);
461                 up->paths = dp->paths;
462                 for (i = 0; i < maxspc; i++) dp->paths[i] = 0;
463                 dp->eprob = NULL;
464                 up->eprob = NULL;
465                 dp->partials = new_dcube(numcats, numptrn, tpmradix);
466                 up->partials = new_dcube(numcats, numptrn, tpmradix);
467                 tr->ibrnchp[n] = dp;
468                 up->label = NULL;
469                 dp->label = NULL;
470         }
471         tr->rootp = NULL;
472         
473         /*
474          * reserve memory for lengths of the tree branches
475          * and for the distance matrix as a vector
476          * (needed for LS estimation of tree branch lengths)
477          */ 
478          
479         Brnlength = new_dvector(2 * maxspc - 3); 
480         Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2);
481
482         return tr;
483 }
484
485
486 /* initialise quartet tree */
487 Tree *new_quartet(int numptrn, cmatrix seqconint)
488 {
489         int n, i;
490         Tree *tr;
491         Node *dp, *up;
492
493         heights = (Node **) malloc((unsigned)2 * sizeof(Node *));
494         if (heights == NULL) maerror("heights in new_quartet");
495         /* reserve memory for tree */
496         tr = (Tree *) malloc(sizeof(Tree));
497         if (tr == NULL) maerror("tr in new_quartet");
498         tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *));
499         if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet");
500         tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *));
501         if (tr->ibrnchp == NULL) maerror("ibrnchp in new_quartet");
502         tr->condlkl = new_dmatrix(numcats, numptrn);
503         /* reserve memory for nodes */
504         for (n = 0; n < 4; n++) {
505                 dp = (Node *) malloc(sizeof(Node));
506                 if (dp == NULL) maerror("dp in new_quartet");
507                 up = (Node *) malloc(sizeof(Node));
508                 if (up == NULL) maerror("dp in new_quartet");
509                 dp->isop = NULL;
510                 dp->kinp = up;
511                 up->kinp = dp;
512                 dp->descen = TRUE;
513                 up->descen = FALSE;
514                 dp->number = n;
515                 up->number = n;
516                 dp->length = 0.0;
517                 up->length = 0.0;
518                 dp->lengthc = 0.0;
519                 up->lengthc = 0.0;
520                 dp->varlen = 0.0;
521                 up->varlen = 0.0;
522                 dp->paths = new_ivector(4);
523                 up->paths = dp->paths;
524                 for (i = 0; i < 4; i++) dp->paths[i] = 0;
525                 dp->paths[n] = 1;
526                 dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */
527                 up->eprob = NULL;               
528                 dp->partials = NULL;
529                 up->partials = new_dcube(numcats, numptrn, tpmradix);
530                 tr->ebrnchp[n] = dp;
531         }
532
533         /* reserve memory for internal branch */        
534         dp = (Node *) malloc(sizeof(Node));
535         if (dp == NULL) maerror("dp in new_quartet");   
536         up = (Node *) malloc(sizeof(Node));
537         if (up == NULL) maerror("dp in new_quartet");   
538         dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */
539         up->isop = tr->ebrnchp[0]->kinp;
540         dp->kinp = up;
541         up->kinp = dp;
542         dp->descen = TRUE;
543         up->descen = FALSE;
544         dp->number = 0;
545         up->number = 0;
546         dp->length = 0.0;
547         up->length = 0.0;
548         dp->lengthc = 0.0;
549         up->lengthc = 0.0;
550         dp->varlen = 0.0;
551         up->varlen = 0.0;
552         dp->paths = new_ivector(4);
553         up->paths = dp->paths;
554         up->paths[0] = 0;
555         up->paths[1] = 0;
556         up->paths[2] = 1;
557         up->paths[3] = 1;
558         dp->eprob = NULL;
559         up->eprob = NULL;       
560         dp->partials = new_dcube(numcats, numptrn, tpmradix);
561         up->partials = new_dcube(numcats, numptrn, tpmradix);   
562         tr->ibrnchp[0] = dp;
563         
564         /* place root */
565         tr->rootp = up;
566
567         /* connect external branches */ 
568         tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp;
569         tr->ebrnchp[1]->kinp->isop = tr->rootp;
570         tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp;
571         tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp;
572         
573         /*
574          * reserve memory for lengths of the five branches
575          * of a quartet and for the six possible distances
576          * (needed for LS estimation of branch lengths)
577          */
578         Brnlength = new_dvector(NUMQBRNCH); 
579         Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2);
580
581         return tr;
582 }
583
584
585 /* free tree memory */
586 void free_tree(Tree *tr, int taxa)
587 {       
588         int n;
589         Node *dp, *up;
590
591         free(heights);
592         free_dmatrix(tr->condlkl);
593         for (n = 0; n < taxa; n++) {
594                 dp = tr->ebrnchp[n];
595                 up = dp->kinp;
596                 free_ivector(dp->paths);                
597                 free_dcube(up->partials);               
598                 free(dp);
599                 free(up);
600         }
601         free(tr->ebrnchp);
602         for (n = 0; n < (taxa-3); n++) {
603                 dp = tr->ibrnchp[n];
604                 up = dp->kinp;
605                 free_dcube(dp->partials);
606                 free_dcube(up->partials);
607                 free_ivector(dp->paths);
608                 free(dp);
609                 free(up);
610         }
611         free(tr->ibrnchp);
612         free(tr);
613         free_dvector(Brnlength); /* branch lengths (for LS estimation) */
614         free_dvector(Distanvec); /* distances (for LS estimation) */
615 }
616
617
618 /* make (a,b)-(c,d) quartet
619
620         a ---+     +--- c
621              +-----+
622         b ---+     +--- d
623
624         species numbers range from 0 to Maxspc - 1  */
625
626 void make_quartet(int a, int b, int c, int d)
627 {
628         /* place sequences */
629         /*Ctree->ebrnchp[0]->eprob = Seqpat[a];
630         Ctree->ebrnchp[1]->eprob = Seqpat[b];
631         Ctree->ebrnchp[2]->eprob = Seqpat[c];
632         Ctree->ebrnchp[3]->eprob = Seqpat[d];
633         CZ */
634         /* make distance vector */
635         /*Distanvec[0] = Distanmat[b][a];
636         Distanvec[1] = Distanmat[c][a];
637         Distanvec[2] = Distanmat[c][b];
638         Distanvec[3] = Distanmat[d][a];
639         Distanvec[4] = Distanmat[d][b];
640         Distanvec[5] = Distanmat[d][c];
641     CZ */
642 }
643
644 /* write distance matrix as vector */
645 void changedistan(dmatrix distanmat, dvector distanvec, int numspc)
646 {
647         int i, j, k;
648
649         for (k = 0, i = 1; i < numspc; i++) {
650                 for (j = 0; j < i; j++, k++)
651                         distanvec[k] = distanmat[i][j];
652         }
653 }
654
655
656 /******************************************************************************/
657 /* computation of maximum likelihood tree                                     */
658 /******************************************************************************/
659
660
661 /* compute the likelihood for (a,b)-(c,d) quartet */
662 double quartet_lklhd(int a, int b, int c, int d)
663 {
664         /* reserve memory for quartet if necessary */
665         if (mlmode != 1) { /* no quartet tree */
666                 if (Ctree != NULL)
667                         free_tree(Ctree, Numspc);
668                 Ctree = new_quartet(Numptrn, Seqpat);
669                 Numbrnch = NUMQBRNCH;
670                 Numibrnch = NUMQIBRNCH;
671                 Numspc = NUMQSPC;
672                 mlmode = 1;
673         }
674         
675         /* make (a,b)-(c,d) quartet */
676         make_quartet(a,b,c,d);
677
678         clockmode = 0; /* nonclocklike branch lengths */
679         
680         /* least square estimate for branch length */   
681         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
682
683         /* compute likelihood */
684         Ctree->lklhd = optlkl(Ctree);
685
686         return Ctree->lklhd;
687 }
688
689
690 /* compute the approximate likelihood for (a,b)-(c,d) quartet */
691 double quartet_alklhd(int a, int b, int c, int d)
692 {
693         /* reserve memory for quartet if necessary */
694         if (mlmode != 1) { /* no quartet tree */
695                 if (Ctree != NULL)
696                         free_tree(Ctree, Numspc);
697                 Ctree = new_quartet(Numptrn, Seqpat);
698                 Numbrnch = NUMQBRNCH;
699                 Numibrnch = NUMQIBRNCH;
700                 Numspc = NUMQSPC;
701                 mlmode = 1; 
702         }
703         
704         /* make (a,b)-(c,d) quartet */
705         make_quartet(a,b,c,d);
706
707         clockmode = 0; /* nonclocklike branch lengths */
708         
709         /* least square estimate for branch length */   
710         lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength);
711
712         /* compute likelihood */
713         Ctree->lklhd = treelkl(Ctree);
714
715         return Ctree->lklhd;
716 }
717
718
719 /* read usertree from file to memory */
720 void readusertree(FILE *ifp)
721 {
722         /* reserve memory for tree if necessary */
723         if (mlmode != 2) { /* no tree */
724                 if (Ctree != NULL)
725                         free_tree(Ctree, Numspc);
726                 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
727                 Numbrnch = 2*Maxspc-3;
728                 Numibrnch = Maxspc-3;
729                 Numspc = Maxspc;
730                 mlmode = 2;
731         }
732
733         /* read tree */
734         makeusertree(ifp);
735 }
736
737
738 /* compute the likelihood of a usertree */
739 double usertree_lklhd()
740 {
741         
742     /* CZ 05/16/01 */
743     
744         return 6.66;
745 }
746
747
748 /* compute the approximate likelihood of a usertree */
749 double usertree_alklhd()
750 {
751         /* CZ 05/16/01 */
752
753         return 6.66;
754 }
755
756
757 /* preparation for ML analysis */
758 void mlstart()
759 {
760         /* number of states and code length */
761         tpmradix = gettpmradix();
762         
763         /* declare variables */
764         Eval = new_dvector(tpmradix);
765         Evec = new_dmatrix(tpmradix,tpmradix);
766         Ievc = new_dmatrix(tpmradix,tpmradix);
767         iexp = new_dmatrix(tpmradix,tpmradix);
768         Alias = new_ivector(Maxsite);
769         
770         /* process sequence information */
771         evaluateseqs();
772         bestrate = new_ivector(Numptrn);
773
774         /* compute transition probability matrix */     
775         tranprobmat();
776
777         /* non-zero rate categories */
778         Rates = new_dvector(numcats);
779         updaterates();
780         ltprobr = new_dcube(numcats, tpmradix,tpmradix);
781
782         /* compute distance matrix */
783         Distanmat = new_dvector( Maxspc - 1 );
784         initdistan();
785                 
786         /* initialize tree pointer for quartet tree */
787         /*mlmode = 1;
788         Ctree = new_quartet(Numptrn, Seqpat);
789         Numbrnch = NUMQBRNCH;
790         Numibrnch = NUMQIBRNCH;
791         Numspc = NUMQSPC;
792     CZ, */
793         /* computing ML distances */
794         computedistan();
795 }
796
797
798 /* recompute ml distances for quartet only */
799 void distupdate(int a, int b, int c, int d)
800 {
801         /* update distance matrix */
802         /* consider only entries relevant to quartet */
803         /*
804     Distanmat[a][b] = mldistance(a, b);
805         Distanmat[b][a] = Distanmat[a][b];
806         Distanmat[a][c] = mldistance(a, c);
807         Distanmat[c][a] = Distanmat[a][c];
808         Distanmat[a][d] = mldistance(a, d);
809         Distanmat[d][a] = Distanmat[a][d];
810         Distanmat[b][c] = mldistance(b, c);
811         Distanmat[c][b] = Distanmat[b][c];
812         Distanmat[b][d] = mldistance(b, d);
813         Distanmat[d][b] = Distanmat[b][d];
814         Distanmat[c][d] = mldistance(c, d);
815         Distanmat[d][c] = Distanmat[c][d];
816     CZ */
817 }
818
819
820 /* cleanup after ML analysis */
821 void mlfinish()
822 {
823         if (Ctree != NULL)
824                 free_tree(Ctree, Numspc);
825         free_ivector(bestrate);
826         free_ivector(Alias);
827         free_cmatrix(Seqpat);
828         free_ivector(constpat);
829         free_ivector(Weight);
830         free_dvector(Distanmat); /* CZ  */
831         free_dvector(Eval);
832         free_dmatrix(Evec);
833         free_dmatrix(Ievc);
834         free_dvector(Rates);
835         free_dcube(ltprobr);
836         free_dmatrix(iexp);
837 }
838
839
840 /******************************************************************************/
841 /* tree output                                                                */
842 /******************************************************************************/
843
844
845 #define MAXOVER    50
846 #define MAXLENG    30
847 #define MAXCOLUMN  80
848
849
850 void prbranch(Node *up, int depth, int m, int maxm,
851         ivector umbrella, ivector column, FILE *outfp)
852 {
853         int i, num, n, maxn, lim;
854         Node *cp;
855         char bch;
856
857         if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) {
858                 column[depth] = MAXLENG;
859                 bch = '+';
860         } else {
861                 column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3;
862                 bch = '-';
863         }
864
865         if (up->isop == NULL) { /* external branch */
866                 num = up->number + 1; /* offset */
867                 if (m == 1) umbrella[depth - 1] = TRUE;
868                 for (i = 0; i < depth; i++) {
869                         if (umbrella[i])
870                                 fprintf(outfp, "%*c", column[i], ':');
871                         else
872                                 fprintf(outfp, "%*c", column[i], ' ');
873                 }
874                 if (m == maxm)
875                         umbrella[depth - 1] = FALSE;
876                 for (i = 0, lim = column[depth] - 3; i < lim; i++)
877                         fputc(bch, outfp);
878                 fprintf(outfp, "-%d ", num);
879                                 
880                 fputid(outfp, up->number);
881                 
882                 
883                 fputc('\n', outfp);
884                 fputc(' ', outfp);
885                 return;
886         }
887
888         num = up->number + 1 + Numspc; /* offset, internal branch */
889         for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++)
890                 ;
891         for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) {
892                 prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp);
893                 if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE;
894                 if (n != maxn) {
895                         for (i = 0; i < depth; i++) {
896                                 if (umbrella[i])
897                                         fprintf(outfp, "%*c", column[i], ':');
898                                 else
899                                         fprintf(outfp, "%*c", column[i], ' ');
900                         }
901                         if (n == maxn / 2) { /* internal branch */
902                                 for (i = 0, lim = column[depth] - 3; i < lim; i++)
903                                         fputc(bch, outfp);
904                                 if (num < 10)
905                                         fprintf(outfp, "--%d", num);
906                                 else if (num < 100)
907                                         fprintf(outfp, "-%2d", num);
908                                 else
909                                         fprintf(outfp, "%3d", num);
910                         } else {
911                                 if (umbrella[depth])
912                                         fprintf(outfp, "%*c", column[depth], ':');
913                                 else
914                                         fprintf(outfp, "%*c", column[depth], ' ');
915                         }
916                         fputc('\n', outfp);
917                         fputc(' ', outfp);
918                 }
919                 if (m == maxm) umbrella[depth - 1] = FALSE;
920         }
921         return;
922 }
923
924
925 void getproportion(double *proportion, dvector distanvec, int numspc)
926 {
927         int i, maxpair;
928         double maxdis;
929         
930         maxpair = (numspc*(numspc-1))/2;
931
932         maxdis = 0.0;
933         for (i = 0; i < maxpair; i++) {
934                 if (distanvec[i] > maxdis) {
935                         maxdis = distanvec[i];
936                 }
937         }
938         *proportion = (double) MAXCOLUMN / (maxdis * 3.0);
939         if (*proportion > 1.0) *proportion = 1.0;
940 }
941
942
943 void prtopology(FILE *outfp)
944 {
945         int n, maxn, depth;
946         ivector umbrella;
947         ivector column;
948         Node *cp, *rp;
949         
950         getproportion(&Proportion, Distanvec, Numspc);
951
952         umbrella = new_ivector(Numspc);
953         column = new_ivector(Numspc);
954
955         for (n = 0; n < Numspc; n++) {
956                 umbrella[n] = FALSE;
957                 column[n] = 3;
958         }
959         column[0] = 1;
960         
961         fputc(' ', outfp);
962
963         /* original code: rp = Ctree->rootp */
964         /* but we want to print the first group in the 
965            trichotomy as outgroup at the bottom! */
966         rp = Ctree->rootp->isop;
967         
968         for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++)
969                 ;
970         depth = 1;
971         n = 0;
972
973         cp = rp;
974         do {
975                 cp = cp->isop;
976                 n++;
977                 prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp);
978                 if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':');
979         } while (cp != rp);
980
981         free_ivector(umbrella);
982         free_ivector(column);
983 }
984
985
986 /* print unrooted tree file with branch lengths */
987 void fputphylogeny(FILE *fp)
988 {
989         Node *cp, *rp;
990         int n;
991
992         cp = rp = Ctree->rootp;
993         putc('(', fp);
994         n = 1;
995         do {
996                 cp = cp->isop->kinp;
997                 if (cp->isop == NULL) { /* external node */
998                         if (n > 60) {
999                                 fprintf(fp, "\n");
1000                                 n = 2;
1001                         }
1002                         n += fputid(fp, cp->number);
1003                         fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1004                         n += 7;
1005                         cp = cp->kinp;
1006                 } else { /* internal node */
1007                         if (cp->descen) {
1008                                 if (n > 60) {
1009                                         fprintf(fp, "\n");
1010                                         n = 1;
1011                                 }
1012                                 putc('(', fp);
1013                                 n++;
1014                         } else {
1015                                 putc(')', fp);
1016                                 n++;
1017                                 if (n > 60) {
1018                                         fprintf(fp, "\n");
1019                                         n = 1;
1020                                 }
1021                                 /* internal label */
1022                                 if (cp->kinp->label != NULL) {
1023                                         fprintf(fp, "%s", cp->kinp->label);
1024                                         n += strlen(cp->kinp->label);
1025                                 }
1026                                 fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01);
1027                                 n += 7;
1028                         }
1029                 }
1030                 if (!cp->descen && !cp->isop->descen && cp != rp) {
1031                         putc(',', fp); /* not last subtree */
1032                         n++;
1033                 }
1034         } while (cp != rp);
1035         fprintf(fp, ")");
1036         /* internal label */
1037         if (cp->label != NULL)
1038                 fprintf(fp, "%s", cp->label);
1039         fprintf(fp, ";\n");
1040 }
1041
1042
1043 void resulttree(FILE *outfp)
1044 {
1045         int n, ne, closeflag;
1046         Node *ep, *ip;
1047         double blen;
1048         
1049         closeflag = FALSE;
1050         
1051         if (clockmode) {
1052                 fprintf(outfp, "\n         branch  length     nc/c");
1053                 fprintf(outfp, "   branch  length     nc/c (= non-clock/clock)\n");
1054         } else {
1055                 fprintf(outfp, "\n         branch  length     S.E.");
1056                 fprintf(outfp, "   branch  length     S.E.\n");
1057         }
1058         for (n = 0; n < Numspc; n++) {
1059                 ep = Ctree->ebrnchp[n];
1060                 ne = ep->number;
1061                 fputid10(outfp, ne);
1062                 fputs("  ", outfp);
1063                 fprintf(outfp, "%3d", ne + 1);
1064                 blen = (clockmode ? ep->lengthc : ep->length);
1065                 fprintf(outfp, "%9.5f", blen*0.01);
1066                 if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1067                 if (clockmode)
1068                         fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc));
1069                 else
1070                         fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen));   
1071                 if (n < Numibrnch) {
1072                         ip = Ctree->ibrnchp[n];
1073                         fprintf(outfp, "%8d", n + 1 + Numspc);
1074                         blen = (clockmode ? ip->lengthc : ip->length);
1075                         fprintf(outfp, "%9.5f", blen*0.01);
1076                         if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE;
1077                         if (clockmode)
1078                                 fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc));
1079                         else
1080                                 fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen));   
1081                         fputc('\n', outfp);
1082                 } else {
1083                         if (n == Numspc - 3) {
1084                                 fputc('\n', outfp);
1085                         } else if (n == Numspc - 2) {
1086                                 if (clockmode) {
1087                                         if (!Convergc) 
1088                                                 fprintf(outfp, "     No convergence after %d iterations!\n", Numitc);
1089                                         else
1090                                                 fprintf(outfp, "     %d iterations until convergence\n", Numitc);
1091                                 } else {
1092                                         if (!Converg) 
1093                                                 fprintf(outfp, "     No convergence after %d iterations!\n", Numit);
1094                                         else
1095                                                 fprintf(outfp, "     %d iterations until convergence\n", Numit);                                
1096                                 }               
1097                         } else if (n == Numspc - 1) {
1098                                 fprintf(outfp, "     log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd));
1099                         } else {
1100                                 fputc('\n', outfp);
1101                         }
1102                 }
1103         }
1104         if(closeflag)
1105                 fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n");
1106 }
1107
1108
1109 /******************************************************************************/
1110 /* Neighbor-joining tree                                                      */
1111 /******************************************************************************/
1112
1113
1114 /* compute NJ tree and write to file */
1115 void njtree(FILE *fp)
1116 {
1117         /* reserve memory for tree if necessary */
1118         if (mlmode != 3) { /* no tree */
1119                 if (Ctree != NULL)
1120                         free_tree(Ctree, Numspc);
1121                 Ctree = new_tree(Maxspc, Numptrn, Seqpat);
1122                 Numbrnch = 2*Maxspc-3;
1123                 Numibrnch = Maxspc-3;
1124                 Numspc = Maxspc;
1125                 mlmode = 3;
1126         }
1127
1128         /* construct NJ tree from distance matrix */
1129         njdistantree(Ctree);
1130         
1131         fputphylogeny(fp);
1132 }
1133
1134
1135 /* construct  NJ tree from distance matrix */
1136 void njdistantree(Tree *tr)
1137 {
1138         /* removed, CZ, 05/16/01 */
1139 }
1140
1141 /******************************************************************************/
1142 /* find best assignment of rate categories                                    */
1143 /******************************************************************************/
1144
1145 /* find best assignment of rate categories */
1146 void findbestratecombination()
1147 {
1148         int k, u;
1149         double bestvalue, fv2;
1150         dvector catprob;
1151         dmatrix cdl;
1152
1153         cdl = Ctree->condlkl;
1154         catprob = new_dvector(numcats+1);
1155         fv2 = (1.0-fracinv)/(double) numcats;
1156
1157         for (k = 0; k < Numptrn; k++) {
1158                 /* zero rate */
1159                 if (constpat[k] == TRUE)
1160                         catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]];
1161                 else 
1162                         catprob[0] = 0.0;
1163                 /* non-zero-rates */
1164                 for (u = 1; u < numcats+1; u++)
1165                         catprob[u] = fv2*cdl[u-1][k];
1166                 /* find best */
1167                 bestvalue = catprob[0];
1168                 bestrate[k] = 0;
1169                 for (u = 1; u < numcats+1; u++)
1170                         if (catprob[u] >= bestvalue) {
1171                                 bestvalue = catprob[u];
1172                                 bestrate[k] = u;
1173                         }
1174         }
1175         free_dvector(catprob);
1176         bestratefound = 1;
1177 }
1178
1179 /* print best assignment of rate categories */
1180 void printbestratecombination(FILE *fp)
1181 {
1182         int s, k;
1183
1184         for (s = 0; s < Maxsite; s++) {
1185                 k = Alias[s];
1186                 fprintf(fp, "%2d", bestrate[k]);
1187                 if ((s+1) % 30 == 0)
1188                         fprintf(fp, "\n");
1189                 else if ((s+1) % 10 == 0)
1190                         fprintf(fp, "  ");
1191         }
1192         if (s % 70 != 0)
1193                 fprintf(fp, "\n");
1194 }
1195
1196
1197 /******************************************************************************/
1198 /* computation of clocklike branch lengths                                    */
1199 /******************************************************************************/
1200
1201 /* checks wether e is a valid edge specification */
1202 int checkedge(int e)
1203 {
1204         /* there are Numspc external branches:
1205              0 - Numspc-1
1206            there are Numibrnch internal branches:
1207              Numspc - Numspc+Numibrnch-1
1208         */
1209         
1210         if (e < 0) return FALSE;
1211         if (e < Numspc+Numibrnch) return TRUE;
1212         else return FALSE;  
1213 }
1214
1215 /* print topology of subtree */
1216 void fputsubstree(FILE *fp, Node *ip)
1217 {
1218         Node *cp;
1219         
1220         if (ip->isop == NULL) { /* terminal nodes */
1221                 numtc += fputid(fp, ip->number);
1222         } else {        
1223                 cp = ip;
1224                 fprintf(fp, "(");
1225                 numtc += 1;
1226                 do {            
1227                         cp = cp->isop->kinp;
1228                         if (cp->isop == NULL) { /* external node */
1229                                 numtc += fputid(fp, cp->number);
1230                                 fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1231                                 numtc += 7;
1232                                 cp = cp->kinp;
1233                         } else { /* internal node */
1234                                 if (cp->height > 0.0) {
1235                                         fprintf(fp, "(");
1236                                         numtc += 1;
1237                                 } else if (cp->height < 0.0) {
1238                                         fprintf(fp, ")");
1239                                         numtc += 1;
1240                                         if (numtc > 60) {
1241                                                 fprintf(fp, "\n");
1242                                                 numtc = 1;
1243                                         }
1244                                         /* internal label */
1245                                         if (cp->kinp->label != NULL) {
1246                                                 fprintf(fp, "%s", cp->kinp->label);
1247                                                 numtc += strlen(cp->kinp->label);
1248                                         }
1249                                         if (numtc > 60) {
1250                                                 fprintf(fp, "\n");
1251                                                 numtc = 1;
1252                                         }
1253                                         fprintf(fp, ":%.5f", (cp->lengthc)*0.01);
1254                                         numtc += 6;
1255                                         if (numtc > 60) {
1256                                                 fprintf(fp, "\n");
1257                                                 numtc = 1;
1258                                         }
1259                                 }
1260                         }
1261                         if (cp->height <= 0.0 && cp->isop->height <= 0.0 &&
1262                                 cp->isop != ip) {
1263                                 putc(',', fp); /* not last subtree */
1264                                 numtc += 1;
1265                                 if (numtc > 60) {
1266                                         fprintf(fp, "\n");
1267                                         numtc = 1;
1268                                 }
1269                         }
1270                 } while (cp->isop != ip);
1271                 fprintf(fp, ")");
1272                 numtc += 1;
1273         }
1274         if (numtc > 60) {
1275                 fprintf(fp, "\n");
1276                 numtc = 1;
1277         }
1278
1279 }
1280
1281 /* print rooted tree file  */
1282 void fputrooted(FILE *fp, int e)
1283 {
1284         Node *rootbr;
1285         
1286         /* to be called only after clocklike branch
1287            lengths have been computed */
1288          
1289          /* pointer to root branch */
1290         if (e < Numspc) rootbr = Ctree->ebrnchp[e];
1291         else rootbr = Ctree->ibrnchp[e - Numspc];
1292
1293         fprintf(fp, "(");
1294         numtc = 2;
1295         fputsubstree(fp, rootbr);
1296         /* internal label */
1297         if (rootbr->label != NULL) {
1298                 fprintf(fp, "%s", rootbr->label);
1299                 numtc += strlen(rootbr->label);
1300         }
1301         if (numtc > 60) {
1302                 fprintf(fp, "\n");
1303                 numtc = 1;
1304         }
1305         fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01);
1306         numtc += 7;
1307         if (numtc > 60) {
1308                 fprintf(fp, "\n");
1309                 numtc = 1;
1310         }
1311         fputsubstree(fp, rootbr->kinp);
1312         /* internal label */
1313         if (rootbr->kinp->label != NULL) {
1314                 fprintf(fp, "%s", rootbr->kinp->label);
1315                 numtc += strlen(rootbr->kinp->label);
1316         }
1317         if (numtc > 60) {
1318                 fprintf(fp, "\n");
1319                 numtc = 1;
1320         }
1321         fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01);
1322 }
1323
1324 /* finds heights in subtree */
1325 void findheights(Node *ip)
1326 {
1327         Node *cp, *rp;
1328         
1329         if (ip->isop != NULL) { /* forget terminal nodes */
1330                 
1331                 cp = ip;
1332                 
1333                 /* initialise node  */
1334                 cp->height = 1.0; /* up */
1335                 rp = cp;
1336                 while (rp->isop != cp) {
1337                         rp = rp->isop;
1338                         rp->height = -1.0; /* down */
1339                 }
1340
1341                 do {            
1342                         cp = cp->isop->kinp;
1343                         if (cp->isop == NULL) { /* external node */
1344                                 cp = cp->kinp;
1345                         } else { /* internal node */
1346                                 if (cp->height == 0.0) { /* node not yet visited */
1347                                         cp->height = 1.0; /* up */
1348                                         rp = cp;
1349                                         while (rp->isop != cp) {
1350                                                 rp = rp->isop;
1351                                                 rp->height = -1.0; /* down */
1352                                         }
1353                                 } else if (cp->kinp->height == 1.0) {
1354                                         /* cp->kinp is next height pointer  */
1355                                         heights[Numhts] = cp->kinp;
1356                                         Numhts++;
1357                                 }
1358                         }
1359                 } while (cp->isop != ip);
1360                 /* ip is last height pointer */
1361                 heights[Numhts] = ip;
1362                 Numhts++;
1363         }       
1364 }
1365
1366
1367 /* initialise clocklike branch lengths (with root on edge e) */
1368 void initclock(int e)
1369 {
1370         /* CZ */
1371 }
1372
1373 /* approximate likelihood under the constaining assumption of
1374    clocklike branch lengths (with root on edge e) */
1375 double clock_alklhd(int e)
1376 {
1377         initclock(e);
1378         Ctree->lklhdc = treelkl(Ctree);
1379
1380         return Ctree->lklhdc;   
1381 }
1382
1383 /* log-likelihood given height ht at node pointed to by chep */
1384 double heightlkl(double ht)
1385 {
1386         Node *rp;
1387         double len;     
1388         
1389         /* adjust branch lengths */
1390         chep->height = ht;
1391         /* descendent branches */
1392         rp = chep;
1393         while (rp->isop != chep) {
1394                 rp = rp->isop;
1395                 len = chep->height - rp->kinp->height;
1396                 rp->kinp->lengthc = len;
1397                 rp->lengthc = len;
1398         }
1399         /* upward branch */
1400         if (chep == rootbr || chep->kinp == rootbr) {
1401                 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1402                 chep->lengthc = len;
1403                 chep->kinp->lengthc = len;
1404         } else {
1405                 rp = chep->kinp;
1406                 while (rp->isop->height <= 0.0)
1407                         rp = rp->isop;
1408                 chep->lengthc = rp->isop->height - chep->height;
1409                 chep->kinp->lengthc = rp->isop->height - chep->height;
1410         }
1411
1412         /* compute likelihood */
1413         Ctree->lklhdc = treelkl(Ctree);
1414
1415         return -(Ctree->lklhdc); /* we use a minimizing procedure */
1416 }
1417
1418 /* optimize current height */
1419 void optheight(void)
1420 {
1421         double he, fx, f2x, minh, maxh, len;
1422         Node *rp;
1423
1424         /* current height */
1425         he = chep->height;
1426         
1427         /* minimum */
1428         minh = 0.0;
1429         rp = chep;
1430         while (rp->isop != chep) {
1431                 rp = rp->isop;
1432                 if (rp->kinp->height > minh)
1433                         minh = rp->kinp->height;
1434         }
1435         minh += MINARC;
1436         
1437         /* maximum */
1438         if (chep == rootbr || chep->kinp == rootbr) {
1439                 maxh = hroot;
1440         } else {
1441                 rp = chep->kinp;
1442                 while (rp->isop->height <= 0.0)
1443                         rp = rp->isop;
1444                 maxh = rp->isop->height;
1445         }
1446         maxh -= MINARC;
1447
1448         /* check borders for height */
1449         if (he < minh) he = minh;
1450         if (he > maxh) he = maxh;
1451
1452         /* optimization */
1453         if (!(he == minh && he == maxh))
1454                 he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x);
1455         
1456         /* variance of height */
1457         f2x = fabs(f2x);
1458         if (1.0/(maxhroot*maxhroot) < f2x)
1459                 chep->varheight = 1.0/f2x;
1460         else
1461                 chep->varheight = maxhroot*maxhroot;
1462         
1463         /* adjust branch lengths */
1464         chep->height = he;
1465         /* descendent branches */
1466         rp = chep;
1467         while (rp->isop != chep) {
1468                 rp = rp->isop;
1469                 len = chep->height - rp->kinp->height;
1470                 rp->kinp->lengthc = len;
1471                 rp->lengthc = len;
1472         }
1473         /* upward branch */
1474         if (chep == rootbr || chep->kinp == rootbr) {
1475                 len = (hroot - chep->height) + (hroot - chep->kinp->height);
1476                 chep->lengthc = len;
1477                 chep->kinp->lengthc = len;
1478         } else {
1479                 rp = chep->kinp;
1480                 while (rp->isop->height <= 0.0)
1481                         rp = rp->isop;
1482                 chep->lengthc = rp->isop->height - chep->height;
1483                 chep->kinp->lengthc = rp->isop->height - chep->height;
1484         }
1485 }
1486
1487 /* log-likelihood given height ht at root */
1488 double rheightlkl(double ht)
1489 {
1490         double len;     
1491         
1492         /* adjust branch lengths */
1493         hroot = ht;
1494         len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1495         rootbr->lengthc = len;
1496         rootbr->kinp->lengthc = len;
1497
1498         /* compute likelihood */
1499         Ctree->lklhdc = treelkl(Ctree);
1500
1501         return -(Ctree->lklhdc); /* we use a minimizing procedure */
1502 }
1503
1504 /* optimize height of root */
1505 void optrheight(void)
1506 {
1507         double he, fx, f2x, minh, len;
1508
1509         /* current height */
1510         he = hroot;
1511         
1512         /* minimum */
1513         if (rootbr->height > rootbr->kinp->height)
1514                 minh = rootbr->height;
1515         else
1516                 minh = rootbr->kinp->height;
1517         minh += MINARC;
1518                 
1519         /* check borders for height */
1520         if (he < minh) he = minh;
1521         if (he > maxhroot) he = maxhroot;
1522
1523         /* optimization */
1524         he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x);
1525         
1526         /* variance of height of root */
1527         f2x = fabs(f2x);
1528         if (1.0/(maxhroot*maxhroot) < f2x)
1529                 varhroot = 1.0/f2x;
1530         else
1531                 varhroot = maxhroot*maxhroot;
1532
1533         /* adjust branch lengths */
1534         hroot = he;
1535         len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height);
1536         rootbr->lengthc = len;
1537         rootbr->kinp->lengthc = len;
1538 }
1539
1540 /* exact likelihood under the constaining assumption of
1541    clocklike branch lengths (with root on edge e) */
1542 double clock_lklhd(int e)
1543 {
1544         int h, nconv;
1545         double old;
1546         
1547         Numitc = 0;
1548         Convergc = FALSE;
1549         
1550         initclock(e);
1551         
1552         do {
1553         
1554                 Numitc++;
1555                 nconv = 0;
1556
1557                 /* optimize height of root */
1558                 old = hroot;
1559                 optrheight();
1560                 if (fabs(old - hroot) < HEPSILON) nconv++;
1561
1562                 /* optimize height of nodes */
1563                 for (h = Numhts-1; h >= 0; h--) {
1564                 
1565                         /* pointer chep to current height node */
1566                         chep = heights[h];
1567                         
1568                         /* store old value */
1569                         old = chep->height;
1570
1571                         /* find better height */
1572                         optheight();
1573                         
1574                         /* converged ? */
1575                         if (fabs(old - chep->height) < HEPSILON) nconv++;
1576                 }
1577
1578                 if (nconv == Numhts+1) Convergc = TRUE;
1579                         
1580         } while (Numitc < MAXIT && !Convergc);
1581                 
1582         /* compute final likelihood */
1583         Ctree->lklhdc = treelkl(Ctree);
1584
1585         return Ctree->lklhdc;
1586 }
1587
1588 /* find out the edge containing the root */
1589 int findrootedge()
1590 {
1591         int e, ebest;
1592         double logbest, logtest;
1593         
1594         /* compute the likelihood for all edges and take the edge with
1595            best likelihood (using approximate ML) */
1596
1597         ebest = 0;
1598         logbest = clock_alklhd(0);
1599         numbestroot = 1;
1600         for (e = 1; e < Numspc+Numibrnch; e++) {
1601                 logtest = clock_alklhd(e);
1602                 if (logtest > logbest) {
1603                         ebest = e;
1604                         logbest = logtest;
1605                         numbestroot = 1;
1606                 } else if (logtest == logbest) {
1607                         numbestroot++;
1608                 }
1609         }
1610
1611         return ebest;
1612 }
1613
1614 /* show heights and corresponding standard errors */
1615 void resultheights(FILE *fp)
1616 {
1617         int h, num;
1618         Node *cp;
1619         
1620         fprintf(fp, " height    S.E.    of node common to branches\n");
1621         for (h = 0; h < Numhts; h++) {
1622                 fprintf(fp, "%.5f  %.5f    ", (heights[h]->height)*0.01,
1623                         sqrt(heights[h]->varheight)*0.01);
1624                 cp = heights[h];
1625                 do {
1626                         num = (cp->number) + 1;
1627                         if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */                        
1628                         fprintf(fp, "%d  ", num);
1629                         cp = cp->isop;
1630                 } while (cp != heights[h]);     
1631                 fprintf(fp, "\n");
1632                 
1633         }
1634         fprintf(fp, "%.5f  %.5f   of root at branch %d\n",
1635                 hroot*0.01, sqrt(varhroot)*0.01, locroot+1);
1636 }
1637