Wrapper for Clustal Omega.
[jabaws.git] / binaries / src / clustalo / src / clustal / muscle_tree.c
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */
2
3 /* This a mix of tree functions and data-structures from
4  * Bob Edgar's Muscle (version 3.7) ported to pure C, topped up with
5  * some of our own stuff.
6  *
7  * Used files: phy.cpp, tree.h, phytofile.cpp and phyfromclust.cpp
8  *
9  * Muscle's code is public domain and so is this code here.
10
11  * From http://www.drive5.com/muscle/license.htm:
12  * """
13  * MUSCLE is public domain software
14  *
15  * The MUSCLE software, including object and source code and
16  * documentation, is hereby donated to the public domain.
17  *
18  * Disclaimer of warranty
19  * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
20  * EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * """
23  *
24  */
25
26 /*
27  *  RCS $Id: muscle_tree.c 230 2011-04-09 15:37:50Z andreas $
28  */
29
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <assert.h>
35 #include <ctype.h>
36
37 #include "util.h"
38 #include "log.h"
39 #include "muscle_tree.h"
40
41
42 static const double VERY_NEGATIVE_DOUBLE = -9e29;
43 /*const double dInsane = VERY_NEGATIVE_DOUBLE;*/
44 static const double dInsane = -9e29;
45 static const unsigned uInsane = 8888888;
46
47 typedef enum 
48 {
49     NTT_Unknown,
50
51     /* Returned from Tree::GetToken: */
52     NTT_Lparen,
53     NTT_Rparen,
54     NTT_Colon,
55     NTT_Comma,
56     NTT_Semicolon,
57     NTT_String,
58     
59     /* Following are never returned from Tree::GetToken: */
60     NTT_SingleQuotedString,
61     NTT_DoubleQuotedString,
62     NTT_Comment
63 } NEWICK_TOKEN_TYPE;
64
65
66 static void
67 InitCache(uint uCacheCount, tree_t *tree);
68 static void
69 TreeZero(tree_t *tree);
70 static uint
71 GetNeighborCount(unsigned uNodeIndex, tree_t *tree);
72 static bool
73 IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
74 static bool
75 HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree);
76 static void
77 TreeToFileNodeRooted(tree_t *tree, uint m_uRootNodeIndex, FILE *fp);
78 static void
79 ValidateNode(uint uNodeIndex, tree_t *tree);
80 static void
81 AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
82 static void
83 ExpandCache(tree_t *tree);
84 static void
85 TreeCreateRooted(tree_t *tree);
86 static bool
87 GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree);
88 static NEWICK_TOKEN_TYPE
89 GetToken(FILE *fp, char szToken[], uint uBytes);
90 /* stuff from textfile.cpp */
91 static void
92 FileSkipWhite(FILE *fp);
93 static bool
94 FileSkipWhiteX(FILE *fp);
95 static void
96 SetLeafName(uint uNodeIndex, const char *ptrName, tree_t *tree);
97 uint
98 AppendBranch(tree_t *tree, uint uExistingLeafIndex);
99 static void
100 SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
101               double dLength, tree_t *tree);
102 static uint
103 UnrootFromFile(tree_t *tree);
104 uint
105 GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *tree);
106 static void
107 InitNode(tree_t *prTree, uint uNodeIndex);
108
109
110 /**
111  * @param[in] uNodeIndex
112  * node to examine
113  * @param[in] tree
114  * tree to examine
115  * @return id of left node
116  * @note called GetRight in Muscle3.7
117  */
118 uint
119 GetLeft(uint uNodeIndex, tree_t *tree) 
120 {
121     assert(NULL != tree);
122     assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
123     return tree->m_uNeighbor2[uNodeIndex];
124 }
125 /***   end: GetLeft   ***/
126
127
128
129 /**
130  * @param[in] uNodeIndex
131  * node to examine
132  * @param[in] tree
133  * tree to examine
134  * @return id of right node
135  * @note called GetRight in Muscle3.7
136  */
137 uint
138 GetRight(uint uNodeIndex, tree_t *tree)
139 {
140     assert(NULL != tree);
141     assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
142     return tree->m_uNeighbor3[uNodeIndex];
143 }
144 /***   end: GetRight   ***/
145
146
147
148 /**
149  * @param[in] uNodeIndex
150  * node to examine
151  * @param[in] tree
152  * tree to examine
153  * @return leaf id of current node
154  */
155 uint
156 GetLeafId(uint uNodeIndex, tree_t *tree)
157 {
158     assert(NULL != tree);
159     assert(uNodeIndex < tree->m_uNodeCount);
160     assert(IsLeaf(uNodeIndex, tree));
161     
162     return tree->m_Ids[uNodeIndex];
163 }
164 /***   end: GetLeafId   ***/
165
166
167
168 /**
169  * @note originally called GetLeafName
170  *
171  */
172 char *
173 GetLeafName(unsigned uNodeIndex, tree_t *tree)
174 {
175     assert(NULL != tree);
176     assert(uNodeIndex < tree->m_uNodeCount);
177     assert(IsLeaf(uNodeIndex, tree));
178     return tree->m_ptrName[uNodeIndex];
179 }
180 /***   end: GetLeafName   ***/
181
182
183
184 /**
185  * @brief returns first leaf node for a depth-first traversal of tree
186  *
187  * @param[in] tree
188  * tree to traverse
189  *
190  * @return node index of first leaf node for depth-first traversal
191  *
192  * @note called FirstDepthFirstNode in Muscle3.7
193  *
194  */
195 uint
196 FirstDepthFirstNode(tree_t *tree)
197 {
198     uint uNodeIndex;
199         
200     assert(NULL != tree);
201     assert(IsRooted(tree));    
202     
203     /* Descend via left branches until we hit a leaf */
204     uNodeIndex =  tree->m_uRootNodeIndex;
205     while (!IsLeaf(uNodeIndex, tree))
206         uNodeIndex = GetLeft(uNodeIndex, tree);
207     return uNodeIndex;
208 }
209 /***   end: FirstDepthFirstNode   ***/
210
211
212 /**
213  * @brief returns next leaf node index for depth-first traversal of
214  * tree
215  *
216  * @param[in] tree
217  * tree to traverse
218  * @param[in] uNodeIndex
219  * Current node index
220  *
221  * @return node index of next leaf node during depth-first traversal
222  *
223  * @note called NextDepthFirstNode in Muscle3.7
224  */
225 uint
226 NextDepthFirstNode(uint uNodeIndex, tree_t *tree)
227 {
228     uint uParent;
229     
230     assert(NULL != tree);
231     assert(IsRooted(tree));
232     assert(uNodeIndex < tree->m_uNodeCount);
233     
234     if (IsRoot(uNodeIndex, tree))
235     {
236         /* Node uNodeIndex is root, end of traversal */
237         return NULL_NEIGHBOR;
238     }
239     
240     uParent = GetParent(uNodeIndex, tree);
241     if (GetRight(uParent, tree) == uNodeIndex) {
242         /* Is right branch, return parent=uParent */
243         return uParent;
244     }
245     
246     uNodeIndex = GetRight(uParent, tree);
247     /* Descend left from right sibling uNodeIndex */
248     while (!IsLeaf(uNodeIndex, tree)) {
249         uNodeIndex = GetLeft(uNodeIndex, tree);
250     }
251     
252     /*  bottom out at leaf uNodeIndex */
253     return uNodeIndex;
254 }
255 /***   end: NextDepthFirstNode   ***/
256
257
258
259 /**
260  * @brief check if tree is a rooted tree
261  * @param[in] tree
262  * tree to check
263  * @return TRUE if given tree is rooted, FALSE otherwise
264  *
265  */
266 bool
267 IsRooted(tree_t *tree) 
268 {
269     assert(NULL != tree);
270     return tree->m_bRooted;
271 }
272 /***   end: IsRooted   ***/
273
274
275 /**
276  *
277  */
278 void
279 FreeMuscleTree(tree_t *tree)
280 {
281     uint i;
282
283     assert(tree!=NULL);
284     
285
286     /* FIXME use GetLeafNodeIndex? or
287        for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
288        {
289        if (tree.IsLeaf(uNodeIndex))
290        {
291        const char *ptrName =
292        tree.GetLeafName(uNodeIndex);
293     */
294     /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
295      * so free first
296      */
297     for (i=0; i<tree->m_uNodeCount; i++) {
298         /* IsLeaf needs neighbour count, so free those guys later */
299         if (IsLeaf(i, tree)) {
300             CKFREE(tree->m_ptrName[i]);
301         }
302     }
303     CKFREE(tree->m_ptrName);
304
305     CKFREE(tree->m_uNeighbor1);
306     CKFREE(tree->m_uNeighbor2);
307     CKFREE(tree->m_uNeighbor3);
308     
309     CKFREE(tree->m_Ids);
310     
311     CKFREE(tree->m_dEdgeLength1);
312     CKFREE(tree->m_dEdgeLength2);
313     CKFREE(tree->m_dEdgeLength3);
314 #if USE_HEIGHT
315     CKFREE(tree->m_dHeight);
316     CKFREE(tree->m_bHasHeight);
317 #endif 
318     CKFREE(tree->m_bHasEdgeLength1);
319     CKFREE(tree->m_bHasEdgeLength2);
320     CKFREE(tree->m_bHasEdgeLength3);
321     
322     TreeZero(tree);
323
324     free(tree);
325 }
326 /***   end: FreeMuscleTree   ***/
327
328
329
330 /**
331  * @brief create a muscle tree
332  *
333  * @note Original comment in Muscle code: "Create rooted tree from a
334  * vector description. Node indexes are 0..N-1 for leaves, N..2N-2 for
335  * internal nodes. Vector subscripts are i-N and have values for
336  * internal nodes only, but those values are node indexes 0..2N-2. So
337  * e.g. if N=6 and Left[2]=1, this means that the third internal node
338  * (node index 8) has the second leaf (node index 1) as its left
339  * child. uRoot gives the vector subscript of the root, so add N to
340  * get the node index."
341  *
342  * @param[out] tree
343  * newly created tree
344  * @param[in] uLeafCount
345  * number of leaf nodes
346  * @param[in] uRoot
347  * Internal node index of root node
348  * @param[in] Left
349  * Node index of left sibling of an internal node.
350  * Index range: 0--uLeafCount-1
351  * @param[in] Right
352  * Node index of right sibling of an internal node.
353  * Index range: 0--uLeafCount-1
354  * @param[in] LeftLength
355  * Branch length of left branch of an internal node.
356  * Index range: 0--uLeafCount-1
357  * @param[in] RightLength
358  * Branch length of right branch of an internal node.
359  * Index range: 0--uLeafCount-1
360  * @param[in] LeafIds
361  * Leaf id. Index range: 0--uLeafCount
362  * @param[in] LeafNames
363  * Leaf label. Index range: 0--uLeafCount
364  *
365  */
366 void
367 MuscleTreeCreate(tree_t *tree,
368                   uint uLeafCount, uint uRoot,
369                   const uint *Left, const uint *Right,
370                   const float *LeftLength, const float* RightLength,
371                   const uint *LeafIds, char **LeafNames)
372 {
373     uint uNodeIndex;
374
375         TreeZero(tree);
376     tree->m_uNodeCount = 2*uLeafCount - 1;
377         InitCache(tree->m_uNodeCount, tree);
378     
379         for (uNodeIndex = 0; uNodeIndex < uLeafCount; ++uNodeIndex) {
380                 tree->m_Ids[uNodeIndex] = LeafIds[uNodeIndex];
381                 tree->m_ptrName[uNodeIndex] = CkStrdup(LeafNames[uNodeIndex]);
382     }
383
384         for (uNodeIndex = uLeafCount; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
385                 uint v = uNodeIndex - uLeafCount;
386                 uint uLeft = Left[v];
387                 uint uRight = Right[v];
388                 float fLeft = LeftLength[v];
389                 float fRight = RightLength[v];
390         
391                 tree->m_uNeighbor2[uNodeIndex] = uLeft;
392                 tree->m_uNeighbor3[uNodeIndex] = uRight;
393         
394                 tree->m_bHasEdgeLength2[uNodeIndex] = TRUE;
395                 tree->m_bHasEdgeLength3[uNodeIndex] = TRUE;
396
397                 tree->m_dEdgeLength2[uNodeIndex] = fLeft;
398                 tree->m_dEdgeLength3[uNodeIndex] = fRight;
399
400                 tree->m_uNeighbor1[uLeft] = uNodeIndex;
401                 tree->m_uNeighbor1[uRight] = uNodeIndex;
402
403                 tree->m_dEdgeLength1[uLeft] = fLeft;
404                 tree->m_dEdgeLength1[uRight] = fRight;
405
406                 tree->m_bHasEdgeLength1[uLeft] = TRUE;
407                 tree->m_bHasEdgeLength1[uRight] = TRUE;
408     }
409
410         tree->m_bRooted = TRUE;
411         tree->m_uRootNodeIndex = uRoot + uLeafCount;
412 #ifndef NDEBUG
413         TreeValidate(tree);
414 #endif
415 }
416 /***   end: MuscleTreeCreate   ***/
417
418
419
420 /**
421  * @param[in] tree
422  * tree to write
423  * @param[out] fp
424  * filepointer to write to2
425  * 
426  * @brief write a muscle tree to a file in newick format (distances
427  * and all names)
428  *
429  * 
430  */
431 void
432 MuscleTreeToFile(FILE *fp, tree_t *tree)
433 {
434     assert(NULL != tree);
435     if (IsRooted(tree)) {
436         TreeToFileNodeRooted(tree, tree->m_uRootNodeIndex, fp);
437         fprintf(fp, ";\n");
438         return;
439     } else {
440         Log(&rLog, LOG_FATAL, "FIXME: output of unrooted muscle trees not implemented");
441     }
442 }
443 /***   end: MuscleTreeToFile   ***/
444
445
446
447 /**
448  * @brief check if given node is a leaf node
449  *
450  * @param[in] uNodeIndex
451  * the node index
452  * @param tree
453  * the tree
454  *
455  * @return TRUE if given node is a leaf, FALSE otherwise
456  *
457  * @note called IsLeaf in Muscle3.7. See tree.h in original code
458  */
459 bool
460 IsLeaf(uint uNodeIndex, tree_t *tree)
461 {
462     assert(NULL != tree);
463     assert(uNodeIndex < tree->m_uNodeCount);
464     if (1 == tree->m_uNodeCount)
465         return TRUE;
466     return 1 == GetNeighborCount(uNodeIndex, tree);
467 }
468 /***   end: IsLeaf   ***/
469
470
471
472 /**
473  */
474 bool
475 IsRoot(uint uNodeIndex, tree_t *tree)
476 {
477     assert(NULL != tree);
478     return IsRooted(tree) && tree->m_uRootNodeIndex == uNodeIndex;
479 }
480 /***   end: IsRoot   ***/
481
482
483
484 /**
485  */
486 uint
487 GetNeighborCount(uint uNodeIndex, tree_t *tree)
488 {
489     uint n1, n2, n3;
490     assert(uNodeIndex < tree->m_uNodeCount);
491     assert(NULL != tree);
492     assert(NULL != tree->m_uNeighbor1);
493     assert(NULL != tree->m_uNeighbor2);
494     assert(NULL != tree->m_uNeighbor3);
495     n1 = tree->m_uNeighbor1[uNodeIndex];
496     n2 = tree->m_uNeighbor2[uNodeIndex];
497     n3 = tree->m_uNeighbor3[uNodeIndex];
498     return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
499 }
500 /***   end: GetNeighborCount   ***/
501
502
503 /**
504  */
505 uint
506 GetParent(unsigned uNodeIndex, tree_t *tree)
507 {
508     assert(NULL != tree);
509     assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
510     return tree->m_uNeighbor1[uNodeIndex];
511 }
512 /***   end: GetParent   ***/
513
514
515
516 /**
517  */
518 bool
519 IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
520 {
521     assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
522     assert(NULL != tree);
523     
524     return tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
525         tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
526         tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
527 }
528 /***   end: IsEdge   ***/
529
530
531
532 /**
533  */
534 bool
535 HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
536 {
537     assert(NULL != tree);
538     assert(uNodeIndex1 < tree->m_uNodeCount);
539     assert(uNodeIndex2 < tree->m_uNodeCount);
540     assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
541     
542     if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
543         return tree->m_bHasEdgeLength1[uNodeIndex1];
544     else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
545         return tree->m_bHasEdgeLength2[uNodeIndex1];
546     assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
547     return tree->m_bHasEdgeLength3[uNodeIndex1];
548 }
549 /***   end:   ***/
550
551
552 /**
553  */
554 double
555 GetEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
556 {
557     assert(NULL != tree);
558     assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
559     if (!HasEdgeLength(uNodeIndex1, uNodeIndex2, tree))
560     {
561         Log(&rLog, LOG_FATAL, "Missing edge length in tree %u-%u", uNodeIndex1, uNodeIndex2);
562     }
563     
564     if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
565         return tree->m_dEdgeLength1[uNodeIndex1];
566     else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
567         return tree->m_dEdgeLength2[uNodeIndex1];
568     assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
569     return tree->m_dEdgeLength3[uNodeIndex1];
570 }
571 /***   end: GetEdgeLength   ***/
572
573
574 /**
575  *
576  */
577 void
578 InitNode(tree_t *prTree, uint uNodeIndex)
579 {
580     prTree->m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
581     prTree->m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
582     prTree->m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
583     prTree->m_bHasEdgeLength1[uNodeIndex] = FALSE;
584     prTree->m_bHasEdgeLength2[uNodeIndex] = FALSE;
585     prTree->m_bHasEdgeLength3[uNodeIndex] = FALSE;
586 #if USE_HEIGHT
587     prTree->m_bHasHeight[uNodeIndex] = FALSE;
588     prTree->m_dHeight[uNodeIndex] = dInsane;
589 #endif
590     prTree->m_dEdgeLength1[uNodeIndex] = dInsane;
591     prTree->m_dEdgeLength2[uNodeIndex] = dInsane;
592     prTree->m_dEdgeLength3[uNodeIndex] = dInsane;
593     
594     prTree->m_ptrName[uNodeIndex] = NULL;
595     prTree->m_Ids[uNodeIndex] = uInsane;
596 }
597 /***   end: InitNode   ***/
598
599
600 /**
601  *
602  */
603 void
604 InitCache(uint uCacheCount, tree_t *tree)
605 {
606     uint uNodeIndex;
607     
608     tree->m_uCacheCount = uCacheCount;
609     
610     tree->m_uNeighbor1 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
611     tree->m_uNeighbor2 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
612     tree->m_uNeighbor3 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
613     
614     tree->m_Ids = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
615     
616     tree->m_dEdgeLength1 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
617     tree->m_dEdgeLength2 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
618     tree->m_dEdgeLength3 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
619 #if USE_HEIGHT
620     tree->m_dHeight = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
621     tree->m_bHasHeight = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
622 #endif    
623     tree->m_bHasEdgeLength1 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
624     tree->m_bHasEdgeLength2 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
625     tree->m_bHasEdgeLength3 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
626
627     tree->m_ptrName = (char **) CKMALLOC(sizeof(char *) * tree->m_uCacheCount);
628     
629     for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
630         InitNode(tree, uNodeIndex);
631     }
632 }
633 /***   end: InitCache   ***/
634
635
636
637
638 /**
639  *
640  * @note Replacing Tree::Clear but no freeing of memory! Just setting
641  * everything to 0/NULL
642  */
643 void
644 TreeZero(tree_t *tree)
645 {
646     assert(NULL != tree);
647     tree->m_uNodeCount = 0;
648     tree->m_uCacheCount = 0;
649
650     tree->m_uNeighbor1 = NULL;
651     tree->m_uNeighbor2 = NULL;
652     tree->m_uNeighbor3 = NULL;
653     
654     tree->m_dEdgeLength1 = NULL;
655     tree->m_dEdgeLength2 = NULL;
656     tree->m_dEdgeLength3 = NULL;
657     
658 #if USE_HEIGHT
659     tree->m_dHeight = NULL;
660     tree->m_bHasHeight = NULL;
661 #endif
662     tree->m_bHasEdgeLength1 = NULL;
663     tree->m_bHasEdgeLength2 = NULL;
664     tree->m_bHasEdgeLength3 = NULL;
665
666     tree->m_ptrName = NULL;
667     tree->m_Ids = NULL;
668     
669     tree->m_bRooted = FALSE;
670     tree->m_uRootNodeIndex = 0;
671 }
672 /* end: TreeZero */
673
674
675
676 /**
677  *
678  */
679 void
680 TreeToFileNodeRooted(tree_t *tree, uint uNodeIndex, FILE *fp)
681 {
682     bool bGroup;
683     
684     assert(IsRooted(tree));
685     assert(NULL != tree);
686     bGroup = !IsLeaf(uNodeIndex, tree) || IsRoot(uNodeIndex, tree);
687     
688     if (bGroup) 
689         fprintf(fp, "(\n");
690     
691
692     if (IsLeaf(uNodeIndex, tree)) {
693         /* File.PutString(GetName(uNodeIndex)); */
694         fprintf(fp, "%s", tree->m_ptrName[uNodeIndex]);
695     } else {
696         TreeToFileNodeRooted(tree, GetLeft(uNodeIndex, tree), fp);
697         fprintf(fp, ",\n");
698         TreeToFileNodeRooted(tree, GetRight(uNodeIndex, tree), fp);
699     }
700
701     if (bGroup)
702         fprintf(fp, ")");
703
704     if (!IsRoot(uNodeIndex, tree)) {
705         uint uParent = GetParent(uNodeIndex, tree);
706         if (HasEdgeLength(uNodeIndex, uParent, tree))
707             fprintf(fp, ":%g", GetEdgeLength(uNodeIndex, uParent, tree));
708     }
709     fprintf(fp, "\n");
710 }
711 /***   end: TreeToFileNodeRooted   ***/
712
713
714 /**
715  *
716  *
717  */
718 void
719 TreeValidate(tree_t *tree)
720 {
721     uint uNodeIndex;
722     assert(NULL != tree);
723     for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
724         ValidateNode(uNodeIndex, tree);
725     }
726     /* FIXME: maybe set negative length to zero? What impact would
727      * that have? */
728 }
729 /***   end TreeValidate   ***/
730
731
732
733 /**
734  *
735  *
736  */
737 void
738 ValidateNode(uint uNodeIndex, tree_t *tree)
739 {
740     uint uNeighborCount;
741     uint n1, n2, n3;
742     assert(NULL != tree);
743     
744     if (uNodeIndex >= tree->m_uNodeCount)
745         Log(&rLog, LOG_FATAL, "ValidateNode(%u), %u nodes", uNodeIndex, tree->m_uNodeCount);
746
747     uNeighborCount = GetNeighborCount(uNodeIndex, tree);
748     
749
750     if (2 == uNeighborCount) {
751         if (!tree->m_bRooted) {
752             Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
753                   uNodeIndex);
754         }
755         if (uNodeIndex != tree->m_uRootNodeIndex) {
756             Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
757                   uNodeIndex, tree->m_uRootNodeIndex);
758         }
759     }
760
761     n1 = tree->m_uNeighbor1[uNodeIndex];
762     n2 = tree->m_uNeighbor2[uNodeIndex];
763     n3 = tree->m_uNeighbor3[uNodeIndex];
764     
765     if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3) {
766         Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n2=null, n3!=null", uNodeIndex);
767     }
768     if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2) {
769         Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n3=null, n2!=null", uNodeIndex);
770     }
771     
772     if (n1 != NULL_NEIGHBOR)
773         AssertAreNeighbors(uNodeIndex, n1, tree);
774     if (n2 != NULL_NEIGHBOR)
775         AssertAreNeighbors(uNodeIndex, n2, tree);
776     if (n3 != NULL_NEIGHBOR)
777         AssertAreNeighbors(uNodeIndex, n3, tree);
778
779
780     
781     if (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) {
782         Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
783     }
784     if (n2 != NULL_NEIGHBOR && (n2 == n1 || n2 == n3)) {
785         Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
786     }
787     if (n3 != NULL_NEIGHBOR && (n3 == n1 || n3 == n2)) {
788         Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
789     }
790
791
792     if (IsRooted(tree)) {
793         if (NULL_NEIGHBOR == GetParent(uNodeIndex, tree)) {
794             if (uNodeIndex != tree->m_uRootNodeIndex) {
795                 Log(&rLog, LOG_FATAL, "Tree::ValiateNode(%u), no parent", uNodeIndex);
796             }
797         } else if (GetLeft(GetParent(uNodeIndex, tree), tree) != uNodeIndex &&
798                    GetRight(GetParent(uNodeIndex, tree), tree) != uNodeIndex) {
799             Log(&rLog, LOG_FATAL, "Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
800         }
801     }
802 }
803 /***   end: ValidateNode   ***/
804
805
806 /**
807  *
808  *
809  */
810 void
811 AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
812 {
813     bool Has12, Has21;
814     assert(NULL != tree);
815
816     if (uNodeIndex1 >= tree->m_uNodeCount || uNodeIndex2 >= tree->m_uNodeCount)
817         Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u), are %u nodes",
818               uNodeIndex1, uNodeIndex2, tree->m_uNodeCount);
819
820     if (tree->m_uNeighbor1[uNodeIndex1] != uNodeIndex2 &&
821         tree->m_uNeighbor2[uNodeIndex1] != uNodeIndex2 &&
822         tree->m_uNeighbor3[uNodeIndex1] != uNodeIndex2) {
823         Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
824     }
825
826     
827     if (tree->m_uNeighbor1[uNodeIndex2] != uNodeIndex1 &&
828         tree->m_uNeighbor2[uNodeIndex2] != uNodeIndex1 &&
829         tree->m_uNeighbor3[uNodeIndex2] != uNodeIndex1) {
830         Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
831     }
832
833
834     Has12 = HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
835     Has21 = HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
836     if (Has12 != Has21) {
837         HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
838         HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
839         Log(&rLog, LOG_ERROR, "HasEdgeLength(%u, %u)=%c HasEdgeLength(%u, %u)=%c\n",
840               uNodeIndex1,
841               uNodeIndex2,
842               Has12 ? 'T' : 'F',
843               uNodeIndex2,
844               uNodeIndex1,
845               Has21 ? 'T' : 'F');
846         Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, HasEdgeLength not symmetric");
847     }
848
849     
850     if (Has12) {
851         double d12 = GetEdgeLength(uNodeIndex1, uNodeIndex2, tree);
852         double d21 = GetEdgeLength(uNodeIndex2, uNodeIndex1, tree);
853         if (d12 != d21) {
854             Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, Edge length disagrees %u-%u=%.3g, %u-%u=%.3g",
855                   uNodeIndex1, uNodeIndex2, d12,
856                   uNodeIndex2, uNodeIndex1, d21);
857         }
858     }
859 }
860 /***   end: AssertAreNeighbors   ***/
861
862
863
864 /**
865  *
866  * @note see phyfromfile.cpp in Muscle3.7. Tree has to be a pointer to
867  * an already allocated tree structure.
868  *
869  * return non-Zero on failure
870  *
871  * leafids will not be set here (FIXME:CHECK if true)
872  */
873 int
874 MuscleTreeFromFile(tree_t *tree, char *ftree)
875 {
876     double dEdgeLength;
877     bool bEdgeLength;
878     char szToken[16];
879     NEWICK_TOKEN_TYPE NTT;
880     unsigned uThirdNode;
881     FILE *fp = NULL;
882
883     assert(tree!=NULL);
884     assert(ftree!=NULL);
885
886     if (NULL == (fp=fopen(ftree, "r"))) {
887         Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for reading. Skipping", ftree);
888         return -1;
889     }
890     
891     /* Assume rooted.
892      * If we discover that it is unrooted, will convert on the fly.
893      */
894     TreeCreateRooted(tree);
895
896     bEdgeLength = GetGroupFromFile(fp, 0, &dEdgeLength, tree);
897
898
899     /* Next token should be either ';' for rooted tree or ',' for
900      * unrooted.
901      */
902     NTT = GetToken(fp, szToken, sizeof(szToken));
903
904     /* If rooted, all done. */
905     if (NTT_Semicolon == NTT) {
906         if (bEdgeLength)
907             Log(&rLog, LOG_WARN, " *** Warning *** edge length on root group in Newick file %s\n", ftree);
908         TreeValidate(tree);
909         fclose(fp);
910         return 0;
911     }
912
913     if (NTT_Comma != NTT)
914         Log(&rLog, LOG_FATAL, "Tree::FromFile, expected ';' or ',', got '%s'", szToken);
915
916     uThirdNode = UnrootFromFile(tree);
917     bEdgeLength = GetGroupFromFile(fp, uThirdNode, &dEdgeLength, tree);
918     if (bEdgeLength)
919         SetEdgeLength(0, uThirdNode, dEdgeLength, tree);
920     TreeValidate(tree);
921
922     fclose(fp);
923     return 0;
924 }
925 /***   end MuscleTreeFromFile   ***/
926
927
928
929 /**
930  *
931  */
932 void
933 ExpandCache(tree_t *tree)
934 {
935     const uint uNodeCount = 100;
936     uint uNewCacheCount;
937     uint *uNewNeighbor1, *uNewNeighbor2, *uNewNeighbor3;
938     uint *uNewIds;
939     double *dNewEdgeLength1, *dNewEdgeLength2, *dNewEdgeLength3;
940 #if USE_HEIGHT
941     double *dNewHeight;
942     bool *bNewHasHeight;
943 #endif
944     bool *bNewHasEdgeLength1, *bNewHasEdgeLength2, *bNewHasEdgeLength3;
945     char **ptrNewName;
946
947     assert(NULL != tree);
948     uNewCacheCount = tree->m_uCacheCount + uNodeCount;
949     uNewNeighbor1 = (uint *) CKMALLOC(
950         uNewCacheCount * sizeof(uint));
951     uNewNeighbor2 = (uint *) CKMALLOC(
952         uNewCacheCount * sizeof(uint));
953     uNewNeighbor3 = (uint *) CKMALLOC(
954         uNewCacheCount * sizeof(uint));
955
956     uNewIds = (uint *) CKCALLOC(
957         uNewCacheCount, sizeof(uint));
958     
959     dNewEdgeLength1 = (double *) CKMALLOC(
960         uNewCacheCount * sizeof(double));
961     dNewEdgeLength2 =  (double *) CKMALLOC(
962         uNewCacheCount * sizeof(double));
963     dNewEdgeLength3 = (double *) CKMALLOC(
964         uNewCacheCount * sizeof(double));
965 #if USE_HEIGHT
966     dNewHeight = (double *) CKMALLOC(
967         uNewCacheCount * sizeof(double));
968     bNewHasHeight = (bool *) CKMALLOC(
969         uNewCacheCount * sizeof(bool));
970 #endif
971      
972     bNewHasEdgeLength1 = (bool *) CKMALLOC(
973         uNewCacheCount * sizeof(bool));
974     bNewHasEdgeLength2 = (bool *) CKMALLOC(
975         uNewCacheCount * sizeof(bool));
976     bNewHasEdgeLength3 = (bool *) CKMALLOC(
977         uNewCacheCount * sizeof(bool));
978     ptrNewName = (char **) CKCALLOC(uNewCacheCount, sizeof(char*));
979
980     if (tree->m_uCacheCount > 0) {
981         uint uUnsignedBytes, uEdgeBytes;
982         uint uBoolBytes, uNameBytes;
983
984         uUnsignedBytes = tree->m_uCacheCount*sizeof(uint);
985         
986         memcpy(uNewNeighbor1, tree->m_uNeighbor1, uUnsignedBytes);
987         memcpy(uNewNeighbor2, tree->m_uNeighbor2, uUnsignedBytes);
988         memcpy(uNewNeighbor3, tree->m_uNeighbor3, uUnsignedBytes);
989
990         memcpy(uNewIds, tree->m_Ids, uUnsignedBytes);
991
992         uEdgeBytes = tree->m_uCacheCount*sizeof(double);
993         memcpy(dNewEdgeLength1, tree->m_dEdgeLength1, uEdgeBytes);
994         memcpy(dNewEdgeLength2, tree->m_dEdgeLength2, uEdgeBytes);
995         memcpy(dNewEdgeLength3, tree->m_dEdgeLength3, uEdgeBytes);
996 #if USE_HEIGHT
997         memcpy(dNewHeight, tree->m_dHeight, uEdgeBytes);
998 #endif        
999           
1000         uBoolBytes = tree->m_uCacheCount*sizeof(bool);
1001         memcpy(bNewHasEdgeLength1, tree->m_bHasEdgeLength1, uBoolBytes);
1002         memcpy(bNewHasEdgeLength2, tree->m_bHasEdgeLength2, uBoolBytes);
1003         memcpy(bNewHasEdgeLength3, tree->m_bHasEdgeLength3, uBoolBytes);
1004 #if USE_HEIGHT
1005         memcpy(bNewHasHeight, tree->m_bHasHeight, uBoolBytes);
1006 #endif
1007         uNameBytes = tree->m_uCacheCount*sizeof(char *);
1008         memcpy(ptrNewName, tree->m_ptrName, uNameBytes);
1009
1010         /* similiar to FreeMuscleTree
1011          */
1012         
1013         /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
1014          * so free first
1015          */
1016 #if 0
1017         for (i=0; i<tree->m_uNodeCount; i++) {
1018             if (IsLeaf(i, tree)) {
1019 #ifndef NDEBUG
1020                 if (NULL==tree->m_ptrName[i]) {
1021                     Log(&rLog, LOG_WARN, "FIXME tree->m_ptrName[%d] is already NULL", i);
1022                 }
1023 #endif
1024                 CKFREE(tree->m_ptrName[i]);
1025             }
1026         }
1027 #endif
1028         CKFREE(tree->m_ptrName);
1029
1030         CKFREE(tree->m_uNeighbor1);
1031         CKFREE(tree->m_uNeighbor2);
1032         CKFREE(tree->m_uNeighbor3);
1033
1034         CKFREE(tree->m_Ids);
1035
1036         CKFREE(tree->m_dEdgeLength1);
1037         CKFREE(tree->m_dEdgeLength2);
1038         CKFREE(tree->m_dEdgeLength3);
1039
1040         CKFREE(tree->m_bHasEdgeLength1);
1041         CKFREE(tree->m_bHasEdgeLength2);
1042         CKFREE(tree->m_bHasEdgeLength3);
1043 #if USE_HEIGHT
1044         CKFREE(tree->m_bHasHeight);
1045         CKFREE(tree->m_dHeight);
1046 #endif
1047     }
1048     
1049     tree->m_uCacheCount = uNewCacheCount;
1050     tree->m_uNeighbor1 = uNewNeighbor1;
1051     tree->m_uNeighbor2 = uNewNeighbor2;
1052     tree->m_uNeighbor3 = uNewNeighbor3;
1053     tree->m_Ids = uNewIds;
1054     tree->m_dEdgeLength1 = dNewEdgeLength1;
1055     tree->m_dEdgeLength2 = dNewEdgeLength2;
1056     tree->m_dEdgeLength3 = dNewEdgeLength3;
1057         
1058 #ifdef USE_HEIGHT
1059     tree->m_dHeight = dNewHeight;
1060     tree->m_bHasHeight = bNewHasHeight;
1061 #endif
1062     tree->m_bHasEdgeLength1 = bNewHasEdgeLength1;
1063     tree->m_bHasEdgeLength2 = bNewHasEdgeLength2;
1064     tree->m_bHasEdgeLength3 = bNewHasEdgeLength3;
1065         
1066     tree->m_ptrName = ptrNewName;
1067
1068 }
1069 /***   end: ExpandCache   ***/
1070
1071
1072
1073 /**
1074  *
1075  * Tree must be pointer to an already allocated tree structure
1076  *
1077  */
1078 void
1079 TreeCreateRooted(tree_t *tree)
1080 {
1081     TreeZero(tree);
1082     ExpandCache(tree);
1083     tree->m_uNodeCount = 1;
1084
1085     tree->m_uNeighbor1[0] = NULL_NEIGHBOR;
1086     tree->m_uNeighbor2[0] = NULL_NEIGHBOR;
1087     tree->m_uNeighbor3[0] = NULL_NEIGHBOR;
1088     
1089     tree->m_bHasEdgeLength1[0] = FALSE;
1090     tree->m_bHasEdgeLength2[0] = FALSE;
1091     tree->m_bHasEdgeLength3[0] = FALSE;
1092 #if USE_HEIGHT
1093     tree->m_bHasHeight[0] = FALSE;
1094 #endif
1095     
1096     tree->m_uRootNodeIndex = 0;
1097     tree->m_bRooted = TRUE;
1098     
1099 #ifndef NDEBUG
1100     TreeValidate(tree);
1101 #endif
1102 }
1103 /***   end: TreeCreateRooted   ***/
1104
1105
1106 /**
1107  *
1108  */
1109 uint
1110 UnrootFromFile(tree_t *tree)
1111 {
1112     uint uThirdNode;
1113 #if TRACE
1114     Log("Before unroot:\n");
1115     LogMe();
1116 #endif
1117     
1118     if (!tree->m_bRooted)
1119         Log(&rLog, LOG_FATAL, "Tree::Unroot, not rooted");
1120     
1121     /* Convention: root node is always node zero */
1122     assert(IsRoot(0, tree));
1123     assert(NULL_NEIGHBOR == tree->m_uNeighbor1[0]);
1124
1125     uThirdNode = tree->m_uNodeCount++;
1126     
1127     tree->m_uNeighbor1[0] = uThirdNode;
1128     tree->m_uNeighbor1[uThirdNode] = 0;
1129     
1130     tree->m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
1131     tree->m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
1132     
1133     tree->m_dEdgeLength1[0] = 0;
1134     tree->m_dEdgeLength1[uThirdNode] = 0;
1135     tree->m_bHasEdgeLength1[uThirdNode] = TRUE;
1136     
1137     tree->m_bRooted = FALSE;
1138     
1139 #if TRACE
1140     Log("After unroot:\n");
1141     LogMe();
1142 #endif
1143
1144     return uThirdNode;
1145 }
1146 /***   end: UnrootFromFile   ***/
1147
1148
1149
1150 /**
1151  *
1152  *
1153  */
1154 bool
1155 GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree)
1156 {
1157     char szToken[1024];
1158     NEWICK_TOKEN_TYPE NTT = GetToken(fp, szToken, sizeof(szToken));
1159     bool bRightLength;
1160     bool bEof;
1161     char c;
1162
1163     /* Group is either leaf name or (left, right). */
1164     if (NTT_String == NTT) {
1165         SetLeafName(uNodeIndex, szToken, tree);
1166 #if TRACE
1167         Log(&rLog, LOG_FORCED_DEBUG, "Group is leaf '%s'", szToken);
1168 #endif
1169     } else if (NTT_Lparen == NTT) {
1170         const unsigned uLeft = AppendBranch(tree, uNodeIndex);
1171         const unsigned uRight = uLeft + 1;
1172         double dEdgeLength;
1173         bool bLeftLength = GetGroupFromFile(fp, uLeft, &dEdgeLength, tree);
1174         
1175         /* Left sub-group...
1176          */
1177 #if TRACE
1178         Log(&rLog, LOG_FORCED_DEBUG, "%s", "Got '(', group is compound, expect left sub-group");
1179
1180         if (bLeftLength) {
1181             Log(&rLog, LOG_FORCED_DEBUG, "Edge length for left sub-group: %.3g", dEdgeLength);
1182         } else {
1183             Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for left sub-group");
1184         }
1185 #endif
1186         if (bLeftLength)
1187             SetEdgeLength(uNodeIndex, uLeft, dEdgeLength, tree);
1188
1189         /* ... then comma ...
1190          */
1191 #if TRACE
1192         Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect comma");
1193 #endif
1194         NTT = GetToken(fp, szToken, sizeof(szToken));
1195         if (NTT_Comma != NTT)
1196             Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
1197         
1198         /* ...then right sub-group...
1199          */
1200 #if TRACE
1201         Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect right sub-group");
1202 #endif
1203         bRightLength = GetGroupFromFile(fp, uRight, &dEdgeLength, tree);
1204         if (bRightLength)
1205             SetEdgeLength(uNodeIndex, uRight, dEdgeLength, tree);
1206         
1207 #if TRACE
1208         if (bRightLength)
1209             Log(&rLog, LOG_FORCED_DEBUG, "Edge length for right sub-group: %.3g", dEdgeLength);
1210         else
1211             Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for right sub-group");
1212 #endif
1213
1214         /* ... then closing parenthesis.
1215          */
1216 #if TRACE
1217         Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect closing parenthesis (or comma if > 2-ary)");
1218 #endif
1219         NTT = GetToken(fp, szToken, sizeof(szToken));
1220         if (NTT_Rparen == NTT)
1221             ;
1222         else if (NTT_Comma == NTT) {
1223             if (ungetc(',', fp)==EOF)
1224                 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1225             return FALSE;
1226         } else
1227             Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
1228     } else {
1229         Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
1230               szToken);
1231     }
1232     
1233     /* Group may optionally be followed by edge length.
1234      */
1235     bEof = FileSkipWhiteX(fp);
1236     if (bEof)
1237         return FALSE;
1238     if ((c = fgetc(fp))==EOF) /* GetCharX */
1239         Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1240 #if TRACE
1241     Log(&rLog, LOG_FORCED_DEBUG, "Character following group, could be colon, is '%c'", c);
1242 #endif
1243     if (':' == c) {
1244         NTT = GetToken(fp, szToken, sizeof(szToken));
1245         if (NTT_String != NTT)
1246             Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
1247         *ptrdEdgeLength = atof(szToken);
1248         return TRUE;
1249     }
1250     if (ungetc(c, fp)==EOF)
1251         Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1252     
1253     return FALSE;
1254 }
1255 /***   end: GetGroupFromFile   ***/
1256
1257
1258
1259
1260 /**
1261  *
1262  */
1263 void
1264 FileSkipWhite(FILE *fp)
1265 {
1266     bool bEof = FileSkipWhiteX(fp);
1267     if (bEof)
1268         Log(&rLog, LOG_FATAL, "%s", "End-of-file skipping white space");
1269 }
1270 /***   end: FileSkipWhite   ***/
1271
1272
1273
1274
1275 /**
1276  *
1277  */
1278 bool
1279 FileSkipWhiteX(FILE *fp)
1280 {
1281     for (;;) {
1282         int c;
1283         bool bEof;
1284         
1285         /* GetChar */
1286         if ((c = fgetc(fp))==EOF) {
1287             bEof = TRUE;
1288         } else {
1289             bEof = FALSE;
1290         }
1291
1292         if (bEof)
1293             return TRUE;
1294         if (!isspace(c)) {
1295             if (ungetc(c, fp)==EOF)
1296                 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1297             break;
1298         }
1299     }
1300     return FALSE;
1301 }
1302 /***   end: FileSkipWhiteX   ***/
1303
1304
1305
1306
1307 /**
1308  *
1309  */
1310 NEWICK_TOKEN_TYPE
1311 GetToken(FILE *fp, char szToken[], uint uBytes)
1312 {
1313     char c;
1314     unsigned uBytesCopied = 0;
1315     NEWICK_TOKEN_TYPE TT;
1316
1317     /* Skip leading white space */
1318     FileSkipWhite(fp);
1319
1320     if ((c = fgetc(fp))==EOF) /* GetCharX */
1321         Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1322     
1323     /* In case a single-character token */
1324     szToken[0] = c;
1325     szToken[1] = 0;
1326
1327     switch (c) {
1328
1329     case '(':
1330         return NTT_Lparen;
1331         
1332     case ')':
1333         return NTT_Rparen;
1334         
1335     case ':':
1336         return NTT_Colon;
1337         
1338     case ';':
1339         return NTT_Semicolon;
1340         
1341     case ',':
1342         return NTT_Comma;
1343         
1344     case '\'':
1345         TT = NTT_SingleQuotedString;
1346         if ((c = fgetc(fp))==EOF) /* GetCharX */
1347             Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1348         break;
1349         
1350     case '"':
1351         TT = NTT_DoubleQuotedString;
1352         if ((c = fgetc(fp))==EOF) /* GetCharX */
1353             Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1354         break;
1355         
1356     case '[':
1357         TT = NTT_Comment;
1358         break;
1359         
1360     default:
1361         TT = NTT_String;
1362         break;
1363     }
1364     
1365     for (;;)
1366     {
1367         bool bEof;
1368         if (TT != NTT_Comment) {
1369             if (uBytesCopied < uBytes - 2)  {
1370                 szToken[uBytesCopied++] = c;
1371                 szToken[uBytesCopied] = 0;
1372             } else {
1373                 Log(&rLog, LOG_FATAL, "Tree::GetToken: input buffer too small, token so far='%s'", szToken);
1374             }
1375         }
1376         c = fgetc(fp); /* GetChar */
1377         bEof = (c==EOF ? TRUE : FALSE);
1378         if (bEof)
1379             return TT;
1380         
1381         switch (TT) {
1382
1383         case NTT_String:
1384             if (0 != strchr("():;,", c)) {
1385                 if (ungetc(c, fp)==EOF)
1386                     Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1387                 return NTT_String;
1388             }
1389             if (isspace(c))
1390                 return NTT_String;
1391             break;
1392             
1393         case NTT_SingleQuotedString:
1394             if ('\'' == c)
1395                 return NTT_String;
1396             break;
1397             
1398         case NTT_DoubleQuotedString:
1399             if ('"' == c)
1400                 return NTT_String;
1401             break;
1402             
1403         case NTT_Comment:
1404             if (']' == c)
1405                 return GetToken(fp, szToken, uBytes);
1406             break;
1407             
1408         default:
1409             Log(&rLog, LOG_FATAL, "Tree::GetToken, invalid TT=%u", TT);
1410         }
1411     }
1412 }
1413 /***   end: GetToken   ***/
1414
1415
1416
1417 /***   SetLeafName
1418  *
1419  */
1420 void
1421 SetLeafName(unsigned uNodeIndex, const char *ptrName, tree_t *tree)
1422 {
1423     assert(uNodeIndex < tree->m_uNodeCount);
1424     assert(IsLeaf(uNodeIndex, tree));
1425     free(tree->m_ptrName[uNodeIndex]);
1426     /*LOG_DEBUG("Setting tree->m_ptrName[uNodeIndex=%d] to %s", uNodeIndex, ptrName);*/
1427     tree->m_ptrName[uNodeIndex] = CkStrdup(ptrName);
1428 }
1429 /***   end: SetLeafName   ***/
1430
1431
1432
1433
1434 /**
1435  *
1436  * Append a new branch. This adds two new nodes and joins them to an
1437  * existing leaf node. Return value is k, new nodes have indexes k and
1438  * k+1 respectively.
1439  *
1440  */
1441 uint
1442 AppendBranch(tree_t *tree, uint uExistingLeafIndex)
1443 {
1444     uint uNewLeaf1;
1445     uint uNewLeaf2;
1446     
1447     assert(tree!=NULL);
1448     if (0 == tree->m_uNodeCount) {
1449         Log(&rLog, LOG_FATAL, "%s(): %s", __FUNCTION__, "tree has not been created");
1450     }
1451     assert(NULL_NEIGHBOR == tree->m_uNeighbor2[uExistingLeafIndex]);
1452     assert(NULL_NEIGHBOR == tree->m_uNeighbor3[uExistingLeafIndex]);
1453     assert(uExistingLeafIndex < tree->m_uNodeCount);
1454 #ifndef NDEBUG
1455     if (!IsLeaf(uExistingLeafIndex, tree)) {
1456         Log(&rLog, LOG_FATAL, "AppendBranch(%u): not leaf", uExistingLeafIndex);
1457     }
1458 #endif
1459
1460     if (tree->m_uNodeCount >= tree->m_uCacheCount - 2) {
1461         ExpandCache(tree);
1462     }
1463     uNewLeaf1 = tree->m_uNodeCount;
1464     uNewLeaf2 = tree->m_uNodeCount + 1;
1465
1466     tree->m_uNodeCount += 2;
1467     
1468     tree->m_uNeighbor2[uExistingLeafIndex] = uNewLeaf1;
1469     tree->m_uNeighbor3[uExistingLeafIndex] = uNewLeaf2;
1470     
1471     tree->m_uNeighbor1[uNewLeaf1] = uExistingLeafIndex;
1472     tree->m_uNeighbor1[uNewLeaf2] = uExistingLeafIndex;
1473     
1474     tree->m_uNeighbor2[uNewLeaf1] = NULL_NEIGHBOR;
1475     tree->m_uNeighbor2[uNewLeaf2] = NULL_NEIGHBOR;
1476     
1477     tree->m_uNeighbor3[uNewLeaf1] = NULL_NEIGHBOR;
1478     tree->m_uNeighbor3[uNewLeaf2] = NULL_NEIGHBOR;
1479     
1480     tree->m_dEdgeLength2[uExistingLeafIndex] = 0;
1481     tree->m_dEdgeLength3[uExistingLeafIndex] = 0;
1482     
1483     tree->m_dEdgeLength1[uNewLeaf1] = 0;
1484     tree->m_dEdgeLength2[uNewLeaf1] = 0;
1485     tree->m_dEdgeLength3[uNewLeaf1] = 0;
1486     
1487     tree->m_dEdgeLength1[uNewLeaf2] = 0;
1488     tree->m_dEdgeLength2[uNewLeaf2] = 0;
1489     tree->m_dEdgeLength3[uNewLeaf2] = 0;
1490     
1491     tree->m_bHasEdgeLength1[uNewLeaf1] = FALSE;
1492     tree->m_bHasEdgeLength2[uNewLeaf1] = FALSE;
1493     tree->m_bHasEdgeLength3[uNewLeaf1] = FALSE;
1494     
1495     tree->m_bHasEdgeLength1[uNewLeaf2] = FALSE;
1496     tree->m_bHasEdgeLength2[uNewLeaf2] = FALSE;
1497     tree->m_bHasEdgeLength3[uNewLeaf2] = FALSE;
1498     
1499 #if USE_HEIGHT
1500     tree->m_bHasHeight[uNewLeaf1] = FALSE;
1501     tree->m_bHasHeight[uNewLeaf2] = FALSE;
1502 #endif 
1503     tree->m_Ids[uNewLeaf1] = uInsane;
1504     tree->m_Ids[uNewLeaf2] = uInsane;
1505     
1506     return uNewLeaf1;
1507 }
1508 /***   end: AppendBranch   ***/
1509
1510
1511 /**
1512  *
1513  *
1514  */
1515 void
1516 SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
1517               double dLength, tree_t *tree)
1518 {
1519     assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
1520     assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
1521     
1522     if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2) {
1523         tree->m_dEdgeLength1[uNodeIndex1] = dLength;
1524         tree->m_bHasEdgeLength1[uNodeIndex1] = TRUE;
1525     } else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2) {
1526         tree->m_dEdgeLength2[uNodeIndex1] = dLength;
1527         tree->m_bHasEdgeLength2[uNodeIndex1] = TRUE;
1528         
1529     } else {
1530         assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
1531         tree->m_dEdgeLength3[uNodeIndex1] = dLength;
1532         tree->m_bHasEdgeLength3[uNodeIndex1] = TRUE;
1533     }
1534     
1535     if (tree->m_uNeighbor1[uNodeIndex2] == uNodeIndex1) {
1536         tree->m_dEdgeLength1[uNodeIndex2] = dLength;
1537         tree->m_bHasEdgeLength1[uNodeIndex2] = TRUE;
1538     } else if (tree->m_uNeighbor2[uNodeIndex2] == uNodeIndex1) {
1539         tree->m_dEdgeLength2[uNodeIndex2] = dLength;
1540         tree->m_bHasEdgeLength2[uNodeIndex2] = TRUE;
1541     } else {
1542         assert(tree->m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
1543         tree->m_dEdgeLength3[uNodeIndex2] = dLength;
1544         tree->m_bHasEdgeLength3[uNodeIndex2] = TRUE;
1545     }
1546 }
1547 /***   end: SetEdgeLength   ***/
1548
1549
1550
1551 /**
1552  *
1553  * Debug output
1554  *
1555  * LogMe in phy.cpp
1556  *
1557  */
1558 void
1559 LogTree(tree_t *tree, FILE *fp)
1560 {
1561     uint uNodeIndex;
1562     uint n1, n2, n3;
1563     char *ptrName;
1564     
1565     fprintf(fp, "This is a tree with %u nodes, which is ", tree->m_uNodeCount);
1566     
1567     if (IsRooted(tree)) {
1568         fprintf(fp, "rooted:\n");
1569         fprintf(fp, "Index  Parnt  LengthP  Left   LengthL  Right  LengthR     Id  Name\n");
1570         fprintf(fp, "-----  -----  -------  ----   -------  -----  -------  -----  ----\n");
1571
1572     } else {
1573         fprintf(fp, "unrooted;\n");
1574         fprintf(fp, "Index  Nbr_1  Length1  Nbr_2  Length2  Nbr_3  Length3     Id  Name\n");
1575         fprintf(fp, "-----  -----  -------  -----  -------  -----  -------  -----  ----\n");
1576     }
1577     
1578     for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
1579         fprintf(fp, "%5u  ", uNodeIndex);
1580         n1 = tree->m_uNeighbor1[uNodeIndex];
1581         n2 = tree->m_uNeighbor2[uNodeIndex];
1582         n3 = tree->m_uNeighbor3[uNodeIndex];
1583         
1584         if (NULL_NEIGHBOR != n1) {
1585             fprintf(fp, "%5u  ", n1);
1586             if (tree->m_bHasEdgeLength1[uNodeIndex])
1587                 fprintf(fp, "%7.3g  ", tree->m_dEdgeLength1[uNodeIndex]);
1588             else
1589                 fprintf(fp, "      *  ");
1590         } else {
1591             fprintf(fp, "                ");
1592         }
1593         
1594         if (NULL_NEIGHBOR != n2) {
1595             fprintf(fp, "%5u  ", n2);
1596             if (tree->m_bHasEdgeLength2[uNodeIndex])
1597                 fprintf(fp, "%7.3g  ", tree->m_dEdgeLength2[uNodeIndex]);
1598             else
1599                 fprintf(fp, "      *  ");
1600         } else {
1601             fprintf(fp, "                ");
1602         }
1603         
1604         if (NULL_NEIGHBOR != n3) {
1605             fprintf(fp, "%5u  ", n3);
1606             if (tree->m_bHasEdgeLength3[uNodeIndex])
1607                 fprintf(fp, "%7.3g  ", tree->m_dEdgeLength3[uNodeIndex]);
1608             else
1609                 fprintf(fp, "      *  ");
1610         } else {
1611             fprintf(fp, "                ");
1612         }
1613         
1614         if (tree->m_Ids != 0 && IsLeaf(uNodeIndex, tree)) {
1615             unsigned uId = tree->m_Ids[uNodeIndex];
1616             if (uId == uInsane)
1617                 fprintf(fp, "    *");
1618             else
1619                 fprintf(fp, "%5u", uId);
1620         } else {
1621             fprintf(fp, "     ");
1622         }
1623         
1624         if (tree->m_bRooted && uNodeIndex == tree->m_uRootNodeIndex)
1625             fprintf(fp, "  [ROOT] ");
1626         ptrName = tree->m_ptrName[uNodeIndex];
1627         if (ptrName != 0)
1628             fprintf(fp, "  %s", ptrName);
1629         
1630         fprintf(fp, "\n");
1631     }     
1632 }
1633 /***   end: LogTree   ***/
1634
1635
1636
1637 /**
1638  *
1639  * replaces m_uLeafCount
1640  */
1641 uint
1642 GetLeafCount(tree_t *tree)
1643 {
1644     assert(tree!=NULL);
1645     
1646     return (tree->m_uNodeCount+1)/2;
1647 }
1648 /***   GetLeafCount   ***/
1649
1650
1651
1652 /**
1653  *
1654  */
1655 uint
1656 GetNodeCount(tree_t *tree)
1657 {
1658     assert(tree!=NULL);
1659     
1660     return 2*(GetLeafCount(tree)) - 1;
1661 }
1662 /***   end: GetNodeCount   ***/
1663
1664
1665 /**
1666  *
1667  */
1668 uint
1669 GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *prTree)
1670 {
1671     assert(uNodeIndex < prTree->m_uNodeCount);
1672     switch (uNeighborSubscript)
1673     {
1674     case 0:
1675         return prTree->m_uNeighbor1[uNodeIndex];
1676     case 1:
1677         return prTree->m_uNeighbor2[uNodeIndex];
1678     case 2:
1679         return prTree->m_uNeighbor3[uNodeIndex];
1680     }
1681     Log(&rLog, LOG_FATAL, "Internal error in %s: sub=%u", __FUNCTION__, uNeighborSubscript);
1682     return NULL_NEIGHBOR;
1683 }
1684 /***   end: GetNeighbor   ***/
1685
1686
1687
1688
1689
1690 /**
1691  *
1692  */
1693 void
1694 SetLeafId(tree_t *tree, uint uNodeIndex, uint uId)
1695 {
1696     assert(uNodeIndex < tree->m_uNodeCount);
1697     assert(IsLeaf(uNodeIndex, tree));
1698     tree->m_Ids[uNodeIndex] = uId;
1699 }
1700 /***   end: SetLeafId    ***/
1701
1702
1703 /**
1704  *
1705  */
1706 uint
1707 GetRootNodeIndex(tree_t *tree)
1708 {
1709     assert(NULL!=tree);
1710     return tree->m_uRootNodeIndex;
1711 }
1712 /***   end: GetRootNodeIndex   ***/
1713
1714
1715
1716 /**
1717  * @note avoid usage if you want to iterate over all indices, because
1718  * this will be slow
1719  *
1720  */
1721 uint
1722 LeafIndexToNodeIndex(uint uLeafIndex, tree_t *prTree) {
1723     uint uLeafCount = 0;
1724     unsigned uNodeCount = GetNodeCount(prTree);
1725     uint uNodeIndex;
1726     
1727     for (uNodeIndex = 0; uNodeIndex < uNodeCount; uNodeIndex++) {
1728         if (IsLeaf(uNodeIndex, prTree)) {
1729             if (uLeafCount == uLeafIndex) {
1730                 return uNodeIndex;
1731             } else {
1732                 uLeafCount++;
1733             }
1734         }
1735     }
1736     Log(&rLog, LOG_FATAL, "Internal error: node index out of range");
1737     return 0;
1738 }
1739 /***   end: LeafIndexToNodeIndex   ***/
1740
1741
1742
1743
1744 /**
1745  * @brief Append a (source) tree to a (dest) tree to a given node
1746  * which will be replaced. All other nodes in that tree will stay the
1747  * same.
1748  *
1749  * @param[out] prDstTree
1750  * The tree to append to
1751  * @param[in] uDstTreeReplaceNodeIndex
1752  * Dest tree node to which source tree will be appended
1753  * @param[in] prSrcTree
1754  * The tree to append
1755  * 
1756  * @note No nodes inside prDstTree will change except
1757  * uDstTreeReplaceNodeIndex
1758  *
1759  *
1760  * @warning: Function won't check or touch the m_Ids/leaf-indices!
1761  * That means if you want to join two trees with leaf indices 1-10 and
1762  * 1-10 your m_Ids/leaf-indices won't be unique anymore and the
1763  * association between your sequences and the tree are broken. Make
1764  * sure m_Ids are unique before calling me.
1765  *
1766  * The easiest would have been to do this by recursively calling
1767  * AppendBranch() (after adding uSrcTreeNodeIndex as extra argument to
1768  * this function). But recursion is evil. Yet another version would be
1769  * to setup all the data and call MuscleTreeCreate() to create a third
1770  * tree, which seems complicated and wasteful. The approach taken here
1771  * is the following: increase dest tree memory, copy over each src
1772  * tree node data and update the indices and counters. This is tricky
1773  * and has a lot of potential for bugs if tree interface is changed
1774  * (and even if it isn't).
1775  *
1776  */
1777 void
1778 AppendTree(tree_t *prDstTree, uint uDstTreeReplaceNodeIndex, tree_t *prSrcTree)
1779 {
1780     uint uSrcTreeNodeIndex;
1781     uint uOrgDstTreeNodeCount;
1782
1783     assert(NULL!=prDstTree);
1784     assert(NULL!=prSrcTree);
1785     assert(uDstTreeReplaceNodeIndex < prDstTree->m_uNodeCount);
1786     assert(IsLeaf(uDstTreeReplaceNodeIndex, prDstTree));
1787     assert(IsRooted(prDstTree) && IsRooted(prSrcTree));
1788
1789     
1790     uOrgDstTreeNodeCount = prDstTree->m_uNodeCount;
1791
1792     
1793     /* increase dest tree memory
1794      */
1795     while (prDstTree->m_uCacheCount
1796            <
1797            (GetNodeCount(prDstTree) + GetNodeCount(prSrcTree))) {
1798         ExpandCache(prDstTree);
1799     }
1800
1801
1802     /* copy all src tree nodes
1803      *
1804      */
1805     for (uSrcTreeNodeIndex=0;
1806          uSrcTreeNodeIndex<GetNodeCount(prSrcTree); uSrcTreeNodeIndex++) {
1807         uint uNewDstNodeIndex = prDstTree->m_uNodeCount;
1808         
1809         /* distinguish 4 cases for src nodes to copy:
1810          *
1811          * 1. src node is the only node, i.e. root & leaf
1812          *
1813          * 2. src node is root: set only left & right, but not parent
1814          * and just replace the given dest index
1815          *
1816          * 3. src node is leaf: set only parent
1817          *
1818          * 4. src node is internal node: update all three neighbours
1819          *
1820          * FIXME: this is messy. Is there a cleaner way to do this by
1821          * merging all cases?
1822          *
1823          */
1824         if (IsRoot(uSrcTreeNodeIndex, prSrcTree) && IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1825             /* special case: if this is the only member in
1826              * tree, i.e. it's root and leaf. Copy leaf name and leaf
1827              * id. No neighbours to update
1828              */
1829
1830             /* free dst node name if set */
1831             if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1832                 CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1833             }
1834
1835             prDstTree->m_ptrName[uDstTreeReplaceNodeIndex] =
1836                 CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1837
1838             prDstTree->m_Ids[uDstTreeReplaceNodeIndex] =
1839                 prSrcTree->m_Ids[uSrcTreeNodeIndex];
1840
1841             /* no updated of uNodeCount, because we used the replace node */
1842
1843 #if TRACE
1844             Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the only src leaf node: parent=%d (%f)",
1845                       uDstTreeReplaceNodeIndex,
1846                       prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex]);
1847 #endif
1848             
1849         } else if (IsRoot(uSrcTreeNodeIndex, prSrcTree)) {
1850             /* src node is root: replace uDstTreeReplaceNodeIndex
1851              * (not uNewDstNodeIndex) with src root, i.e. the
1852              * uDstTreeReplaceNodeIndex becomes an internal node now.
1853              *
1854              * We only have two neighbours 2 & 3 (no parent). Keep old
1855              * parent info (neighbor 1).
1856              */
1857
1858             /* free dst node name if set */
1859             if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1860                 CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1861             }
1862             
1863             prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex] =
1864                 prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1865             prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex] =
1866                 prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;          
1867             
1868             prDstTree->m_bHasEdgeLength2[uDstTreeReplaceNodeIndex] =
1869                 prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1870             prDstTree->m_bHasEdgeLength3[uDstTreeReplaceNodeIndex] =
1871                 prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];            
1872
1873             prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex] =
1874                 prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1875             prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex] =
1876                 prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1877
1878             /* make Id invalid */
1879             prDstTree->m_Ids[uDstTreeReplaceNodeIndex] = uInsane;
1880
1881             /* no updated of uNodeCount, because we used the replace node */
1882
1883 #if TRACE
1884             Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the src root node: (untouched) parent=%d (%f) left=%d (%f) right=%d (%f)",
1885                       uDstTreeReplaceNodeIndex,
1886                       prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex],
1887                       prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex],
1888                       prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex]);
1889 #endif
1890             
1891         } else if (IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1892             /* src node is a leaf, which means we only have one
1893              * neighbour, and that is its parent, i.e. n1
1894              *
1895              */
1896
1897             /* initialise/zero new node to default values
1898              */
1899             InitNode(prDstTree, uNewDstNodeIndex);
1900
1901         
1902             /*  update m_ptrName/leaf name
1903              */
1904             prDstTree->m_ptrName[uNewDstNodeIndex] =
1905                 CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1906
1907             /* update parent node (beware of special case: parent was
1908                src tree root */
1909             if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1910                     prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1911                         uDstTreeReplaceNodeIndex;
1912             } else {
1913                 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1914                     prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1915             }
1916
1917             /* update edge length info to parent
1918              */
1919             prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1920                 prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1921             prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1922                 prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1923
1924             /* update sequence/object id
1925              */
1926             prDstTree->m_Ids[uNewDstNodeIndex] =
1927                 prSrcTree->m_Ids[uSrcTreeNodeIndex];            
1928
1929             /* we used a new node so increase their count */
1930             prDstTree->m_uNodeCount += 1;
1931             
1932 #if TRACE
1933             Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with a src leaf node: parent=%d (%f)",
1934                       uNewDstNodeIndex,
1935                       prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex]);
1936 #endif
1937             
1938         } else  {
1939             /* src node is not root neither leaf, means we have an
1940              * internal node. Update all neighbour info
1941              * 
1942              */
1943
1944             /* initialise/zero node values to default values
1945              */
1946             InitNode(prDstTree, uNewDstNodeIndex);
1947           
1948             /* update neigbours
1949              */
1950             /* parent: special case if parent was src tree root */
1951             if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1952                     prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1953                         uDstTreeReplaceNodeIndex;
1954             } else {
1955                 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1956                     prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1957             }
1958             /* left */
1959             prDstTree->m_uNeighbor2[uNewDstNodeIndex] =
1960                 prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1961             /* right */
1962             prDstTree->m_uNeighbor3[uNewDstNodeIndex] =
1963                 prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1964
1965             /* update edge length info
1966              */
1967             /* parent */
1968             prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1969                 prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1970             prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1971                 prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1972             /* left */
1973             prDstTree->m_bHasEdgeLength2[uNewDstNodeIndex] =
1974                 prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1975             prDstTree->m_dEdgeLength2[uNewDstNodeIndex] =
1976                 prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1977             /* right */
1978             prDstTree->m_bHasEdgeLength3[uNewDstNodeIndex] =
1979                 prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
1980             prDstTree->m_dEdgeLength3[uNewDstNodeIndex] =
1981                 prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1982
1983             /* we used a new node so increase their count */
1984             prDstTree->m_uNodeCount += 1;
1985             
1986 #if TRACE
1987             Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with an internal src node: parent=%d (%f) left=%d (%f) right=%d (%f)",
1988                       uNewDstNodeIndex,
1989                       prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex],
1990                       prDstTree->m_uNeighbor2[uNewDstNodeIndex], prDstTree->m_dEdgeLength2[uNewDstNodeIndex],
1991                       prDstTree->m_uNeighbor3[uNewDstNodeIndex], prDstTree->m_dEdgeLength3[uNewDstNodeIndex]);
1992 #endif
1993         }
1994
1995     }
1996     /* end for each src tree node */
1997
1998     
1999     /*
2000      * m_uRootNodeIndex stays the same.
2001      *
2002      * No need to touch m_uCacheCount.
2003      *
2004      */    
2005 #if USE_HEIGHT
2006     Log(&rLog, LOG_FATAL, "Internal error: Height usage not implemented in %s", __FUNCTION__);
2007 #endif 
2008
2009     
2010 #ifndef NDEBUG
2011     TreeValidate(prDstTree);
2012 #endif
2013         
2014     return;
2015 }
2016 /***   end: AppendTree()   ***/
2017