1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /* This a mix of tree functions and data-structures from
4 * Bob Edgar's Muscle (version 3.7) ported to pure C, topped up with
5 * some of our own stuff.
7 * Used files: phy.cpp, tree.h, phytofile.cpp and phyfromclust.cpp
9 * Muscle's code is public domain and so is this code here.
11 * From http://www.drive5.com/muscle/license.htm:
13 * MUSCLE is public domain software
15 * The MUSCLE software, including object and source code and
16 * documentation, is hereby donated to the public domain.
18 * Disclaimer of warranty
19 * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
20 * EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * RCS $Id: muscle_tree.c 230 2011-04-09 15:37:50Z andreas $
39 #include "muscle_tree.h"
42 static const double VERY_NEGATIVE_DOUBLE = -9e29;
43 /*const double dInsane = VERY_NEGATIVE_DOUBLE;*/
44 static const double dInsane = -9e29;
45 static const unsigned uInsane = 8888888;
51 /* Returned from Tree::GetToken: */
59 /* Following are never returned from Tree::GetToken: */
60 NTT_SingleQuotedString,
61 NTT_DoubleQuotedString,
67 InitCache(uint uCacheCount, tree_t *tree);
69 TreeZero(tree_t *tree);
71 GetNeighborCount(unsigned uNodeIndex, tree_t *tree);
73 IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
75 HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree);
77 TreeToFileNodeRooted(tree_t *tree, uint m_uRootNodeIndex, FILE *fp);
79 ValidateNode(uint uNodeIndex, tree_t *tree);
81 AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree);
83 ExpandCache(tree_t *tree);
85 TreeCreateRooted(tree_t *tree);
87 GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree);
88 static NEWICK_TOKEN_TYPE
89 GetToken(FILE *fp, char szToken[], uint uBytes);
90 /* stuff from textfile.cpp */
92 FileSkipWhite(FILE *fp);
94 FileSkipWhiteX(FILE *fp);
96 SetLeafName(uint uNodeIndex, const char *ptrName, tree_t *tree);
98 AppendBranch(tree_t *tree, uint uExistingLeafIndex);
100 SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
101 double dLength, tree_t *tree);
103 UnrootFromFile(tree_t *tree);
105 GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *tree);
107 InitNode(tree_t *prTree, uint uNodeIndex);
111 * @param[in] uNodeIndex
115 * @return id of left node
116 * @note called GetRight in Muscle3.7
119 GetLeft(uint uNodeIndex, tree_t *tree)
121 assert(NULL != tree);
122 assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
123 return tree->m_uNeighbor2[uNodeIndex];
125 /*** end: GetLeft ***/
130 * @param[in] uNodeIndex
134 * @return id of right node
135 * @note called GetRight in Muscle3.7
138 GetRight(uint uNodeIndex, tree_t *tree)
140 assert(NULL != tree);
141 assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
142 return tree->m_uNeighbor3[uNodeIndex];
144 /*** end: GetRight ***/
149 * @param[in] uNodeIndex
153 * @return leaf id of current node
156 GetLeafId(uint uNodeIndex, tree_t *tree)
158 assert(NULL != tree);
159 assert(uNodeIndex < tree->m_uNodeCount);
160 assert(IsLeaf(uNodeIndex, tree));
162 return tree->m_Ids[uNodeIndex];
164 /*** end: GetLeafId ***/
169 * @note originally called GetLeafName
173 GetLeafName(unsigned uNodeIndex, tree_t *tree)
175 assert(NULL != tree);
176 assert(uNodeIndex < tree->m_uNodeCount);
177 assert(IsLeaf(uNodeIndex, tree));
178 return tree->m_ptrName[uNodeIndex];
180 /*** end: GetLeafName ***/
185 * @brief returns first leaf node for a depth-first traversal of tree
190 * @return node index of first leaf node for depth-first traversal
192 * @note called FirstDepthFirstNode in Muscle3.7
196 FirstDepthFirstNode(tree_t *tree)
200 assert(NULL != tree);
201 assert(IsRooted(tree));
203 /* Descend via left branches until we hit a leaf */
204 uNodeIndex = tree->m_uRootNodeIndex;
205 while (!IsLeaf(uNodeIndex, tree))
206 uNodeIndex = GetLeft(uNodeIndex, tree);
209 /*** end: FirstDepthFirstNode ***/
213 * @brief returns next leaf node index for depth-first traversal of
218 * @param[in] uNodeIndex
221 * @return node index of next leaf node during depth-first traversal
223 * @note called NextDepthFirstNode in Muscle3.7
226 NextDepthFirstNode(uint uNodeIndex, tree_t *tree)
230 assert(NULL != tree);
231 assert(IsRooted(tree));
232 assert(uNodeIndex < tree->m_uNodeCount);
234 if (IsRoot(uNodeIndex, tree))
236 /* Node uNodeIndex is root, end of traversal */
237 return NULL_NEIGHBOR;
240 uParent = GetParent(uNodeIndex, tree);
241 if (GetRight(uParent, tree) == uNodeIndex) {
242 /* Is right branch, return parent=uParent */
246 uNodeIndex = GetRight(uParent, tree);
247 /* Descend left from right sibling uNodeIndex */
248 while (!IsLeaf(uNodeIndex, tree)) {
249 uNodeIndex = GetLeft(uNodeIndex, tree);
252 /* bottom out at leaf uNodeIndex */
255 /*** end: NextDepthFirstNode ***/
260 * @brief check if tree is a rooted tree
263 * @return TRUE if given tree is rooted, FALSE otherwise
267 IsRooted(tree_t *tree)
269 assert(NULL != tree);
270 return tree->m_bRooted;
272 /*** end: IsRooted ***/
279 FreeMuscleTree(tree_t *tree)
286 /* FIXME use GetLeafNodeIndex? or
287 for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
289 if (tree.IsLeaf(uNodeIndex))
291 const char *ptrName =
292 tree.GetLeafName(uNodeIndex);
294 /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
297 for (i=0; i<tree->m_uNodeCount; i++) {
298 /* IsLeaf needs neighbour count, so free those guys later */
299 if (IsLeaf(i, tree)) {
300 CKFREE(tree->m_ptrName[i]);
303 CKFREE(tree->m_ptrName);
305 CKFREE(tree->m_uNeighbor1);
306 CKFREE(tree->m_uNeighbor2);
307 CKFREE(tree->m_uNeighbor3);
311 CKFREE(tree->m_dEdgeLength1);
312 CKFREE(tree->m_dEdgeLength2);
313 CKFREE(tree->m_dEdgeLength3);
315 CKFREE(tree->m_dHeight);
316 CKFREE(tree->m_bHasHeight);
318 CKFREE(tree->m_bHasEdgeLength1);
319 CKFREE(tree->m_bHasEdgeLength2);
320 CKFREE(tree->m_bHasEdgeLength3);
326 /*** end: FreeMuscleTree ***/
331 * @brief create a muscle tree
333 * @note Original comment in Muscle code: "Create rooted tree from a
334 * vector description. Node indexes are 0..N-1 for leaves, N..2N-2 for
335 * internal nodes. Vector subscripts are i-N and have values for
336 * internal nodes only, but those values are node indexes 0..2N-2. So
337 * e.g. if N=6 and Left[2]=1, this means that the third internal node
338 * (node index 8) has the second leaf (node index 1) as its left
339 * child. uRoot gives the vector subscript of the root, so add N to
340 * get the node index."
344 * @param[in] uLeafCount
345 * number of leaf nodes
347 * Internal node index of root node
349 * Node index of left sibling of an internal node.
350 * Index range: 0--uLeafCount-1
352 * Node index of right sibling of an internal node.
353 * Index range: 0--uLeafCount-1
354 * @param[in] LeftLength
355 * Branch length of left branch of an internal node.
356 * Index range: 0--uLeafCount-1
357 * @param[in] RightLength
358 * Branch length of right branch of an internal node.
359 * Index range: 0--uLeafCount-1
361 * Leaf id. Index range: 0--uLeafCount
362 * @param[in] LeafNames
363 * Leaf label. Index range: 0--uLeafCount
367 MuscleTreeCreate(tree_t *tree,
368 uint uLeafCount, uint uRoot,
369 const uint *Left, const uint *Right,
370 const float *LeftLength, const float* RightLength,
371 const uint *LeafIds, char **LeafNames)
376 tree->m_uNodeCount = 2*uLeafCount - 1;
377 InitCache(tree->m_uNodeCount, tree);
379 for (uNodeIndex = 0; uNodeIndex < uLeafCount; ++uNodeIndex) {
380 tree->m_Ids[uNodeIndex] = LeafIds[uNodeIndex];
381 tree->m_ptrName[uNodeIndex] = CkStrdup(LeafNames[uNodeIndex]);
384 for (uNodeIndex = uLeafCount; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
385 uint v = uNodeIndex - uLeafCount;
386 uint uLeft = Left[v];
387 uint uRight = Right[v];
388 float fLeft = LeftLength[v];
389 float fRight = RightLength[v];
391 tree->m_uNeighbor2[uNodeIndex] = uLeft;
392 tree->m_uNeighbor3[uNodeIndex] = uRight;
394 tree->m_bHasEdgeLength2[uNodeIndex] = TRUE;
395 tree->m_bHasEdgeLength3[uNodeIndex] = TRUE;
397 tree->m_dEdgeLength2[uNodeIndex] = fLeft;
398 tree->m_dEdgeLength3[uNodeIndex] = fRight;
400 tree->m_uNeighbor1[uLeft] = uNodeIndex;
401 tree->m_uNeighbor1[uRight] = uNodeIndex;
403 tree->m_dEdgeLength1[uLeft] = fLeft;
404 tree->m_dEdgeLength1[uRight] = fRight;
406 tree->m_bHasEdgeLength1[uLeft] = TRUE;
407 tree->m_bHasEdgeLength1[uRight] = TRUE;
410 tree->m_bRooted = TRUE;
411 tree->m_uRootNodeIndex = uRoot + uLeafCount;
416 /*** end: MuscleTreeCreate ***/
424 * filepointer to write to2
426 * @brief write a muscle tree to a file in newick format (distances
432 MuscleTreeToFile(FILE *fp, tree_t *tree)
434 assert(NULL != tree);
435 if (IsRooted(tree)) {
436 TreeToFileNodeRooted(tree, tree->m_uRootNodeIndex, fp);
440 Log(&rLog, LOG_FATAL, "FIXME: output of unrooted muscle trees not implemented");
443 /*** end: MuscleTreeToFile ***/
448 * @brief check if given node is a leaf node
450 * @param[in] uNodeIndex
455 * @return TRUE if given node is a leaf, FALSE otherwise
457 * @note called IsLeaf in Muscle3.7. See tree.h in original code
460 IsLeaf(uint uNodeIndex, tree_t *tree)
462 assert(NULL != tree);
463 assert(uNodeIndex < tree->m_uNodeCount);
464 if (1 == tree->m_uNodeCount)
466 return 1 == GetNeighborCount(uNodeIndex, tree);
468 /*** end: IsLeaf ***/
475 IsRoot(uint uNodeIndex, tree_t *tree)
477 assert(NULL != tree);
478 return IsRooted(tree) && tree->m_uRootNodeIndex == uNodeIndex;
480 /*** end: IsRoot ***/
487 GetNeighborCount(uint uNodeIndex, tree_t *tree)
490 assert(uNodeIndex < tree->m_uNodeCount);
491 assert(NULL != tree);
492 assert(NULL != tree->m_uNeighbor1);
493 assert(NULL != tree->m_uNeighbor2);
494 assert(NULL != tree->m_uNeighbor3);
495 n1 = tree->m_uNeighbor1[uNodeIndex];
496 n2 = tree->m_uNeighbor2[uNodeIndex];
497 n3 = tree->m_uNeighbor3[uNodeIndex];
498 return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
500 /*** end: GetNeighborCount ***/
506 GetParent(unsigned uNodeIndex, tree_t *tree)
508 assert(NULL != tree);
509 assert(tree->m_bRooted && uNodeIndex < tree->m_uNodeCount);
510 return tree->m_uNeighbor1[uNodeIndex];
512 /*** end: GetParent ***/
519 IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
521 assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
522 assert(NULL != tree);
524 return tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
525 tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
526 tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
528 /*** end: IsEdge ***/
535 HasEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
537 assert(NULL != tree);
538 assert(uNodeIndex1 < tree->m_uNodeCount);
539 assert(uNodeIndex2 < tree->m_uNodeCount);
540 assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
542 if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
543 return tree->m_bHasEdgeLength1[uNodeIndex1];
544 else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
545 return tree->m_bHasEdgeLength2[uNodeIndex1];
546 assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
547 return tree->m_bHasEdgeLength3[uNodeIndex1];
555 GetEdgeLength(uint uNodeIndex1, uint uNodeIndex2, tree_t *tree)
557 assert(NULL != tree);
558 assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
559 if (!HasEdgeLength(uNodeIndex1, uNodeIndex2, tree))
561 Log(&rLog, LOG_FATAL, "Missing edge length in tree %u-%u", uNodeIndex1, uNodeIndex2);
564 if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
565 return tree->m_dEdgeLength1[uNodeIndex1];
566 else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
567 return tree->m_dEdgeLength2[uNodeIndex1];
568 assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
569 return tree->m_dEdgeLength3[uNodeIndex1];
571 /*** end: GetEdgeLength ***/
578 InitNode(tree_t *prTree, uint uNodeIndex)
580 prTree->m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
581 prTree->m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
582 prTree->m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
583 prTree->m_bHasEdgeLength1[uNodeIndex] = FALSE;
584 prTree->m_bHasEdgeLength2[uNodeIndex] = FALSE;
585 prTree->m_bHasEdgeLength3[uNodeIndex] = FALSE;
587 prTree->m_bHasHeight[uNodeIndex] = FALSE;
588 prTree->m_dHeight[uNodeIndex] = dInsane;
590 prTree->m_dEdgeLength1[uNodeIndex] = dInsane;
591 prTree->m_dEdgeLength2[uNodeIndex] = dInsane;
592 prTree->m_dEdgeLength3[uNodeIndex] = dInsane;
594 prTree->m_ptrName[uNodeIndex] = NULL;
595 prTree->m_Ids[uNodeIndex] = uInsane;
597 /*** end: InitNode ***/
604 InitCache(uint uCacheCount, tree_t *tree)
608 tree->m_uCacheCount = uCacheCount;
610 tree->m_uNeighbor1 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
611 tree->m_uNeighbor2 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
612 tree->m_uNeighbor3 = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
614 tree->m_Ids = (uint *) CKMALLOC(sizeof(uint) * tree->m_uCacheCount);
616 tree->m_dEdgeLength1 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
617 tree->m_dEdgeLength2 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
618 tree->m_dEdgeLength3 = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
620 tree->m_dHeight = (double *) CKMALLOC(sizeof(double) * tree->m_uCacheCount);
621 tree->m_bHasHeight = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
623 tree->m_bHasEdgeLength1 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
624 tree->m_bHasEdgeLength2 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
625 tree->m_bHasEdgeLength3 = (bool *) CKMALLOC(sizeof(bool) * tree->m_uCacheCount);
627 tree->m_ptrName = (char **) CKMALLOC(sizeof(char *) * tree->m_uCacheCount);
629 for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
630 InitNode(tree, uNodeIndex);
633 /*** end: InitCache ***/
640 * @note Replacing Tree::Clear but no freeing of memory! Just setting
641 * everything to 0/NULL
644 TreeZero(tree_t *tree)
646 assert(NULL != tree);
647 tree->m_uNodeCount = 0;
648 tree->m_uCacheCount = 0;
650 tree->m_uNeighbor1 = NULL;
651 tree->m_uNeighbor2 = NULL;
652 tree->m_uNeighbor3 = NULL;
654 tree->m_dEdgeLength1 = NULL;
655 tree->m_dEdgeLength2 = NULL;
656 tree->m_dEdgeLength3 = NULL;
659 tree->m_dHeight = NULL;
660 tree->m_bHasHeight = NULL;
662 tree->m_bHasEdgeLength1 = NULL;
663 tree->m_bHasEdgeLength2 = NULL;
664 tree->m_bHasEdgeLength3 = NULL;
666 tree->m_ptrName = NULL;
669 tree->m_bRooted = FALSE;
670 tree->m_uRootNodeIndex = 0;
680 TreeToFileNodeRooted(tree_t *tree, uint uNodeIndex, FILE *fp)
684 assert(IsRooted(tree));
685 assert(NULL != tree);
686 bGroup = !IsLeaf(uNodeIndex, tree) || IsRoot(uNodeIndex, tree);
692 if (IsLeaf(uNodeIndex, tree)) {
693 /* File.PutString(GetName(uNodeIndex)); */
694 fprintf(fp, "%s", tree->m_ptrName[uNodeIndex]);
696 TreeToFileNodeRooted(tree, GetLeft(uNodeIndex, tree), fp);
698 TreeToFileNodeRooted(tree, GetRight(uNodeIndex, tree), fp);
704 if (!IsRoot(uNodeIndex, tree)) {
705 uint uParent = GetParent(uNodeIndex, tree);
706 if (HasEdgeLength(uNodeIndex, uParent, tree))
707 fprintf(fp, ":%g", GetEdgeLength(uNodeIndex, uParent, tree));
711 /*** end: TreeToFileNodeRooted ***/
719 TreeValidate(tree_t *tree)
722 assert(NULL != tree);
723 for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
724 ValidateNode(uNodeIndex, tree);
726 /* FIXME: maybe set negative length to zero? What impact would
729 /*** end TreeValidate ***/
738 ValidateNode(uint uNodeIndex, tree_t *tree)
742 assert(NULL != tree);
744 if (uNodeIndex >= tree->m_uNodeCount)
745 Log(&rLog, LOG_FATAL, "ValidateNode(%u), %u nodes", uNodeIndex, tree->m_uNodeCount);
747 uNeighborCount = GetNeighborCount(uNodeIndex, tree);
750 if (2 == uNeighborCount) {
751 if (!tree->m_bRooted) {
752 Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
755 if (uNodeIndex != tree->m_uRootNodeIndex) {
756 Log(&rLog, LOG_FATAL, "Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
757 uNodeIndex, tree->m_uRootNodeIndex);
761 n1 = tree->m_uNeighbor1[uNodeIndex];
762 n2 = tree->m_uNeighbor2[uNodeIndex];
763 n3 = tree->m_uNeighbor3[uNodeIndex];
765 if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3) {
766 Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n2=null, n3!=null", uNodeIndex);
768 if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2) {
769 Log(&rLog, LOG_FATAL, "Tree::ValidateNode, n3=null, n2!=null", uNodeIndex);
772 if (n1 != NULL_NEIGHBOR)
773 AssertAreNeighbors(uNodeIndex, n1, tree);
774 if (n2 != NULL_NEIGHBOR)
775 AssertAreNeighbors(uNodeIndex, n2, tree);
776 if (n3 != NULL_NEIGHBOR)
777 AssertAreNeighbors(uNodeIndex, n3, tree);
781 if (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) {
782 Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
784 if (n2 != NULL_NEIGHBOR && (n2 == n1 || n2 == n3)) {
785 Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
787 if (n3 != NULL_NEIGHBOR && (n3 == n1 || n3 == n2)) {
788 Log(&rLog, LOG_FATAL, "Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
792 if (IsRooted(tree)) {
793 if (NULL_NEIGHBOR == GetParent(uNodeIndex, tree)) {
794 if (uNodeIndex != tree->m_uRootNodeIndex) {
795 Log(&rLog, LOG_FATAL, "Tree::ValiateNode(%u), no parent", uNodeIndex);
797 } else if (GetLeft(GetParent(uNodeIndex, tree), tree) != uNodeIndex &&
798 GetRight(GetParent(uNodeIndex, tree), tree) != uNodeIndex) {
799 Log(&rLog, LOG_FATAL, "Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
803 /*** end: ValidateNode ***/
811 AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2, tree_t *tree)
814 assert(NULL != tree);
816 if (uNodeIndex1 >= tree->m_uNodeCount || uNodeIndex2 >= tree->m_uNodeCount)
817 Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u), are %u nodes",
818 uNodeIndex1, uNodeIndex2, tree->m_uNodeCount);
820 if (tree->m_uNeighbor1[uNodeIndex1] != uNodeIndex2 &&
821 tree->m_uNeighbor2[uNodeIndex1] != uNodeIndex2 &&
822 tree->m_uNeighbor3[uNodeIndex1] != uNodeIndex2) {
823 Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
827 if (tree->m_uNeighbor1[uNodeIndex2] != uNodeIndex1 &&
828 tree->m_uNeighbor2[uNodeIndex2] != uNodeIndex1 &&
829 tree->m_uNeighbor3[uNodeIndex2] != uNodeIndex1) {
830 Log(&rLog, LOG_FATAL, "AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
834 Has12 = HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
835 Has21 = HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
836 if (Has12 != Has21) {
837 HasEdgeLength(uNodeIndex1, uNodeIndex2, tree);
838 HasEdgeLength(uNodeIndex2, uNodeIndex1, tree);
839 Log(&rLog, LOG_ERROR, "HasEdgeLength(%u, %u)=%c HasEdgeLength(%u, %u)=%c\n",
846 Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, HasEdgeLength not symmetric");
851 double d12 = GetEdgeLength(uNodeIndex1, uNodeIndex2, tree);
852 double d21 = GetEdgeLength(uNodeIndex2, uNodeIndex1, tree);
854 Log(&rLog, LOG_FATAL, "Tree::AssertAreNeighbors, Edge length disagrees %u-%u=%.3g, %u-%u=%.3g",
855 uNodeIndex1, uNodeIndex2, d12,
856 uNodeIndex2, uNodeIndex1, d21);
860 /*** end: AssertAreNeighbors ***/
866 * @note see phyfromfile.cpp in Muscle3.7. Tree has to be a pointer to
867 * an already allocated tree structure.
869 * return non-Zero on failure
871 * leafids will not be set here (FIXME:CHECK if true)
874 MuscleTreeFromFile(tree_t *tree, char *ftree)
879 NEWICK_TOKEN_TYPE NTT;
886 if (NULL == (fp=fopen(ftree, "r"))) {
887 Log(&rLog, LOG_ERROR, "Couldn't open tree-file '%s' for reading. Skipping", ftree);
892 * If we discover that it is unrooted, will convert on the fly.
894 TreeCreateRooted(tree);
896 bEdgeLength = GetGroupFromFile(fp, 0, &dEdgeLength, tree);
899 /* Next token should be either ';' for rooted tree or ',' for
902 NTT = GetToken(fp, szToken, sizeof(szToken));
904 /* If rooted, all done. */
905 if (NTT_Semicolon == NTT) {
907 Log(&rLog, LOG_WARN, " *** Warning *** edge length on root group in Newick file %s\n", ftree);
913 if (NTT_Comma != NTT)
914 Log(&rLog, LOG_FATAL, "Tree::FromFile, expected ';' or ',', got '%s'", szToken);
916 uThirdNode = UnrootFromFile(tree);
917 bEdgeLength = GetGroupFromFile(fp, uThirdNode, &dEdgeLength, tree);
919 SetEdgeLength(0, uThirdNode, dEdgeLength, tree);
925 /*** end MuscleTreeFromFile ***/
933 ExpandCache(tree_t *tree)
935 const uint uNodeCount = 100;
937 uint *uNewNeighbor1, *uNewNeighbor2, *uNewNeighbor3;
939 double *dNewEdgeLength1, *dNewEdgeLength2, *dNewEdgeLength3;
944 bool *bNewHasEdgeLength1, *bNewHasEdgeLength2, *bNewHasEdgeLength3;
947 assert(NULL != tree);
948 uNewCacheCount = tree->m_uCacheCount + uNodeCount;
949 uNewNeighbor1 = (uint *) CKMALLOC(
950 uNewCacheCount * sizeof(uint));
951 uNewNeighbor2 = (uint *) CKMALLOC(
952 uNewCacheCount * sizeof(uint));
953 uNewNeighbor3 = (uint *) CKMALLOC(
954 uNewCacheCount * sizeof(uint));
956 uNewIds = (uint *) CKCALLOC(
957 uNewCacheCount, sizeof(uint));
959 dNewEdgeLength1 = (double *) CKMALLOC(
960 uNewCacheCount * sizeof(double));
961 dNewEdgeLength2 = (double *) CKMALLOC(
962 uNewCacheCount * sizeof(double));
963 dNewEdgeLength3 = (double *) CKMALLOC(
964 uNewCacheCount * sizeof(double));
966 dNewHeight = (double *) CKMALLOC(
967 uNewCacheCount * sizeof(double));
968 bNewHasHeight = (bool *) CKMALLOC(
969 uNewCacheCount * sizeof(bool));
972 bNewHasEdgeLength1 = (bool *) CKMALLOC(
973 uNewCacheCount * sizeof(bool));
974 bNewHasEdgeLength2 = (bool *) CKMALLOC(
975 uNewCacheCount * sizeof(bool));
976 bNewHasEdgeLength3 = (bool *) CKMALLOC(
977 uNewCacheCount * sizeof(bool));
978 ptrNewName = (char **) CKCALLOC(uNewCacheCount, sizeof(char*));
980 if (tree->m_uCacheCount > 0) {
981 uint uUnsignedBytes, uEdgeBytes;
982 uint uBoolBytes, uNameBytes;
984 uUnsignedBytes = tree->m_uCacheCount*sizeof(uint);
986 memcpy(uNewNeighbor1, tree->m_uNeighbor1, uUnsignedBytes);
987 memcpy(uNewNeighbor2, tree->m_uNeighbor2, uUnsignedBytes);
988 memcpy(uNewNeighbor3, tree->m_uNeighbor3, uUnsignedBytes);
990 memcpy(uNewIds, tree->m_Ids, uUnsignedBytes);
992 uEdgeBytes = tree->m_uCacheCount*sizeof(double);
993 memcpy(dNewEdgeLength1, tree->m_dEdgeLength1, uEdgeBytes);
994 memcpy(dNewEdgeLength2, tree->m_dEdgeLength2, uEdgeBytes);
995 memcpy(dNewEdgeLength3, tree->m_dEdgeLength3, uEdgeBytes);
997 memcpy(dNewHeight, tree->m_dHeight, uEdgeBytes);
1000 uBoolBytes = tree->m_uCacheCount*sizeof(bool);
1001 memcpy(bNewHasEdgeLength1, tree->m_bHasEdgeLength1, uBoolBytes);
1002 memcpy(bNewHasEdgeLength2, tree->m_bHasEdgeLength2, uBoolBytes);
1003 memcpy(bNewHasEdgeLength3, tree->m_bHasEdgeLength3, uBoolBytes);
1005 memcpy(bNewHasHeight, tree->m_bHasHeight, uBoolBytes);
1007 uNameBytes = tree->m_uCacheCount*sizeof(char *);
1008 memcpy(ptrNewName, tree->m_ptrName, uNameBytes);
1010 /* similiar to FreeMuscleTree
1013 /* IsLeaf needs m_uNodeCount and all m_uNeighbor's
1017 for (i=0; i<tree->m_uNodeCount; i++) {
1018 if (IsLeaf(i, tree)) {
1020 if (NULL==tree->m_ptrName[i]) {
1021 Log(&rLog, LOG_WARN, "FIXME tree->m_ptrName[%d] is already NULL", i);
1024 CKFREE(tree->m_ptrName[i]);
1028 CKFREE(tree->m_ptrName);
1030 CKFREE(tree->m_uNeighbor1);
1031 CKFREE(tree->m_uNeighbor2);
1032 CKFREE(tree->m_uNeighbor3);
1034 CKFREE(tree->m_Ids);
1036 CKFREE(tree->m_dEdgeLength1);
1037 CKFREE(tree->m_dEdgeLength2);
1038 CKFREE(tree->m_dEdgeLength3);
1040 CKFREE(tree->m_bHasEdgeLength1);
1041 CKFREE(tree->m_bHasEdgeLength2);
1042 CKFREE(tree->m_bHasEdgeLength3);
1044 CKFREE(tree->m_bHasHeight);
1045 CKFREE(tree->m_dHeight);
1049 tree->m_uCacheCount = uNewCacheCount;
1050 tree->m_uNeighbor1 = uNewNeighbor1;
1051 tree->m_uNeighbor2 = uNewNeighbor2;
1052 tree->m_uNeighbor3 = uNewNeighbor3;
1053 tree->m_Ids = uNewIds;
1054 tree->m_dEdgeLength1 = dNewEdgeLength1;
1055 tree->m_dEdgeLength2 = dNewEdgeLength2;
1056 tree->m_dEdgeLength3 = dNewEdgeLength3;
1059 tree->m_dHeight = dNewHeight;
1060 tree->m_bHasHeight = bNewHasHeight;
1062 tree->m_bHasEdgeLength1 = bNewHasEdgeLength1;
1063 tree->m_bHasEdgeLength2 = bNewHasEdgeLength2;
1064 tree->m_bHasEdgeLength3 = bNewHasEdgeLength3;
1066 tree->m_ptrName = ptrNewName;
1069 /*** end: ExpandCache ***/
1075 * Tree must be pointer to an already allocated tree structure
1079 TreeCreateRooted(tree_t *tree)
1083 tree->m_uNodeCount = 1;
1085 tree->m_uNeighbor1[0] = NULL_NEIGHBOR;
1086 tree->m_uNeighbor2[0] = NULL_NEIGHBOR;
1087 tree->m_uNeighbor3[0] = NULL_NEIGHBOR;
1089 tree->m_bHasEdgeLength1[0] = FALSE;
1090 tree->m_bHasEdgeLength2[0] = FALSE;
1091 tree->m_bHasEdgeLength3[0] = FALSE;
1093 tree->m_bHasHeight[0] = FALSE;
1096 tree->m_uRootNodeIndex = 0;
1097 tree->m_bRooted = TRUE;
1103 /*** end: TreeCreateRooted ***/
1110 UnrootFromFile(tree_t *tree)
1114 Log("Before unroot:\n");
1118 if (!tree->m_bRooted)
1119 Log(&rLog, LOG_FATAL, "Tree::Unroot, not rooted");
1121 /* Convention: root node is always node zero */
1122 assert(IsRoot(0, tree));
1123 assert(NULL_NEIGHBOR == tree->m_uNeighbor1[0]);
1125 uThirdNode = tree->m_uNodeCount++;
1127 tree->m_uNeighbor1[0] = uThirdNode;
1128 tree->m_uNeighbor1[uThirdNode] = 0;
1130 tree->m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
1131 tree->m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
1133 tree->m_dEdgeLength1[0] = 0;
1134 tree->m_dEdgeLength1[uThirdNode] = 0;
1135 tree->m_bHasEdgeLength1[uThirdNode] = TRUE;
1137 tree->m_bRooted = FALSE;
1140 Log("After unroot:\n");
1146 /*** end: UnrootFromFile ***/
1155 GetGroupFromFile(FILE *fp, uint uNodeIndex, double *ptrdEdgeLength, tree_t *tree)
1158 NEWICK_TOKEN_TYPE NTT = GetToken(fp, szToken, sizeof(szToken));
1163 /* Group is either leaf name or (left, right). */
1164 if (NTT_String == NTT) {
1165 SetLeafName(uNodeIndex, szToken, tree);
1167 Log(&rLog, LOG_FORCED_DEBUG, "Group is leaf '%s'", szToken);
1169 } else if (NTT_Lparen == NTT) {
1170 const unsigned uLeft = AppendBranch(tree, uNodeIndex);
1171 const unsigned uRight = uLeft + 1;
1173 bool bLeftLength = GetGroupFromFile(fp, uLeft, &dEdgeLength, tree);
1175 /* Left sub-group...
1178 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Got '(', group is compound, expect left sub-group");
1181 Log(&rLog, LOG_FORCED_DEBUG, "Edge length for left sub-group: %.3g", dEdgeLength);
1183 Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for left sub-group");
1187 SetEdgeLength(uNodeIndex, uLeft, dEdgeLength, tree);
1189 /* ... then comma ...
1192 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect comma");
1194 NTT = GetToken(fp, szToken, sizeof(szToken));
1195 if (NTT_Comma != NTT)
1196 Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
1198 /* ...then right sub-group...
1201 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect right sub-group");
1203 bRightLength = GetGroupFromFile(fp, uRight, &dEdgeLength, tree);
1205 SetEdgeLength(uNodeIndex, uRight, dEdgeLength, tree);
1209 Log(&rLog, LOG_FORCED_DEBUG, "Edge length for right sub-group: %.3g", dEdgeLength);
1211 Log(&rLog, LOG_FORCED_DEBUG, "%s", "No edge length for right sub-group");
1214 /* ... then closing parenthesis.
1217 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Expect closing parenthesis (or comma if > 2-ary)");
1219 NTT = GetToken(fp, szToken, sizeof(szToken));
1220 if (NTT_Rparen == NTT)
1222 else if (NTT_Comma == NTT) {
1223 if (ungetc(',', fp)==EOF)
1224 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1227 Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
1229 Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
1233 /* Group may optionally be followed by edge length.
1235 bEof = FileSkipWhiteX(fp);
1238 if ((c = fgetc(fp))==EOF) /* GetCharX */
1239 Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1241 Log(&rLog, LOG_FORCED_DEBUG, "Character following group, could be colon, is '%c'", c);
1244 NTT = GetToken(fp, szToken, sizeof(szToken));
1245 if (NTT_String != NTT)
1246 Log(&rLog, LOG_FATAL, "Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
1247 *ptrdEdgeLength = atof(szToken);
1250 if (ungetc(c, fp)==EOF)
1251 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1255 /*** end: GetGroupFromFile ***/
1264 FileSkipWhite(FILE *fp)
1266 bool bEof = FileSkipWhiteX(fp);
1268 Log(&rLog, LOG_FATAL, "%s", "End-of-file skipping white space");
1270 /*** end: FileSkipWhite ***/
1279 FileSkipWhiteX(FILE *fp)
1286 if ((c = fgetc(fp))==EOF) {
1295 if (ungetc(c, fp)==EOF)
1296 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1302 /*** end: FileSkipWhiteX ***/
1311 GetToken(FILE *fp, char szToken[], uint uBytes)
1314 unsigned uBytesCopied = 0;
1315 NEWICK_TOKEN_TYPE TT;
1317 /* Skip leading white space */
1320 if ((c = fgetc(fp))==EOF) /* GetCharX */
1321 Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1323 /* In case a single-character token */
1339 return NTT_Semicolon;
1345 TT = NTT_SingleQuotedString;
1346 if ((c = fgetc(fp))==EOF) /* GetCharX */
1347 Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1351 TT = NTT_DoubleQuotedString;
1352 if ((c = fgetc(fp))==EOF) /* GetCharX */
1353 Log(&rLog, LOG_FATAL, "%s", "fgetc reached end of file");
1368 if (TT != NTT_Comment) {
1369 if (uBytesCopied < uBytes - 2) {
1370 szToken[uBytesCopied++] = c;
1371 szToken[uBytesCopied] = 0;
1373 Log(&rLog, LOG_FATAL, "Tree::GetToken: input buffer too small, token so far='%s'", szToken);
1376 c = fgetc(fp); /* GetChar */
1377 bEof = (c==EOF ? TRUE : FALSE);
1384 if (0 != strchr("():;,", c)) {
1385 if (ungetc(c, fp)==EOF)
1386 Log(&rLog, LOG_FATAL, "%s" "ungetc failed");
1393 case NTT_SingleQuotedString:
1398 case NTT_DoubleQuotedString:
1405 return GetToken(fp, szToken, uBytes);
1409 Log(&rLog, LOG_FATAL, "Tree::GetToken, invalid TT=%u", TT);
1413 /*** end: GetToken ***/
1421 SetLeafName(unsigned uNodeIndex, const char *ptrName, tree_t *tree)
1423 assert(uNodeIndex < tree->m_uNodeCount);
1424 assert(IsLeaf(uNodeIndex, tree));
1425 free(tree->m_ptrName[uNodeIndex]);
1426 /*LOG_DEBUG("Setting tree->m_ptrName[uNodeIndex=%d] to %s", uNodeIndex, ptrName);*/
1427 tree->m_ptrName[uNodeIndex] = CkStrdup(ptrName);
1429 /*** end: SetLeafName ***/
1436 * Append a new branch. This adds two new nodes and joins them to an
1437 * existing leaf node. Return value is k, new nodes have indexes k and
1442 AppendBranch(tree_t *tree, uint uExistingLeafIndex)
1448 if (0 == tree->m_uNodeCount) {
1449 Log(&rLog, LOG_FATAL, "%s(): %s", __FUNCTION__, "tree has not been created");
1451 assert(NULL_NEIGHBOR == tree->m_uNeighbor2[uExistingLeafIndex]);
1452 assert(NULL_NEIGHBOR == tree->m_uNeighbor3[uExistingLeafIndex]);
1453 assert(uExistingLeafIndex < tree->m_uNodeCount);
1455 if (!IsLeaf(uExistingLeafIndex, tree)) {
1456 Log(&rLog, LOG_FATAL, "AppendBranch(%u): not leaf", uExistingLeafIndex);
1460 if (tree->m_uNodeCount >= tree->m_uCacheCount - 2) {
1463 uNewLeaf1 = tree->m_uNodeCount;
1464 uNewLeaf2 = tree->m_uNodeCount + 1;
1466 tree->m_uNodeCount += 2;
1468 tree->m_uNeighbor2[uExistingLeafIndex] = uNewLeaf1;
1469 tree->m_uNeighbor3[uExistingLeafIndex] = uNewLeaf2;
1471 tree->m_uNeighbor1[uNewLeaf1] = uExistingLeafIndex;
1472 tree->m_uNeighbor1[uNewLeaf2] = uExistingLeafIndex;
1474 tree->m_uNeighbor2[uNewLeaf1] = NULL_NEIGHBOR;
1475 tree->m_uNeighbor2[uNewLeaf2] = NULL_NEIGHBOR;
1477 tree->m_uNeighbor3[uNewLeaf1] = NULL_NEIGHBOR;
1478 tree->m_uNeighbor3[uNewLeaf2] = NULL_NEIGHBOR;
1480 tree->m_dEdgeLength2[uExistingLeafIndex] = 0;
1481 tree->m_dEdgeLength3[uExistingLeafIndex] = 0;
1483 tree->m_dEdgeLength1[uNewLeaf1] = 0;
1484 tree->m_dEdgeLength2[uNewLeaf1] = 0;
1485 tree->m_dEdgeLength3[uNewLeaf1] = 0;
1487 tree->m_dEdgeLength1[uNewLeaf2] = 0;
1488 tree->m_dEdgeLength2[uNewLeaf2] = 0;
1489 tree->m_dEdgeLength3[uNewLeaf2] = 0;
1491 tree->m_bHasEdgeLength1[uNewLeaf1] = FALSE;
1492 tree->m_bHasEdgeLength2[uNewLeaf1] = FALSE;
1493 tree->m_bHasEdgeLength3[uNewLeaf1] = FALSE;
1495 tree->m_bHasEdgeLength1[uNewLeaf2] = FALSE;
1496 tree->m_bHasEdgeLength2[uNewLeaf2] = FALSE;
1497 tree->m_bHasEdgeLength3[uNewLeaf2] = FALSE;
1500 tree->m_bHasHeight[uNewLeaf1] = FALSE;
1501 tree->m_bHasHeight[uNewLeaf2] = FALSE;
1503 tree->m_Ids[uNewLeaf1] = uInsane;
1504 tree->m_Ids[uNewLeaf2] = uInsane;
1508 /*** end: AppendBranch ***/
1516 SetEdgeLength(uint uNodeIndex1, uint uNodeIndex2,
1517 double dLength, tree_t *tree)
1519 assert(uNodeIndex1 < tree->m_uNodeCount && uNodeIndex2 < tree->m_uNodeCount);
1520 assert(IsEdge(uNodeIndex1, uNodeIndex2, tree));
1522 if (tree->m_uNeighbor1[uNodeIndex1] == uNodeIndex2) {
1523 tree->m_dEdgeLength1[uNodeIndex1] = dLength;
1524 tree->m_bHasEdgeLength1[uNodeIndex1] = TRUE;
1525 } else if (tree->m_uNeighbor2[uNodeIndex1] == uNodeIndex2) {
1526 tree->m_dEdgeLength2[uNodeIndex1] = dLength;
1527 tree->m_bHasEdgeLength2[uNodeIndex1] = TRUE;
1530 assert(tree->m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
1531 tree->m_dEdgeLength3[uNodeIndex1] = dLength;
1532 tree->m_bHasEdgeLength3[uNodeIndex1] = TRUE;
1535 if (tree->m_uNeighbor1[uNodeIndex2] == uNodeIndex1) {
1536 tree->m_dEdgeLength1[uNodeIndex2] = dLength;
1537 tree->m_bHasEdgeLength1[uNodeIndex2] = TRUE;
1538 } else if (tree->m_uNeighbor2[uNodeIndex2] == uNodeIndex1) {
1539 tree->m_dEdgeLength2[uNodeIndex2] = dLength;
1540 tree->m_bHasEdgeLength2[uNodeIndex2] = TRUE;
1542 assert(tree->m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
1543 tree->m_dEdgeLength3[uNodeIndex2] = dLength;
1544 tree->m_bHasEdgeLength3[uNodeIndex2] = TRUE;
1547 /*** end: SetEdgeLength ***/
1559 LogTree(tree_t *tree, FILE *fp)
1565 fprintf(fp, "This is a tree with %u nodes, which is ", tree->m_uNodeCount);
1567 if (IsRooted(tree)) {
1568 fprintf(fp, "rooted:\n");
1569 fprintf(fp, "Index Parnt LengthP Left LengthL Right LengthR Id Name\n");
1570 fprintf(fp, "----- ----- ------- ---- ------- ----- ------- ----- ----\n");
1573 fprintf(fp, "unrooted;\n");
1574 fprintf(fp, "Index Nbr_1 Length1 Nbr_2 Length2 Nbr_3 Length3 Id Name\n");
1575 fprintf(fp, "----- ----- ------- ----- ------- ----- ------- ----- ----\n");
1578 for (uNodeIndex = 0; uNodeIndex < tree->m_uNodeCount; ++uNodeIndex) {
1579 fprintf(fp, "%5u ", uNodeIndex);
1580 n1 = tree->m_uNeighbor1[uNodeIndex];
1581 n2 = tree->m_uNeighbor2[uNodeIndex];
1582 n3 = tree->m_uNeighbor3[uNodeIndex];
1584 if (NULL_NEIGHBOR != n1) {
1585 fprintf(fp, "%5u ", n1);
1586 if (tree->m_bHasEdgeLength1[uNodeIndex])
1587 fprintf(fp, "%7.3g ", tree->m_dEdgeLength1[uNodeIndex]);
1594 if (NULL_NEIGHBOR != n2) {
1595 fprintf(fp, "%5u ", n2);
1596 if (tree->m_bHasEdgeLength2[uNodeIndex])
1597 fprintf(fp, "%7.3g ", tree->m_dEdgeLength2[uNodeIndex]);
1604 if (NULL_NEIGHBOR != n3) {
1605 fprintf(fp, "%5u ", n3);
1606 if (tree->m_bHasEdgeLength3[uNodeIndex])
1607 fprintf(fp, "%7.3g ", tree->m_dEdgeLength3[uNodeIndex]);
1614 if (tree->m_Ids != 0 && IsLeaf(uNodeIndex, tree)) {
1615 unsigned uId = tree->m_Ids[uNodeIndex];
1619 fprintf(fp, "%5u", uId);
1624 if (tree->m_bRooted && uNodeIndex == tree->m_uRootNodeIndex)
1625 fprintf(fp, " [ROOT] ");
1626 ptrName = tree->m_ptrName[uNodeIndex];
1628 fprintf(fp, " %s", ptrName);
1633 /*** end: LogTree ***/
1639 * replaces m_uLeafCount
1642 GetLeafCount(tree_t *tree)
1646 return (tree->m_uNodeCount+1)/2;
1648 /*** GetLeafCount ***/
1656 GetNodeCount(tree_t *tree)
1660 return 2*(GetLeafCount(tree)) - 1;
1662 /*** end: GetNodeCount ***/
1669 GetNeighbor(uint uNodeIndex, uint uNeighborSubscript, tree_t *prTree)
1671 assert(uNodeIndex < prTree->m_uNodeCount);
1672 switch (uNeighborSubscript)
1675 return prTree->m_uNeighbor1[uNodeIndex];
1677 return prTree->m_uNeighbor2[uNodeIndex];
1679 return prTree->m_uNeighbor3[uNodeIndex];
1681 Log(&rLog, LOG_FATAL, "Internal error in %s: sub=%u", __FUNCTION__, uNeighborSubscript);
1682 return NULL_NEIGHBOR;
1684 /*** end: GetNeighbor ***/
1694 SetLeafId(tree_t *tree, uint uNodeIndex, uint uId)
1696 assert(uNodeIndex < tree->m_uNodeCount);
1697 assert(IsLeaf(uNodeIndex, tree));
1698 tree->m_Ids[uNodeIndex] = uId;
1700 /*** end: SetLeafId ***/
1707 GetRootNodeIndex(tree_t *tree)
1710 return tree->m_uRootNodeIndex;
1712 /*** end: GetRootNodeIndex ***/
1717 * @note avoid usage if you want to iterate over all indices, because
1722 LeafIndexToNodeIndex(uint uLeafIndex, tree_t *prTree) {
1723 uint uLeafCount = 0;
1724 unsigned uNodeCount = GetNodeCount(prTree);
1727 for (uNodeIndex = 0; uNodeIndex < uNodeCount; uNodeIndex++) {
1728 if (IsLeaf(uNodeIndex, prTree)) {
1729 if (uLeafCount == uLeafIndex) {
1736 Log(&rLog, LOG_FATAL, "Internal error: node index out of range");
1739 /*** end: LeafIndexToNodeIndex ***/
1745 * @brief Append a (source) tree to a (dest) tree to a given node
1746 * which will be replaced. All other nodes in that tree will stay the
1749 * @param[out] prDstTree
1750 * The tree to append to
1751 * @param[in] uDstTreeReplaceNodeIndex
1752 * Dest tree node to which source tree will be appended
1753 * @param[in] prSrcTree
1754 * The tree to append
1756 * @note No nodes inside prDstTree will change except
1757 * uDstTreeReplaceNodeIndex
1760 * @warning: Function won't check or touch the m_Ids/leaf-indices!
1761 * That means if you want to join two trees with leaf indices 1-10 and
1762 * 1-10 your m_Ids/leaf-indices won't be unique anymore and the
1763 * association between your sequences and the tree are broken. Make
1764 * sure m_Ids are unique before calling me.
1766 * The easiest would have been to do this by recursively calling
1767 * AppendBranch() (after adding uSrcTreeNodeIndex as extra argument to
1768 * this function). But recursion is evil. Yet another version would be
1769 * to setup all the data and call MuscleTreeCreate() to create a third
1770 * tree, which seems complicated and wasteful. The approach taken here
1771 * is the following: increase dest tree memory, copy over each src
1772 * tree node data and update the indices and counters. This is tricky
1773 * and has a lot of potential for bugs if tree interface is changed
1774 * (and even if it isn't).
1778 AppendTree(tree_t *prDstTree, uint uDstTreeReplaceNodeIndex, tree_t *prSrcTree)
1780 uint uSrcTreeNodeIndex;
1781 uint uOrgDstTreeNodeCount;
1783 assert(NULL!=prDstTree);
1784 assert(NULL!=prSrcTree);
1785 assert(uDstTreeReplaceNodeIndex < prDstTree->m_uNodeCount);
1786 assert(IsLeaf(uDstTreeReplaceNodeIndex, prDstTree));
1787 assert(IsRooted(prDstTree) && IsRooted(prSrcTree));
1790 uOrgDstTreeNodeCount = prDstTree->m_uNodeCount;
1793 /* increase dest tree memory
1795 while (prDstTree->m_uCacheCount
1797 (GetNodeCount(prDstTree) + GetNodeCount(prSrcTree))) {
1798 ExpandCache(prDstTree);
1802 /* copy all src tree nodes
1805 for (uSrcTreeNodeIndex=0;
1806 uSrcTreeNodeIndex<GetNodeCount(prSrcTree); uSrcTreeNodeIndex++) {
1807 uint uNewDstNodeIndex = prDstTree->m_uNodeCount;
1809 /* distinguish 4 cases for src nodes to copy:
1811 * 1. src node is the only node, i.e. root & leaf
1813 * 2. src node is root: set only left & right, but not parent
1814 * and just replace the given dest index
1816 * 3. src node is leaf: set only parent
1818 * 4. src node is internal node: update all three neighbours
1820 * FIXME: this is messy. Is there a cleaner way to do this by
1821 * merging all cases?
1824 if (IsRoot(uSrcTreeNodeIndex, prSrcTree) && IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1825 /* special case: if this is the only member in
1826 * tree, i.e. it's root and leaf. Copy leaf name and leaf
1827 * id. No neighbours to update
1830 /* free dst node name if set */
1831 if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1832 CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1835 prDstTree->m_ptrName[uDstTreeReplaceNodeIndex] =
1836 CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1838 prDstTree->m_Ids[uDstTreeReplaceNodeIndex] =
1839 prSrcTree->m_Ids[uSrcTreeNodeIndex];
1841 /* no updated of uNodeCount, because we used the replace node */
1844 Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the only src leaf node: parent=%d (%f)",
1845 uDstTreeReplaceNodeIndex,
1846 prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex]);
1849 } else if (IsRoot(uSrcTreeNodeIndex, prSrcTree)) {
1850 /* src node is root: replace uDstTreeReplaceNodeIndex
1851 * (not uNewDstNodeIndex) with src root, i.e. the
1852 * uDstTreeReplaceNodeIndex becomes an internal node now.
1854 * We only have two neighbours 2 & 3 (no parent). Keep old
1855 * parent info (neighbor 1).
1858 /* free dst node name if set */
1859 if (NULL != prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]) {
1860 CKFREE(prDstTree->m_ptrName[uDstTreeReplaceNodeIndex]);
1863 prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex] =
1864 prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1865 prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex] =
1866 prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1868 prDstTree->m_bHasEdgeLength2[uDstTreeReplaceNodeIndex] =
1869 prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1870 prDstTree->m_bHasEdgeLength3[uDstTreeReplaceNodeIndex] =
1871 prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
1873 prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex] =
1874 prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1875 prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex] =
1876 prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1878 /* make Id invalid */
1879 prDstTree->m_Ids[uDstTreeReplaceNodeIndex] = uInsane;
1881 /* no updated of uNodeCount, because we used the replace node */
1884 Log(&rLog, LOG_FORCED_DEBUG, "Updated dst rpl node %d with the src root node: (untouched) parent=%d (%f) left=%d (%f) right=%d (%f)",
1885 uDstTreeReplaceNodeIndex,
1886 prDstTree->m_uNeighbor1[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength1[uDstTreeReplaceNodeIndex],
1887 prDstTree->m_uNeighbor2[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength2[uDstTreeReplaceNodeIndex],
1888 prDstTree->m_uNeighbor3[uDstTreeReplaceNodeIndex], prDstTree->m_dEdgeLength3[uDstTreeReplaceNodeIndex]);
1891 } else if (IsLeaf(uSrcTreeNodeIndex, prSrcTree)) {
1892 /* src node is a leaf, which means we only have one
1893 * neighbour, and that is its parent, i.e. n1
1897 /* initialise/zero new node to default values
1899 InitNode(prDstTree, uNewDstNodeIndex);
1902 /* update m_ptrName/leaf name
1904 prDstTree->m_ptrName[uNewDstNodeIndex] =
1905 CkStrdup(GetLeafName(uSrcTreeNodeIndex, prSrcTree));
1907 /* update parent node (beware of special case: parent was
1909 if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1910 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1911 uDstTreeReplaceNodeIndex;
1913 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1914 prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1917 /* update edge length info to parent
1919 prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1920 prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1921 prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1922 prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1924 /* update sequence/object id
1926 prDstTree->m_Ids[uNewDstNodeIndex] =
1927 prSrcTree->m_Ids[uSrcTreeNodeIndex];
1929 /* we used a new node so increase their count */
1930 prDstTree->m_uNodeCount += 1;
1933 Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with a src leaf node: parent=%d (%f)",
1935 prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex]);
1939 /* src node is not root neither leaf, means we have an
1940 * internal node. Update all neighbour info
1944 /* initialise/zero node values to default values
1946 InitNode(prDstTree, uNewDstNodeIndex);
1950 /* parent: special case if parent was src tree root */
1951 if (IsRoot(prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex], prSrcTree)) {
1952 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1953 uDstTreeReplaceNodeIndex;
1955 prDstTree->m_uNeighbor1[uNewDstNodeIndex] =
1956 prSrcTree->m_uNeighbor1[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1959 prDstTree->m_uNeighbor2[uNewDstNodeIndex] =
1960 prSrcTree->m_uNeighbor2[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1962 prDstTree->m_uNeighbor3[uNewDstNodeIndex] =
1963 prSrcTree->m_uNeighbor3[uSrcTreeNodeIndex] + uOrgDstTreeNodeCount;
1965 /* update edge length info
1968 prDstTree->m_bHasEdgeLength1[uNewDstNodeIndex] =
1969 prSrcTree->m_bHasEdgeLength1[uSrcTreeNodeIndex];
1970 prDstTree->m_dEdgeLength1[uNewDstNodeIndex] =
1971 prSrcTree->m_dEdgeLength1[uSrcTreeNodeIndex];
1973 prDstTree->m_bHasEdgeLength2[uNewDstNodeIndex] =
1974 prSrcTree->m_bHasEdgeLength2[uSrcTreeNodeIndex];
1975 prDstTree->m_dEdgeLength2[uNewDstNodeIndex] =
1976 prSrcTree->m_dEdgeLength2[uSrcTreeNodeIndex];
1978 prDstTree->m_bHasEdgeLength3[uNewDstNodeIndex] =
1979 prSrcTree->m_bHasEdgeLength3[uSrcTreeNodeIndex];
1980 prDstTree->m_dEdgeLength3[uNewDstNodeIndex] =
1981 prSrcTree->m_dEdgeLength3[uSrcTreeNodeIndex];
1983 /* we used a new node so increase their count */
1984 prDstTree->m_uNodeCount += 1;
1987 Log(&rLog, LOG_FORCED_DEBUG, "Updated dst node %d with an internal src node: parent=%d (%f) left=%d (%f) right=%d (%f)",
1989 prDstTree->m_uNeighbor1[uNewDstNodeIndex], prDstTree->m_dEdgeLength1[uNewDstNodeIndex],
1990 prDstTree->m_uNeighbor2[uNewDstNodeIndex], prDstTree->m_dEdgeLength2[uNewDstNodeIndex],
1991 prDstTree->m_uNeighbor3[uNewDstNodeIndex], prDstTree->m_dEdgeLength3[uNewDstNodeIndex]);
1996 /* end for each src tree node */
2000 * m_uRootNodeIndex stays the same.
2002 * No need to touch m_uCacheCount.
2006 Log(&rLog, LOG_FATAL, "Internal error: Height usage not implemented in %s", __FUNCTION__);
2011 TreeValidate(prDstTree);
2016 /*** end: AppendTree() ***/