--- /dev/null
+#include "muscle.h"\r
+#include "tree.h"\r
+#include "edgelist.h"\r
+\r
+#define TRACE 0\r
+\r
+struct EdgeInfo\r
+ {\r
+ EdgeInfo()\r
+ {\r
+ m_bSet = false;\r
+ }\r
+// Is data in this structure valid (i.e, has been set)?\r
+ bool m_bSet;\r
+\r
+// Node at start of this edge\r
+ unsigned m_uNode1;\r
+\r
+// Node at end of this edge\r
+ unsigned m_uNode2;\r
+\r
+// Maximum distance from Node2 to a leaf\r
+ double m_dMaxDistToLeaf;\r
+\r
+// Sum of distances from Node2 to all leaves under Node2\r
+ double m_dTotalDistToLeaves;\r
+\r
+// Next node on path from Node2 to most distant leaf\r
+ unsigned m_uMaxStep;\r
+\r
+// Most distant leaf from Node2 (used for debugging only)\r
+ unsigned m_uMostDistantLeaf;\r
+\r
+// Number of leaves under Node2\r
+ unsigned m_uLeafCount;\r
+ };\r
+\r
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
+ unsigned *ptruNode1, unsigned *ptruNode2,\r
+ double *ptrdLength1, double *ptrdLength2);\r
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
+ unsigned *ptruNode1, unsigned *ptruNode2,\r
+ double *ptrdLength1, double *ptrdLength2);\r
+\r
+static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)\r
+ {\r
+ Log("Node1 Node2 MaxDist TotDist MostDist LeafCount Step\n");\r
+ Log("----- ----- ------- ------- -------- --------- ----\n");\r
+ // 12345 12345 1234567 1234567 12345678 123456789\r
+\r
+ for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)\r
+ for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)\r
+ {\r
+ const EdgeInfo &EI = EIs[uNode][uNeighbor];\r
+ if (!EI.m_bSet)\r
+ continue;\r
+ Log("%5u %5u %7.3g %7.3g %8u %9u",\r
+ EI.m_uNode1,\r
+ EI.m_uNode2,\r
+ EI.m_dMaxDistToLeaf,\r
+ EI.m_dTotalDistToLeaves,\r
+ EI.m_uMostDistantLeaf,\r
+ EI.m_uLeafCount);\r
+ if (NULL_NEIGHBOR != EI.m_uMaxStep)\r
+ Log(" %4u", EI.m_uMaxStep);\r
+ Log("\n");\r
+ }\r
+ }\r
+\r
+static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)\r
+ {\r
+ const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);\r
+ EdgeInfo &EI = EIs[uNode1][uNeighborIndex];\r
+ EI.m_uNode1 = uNode1;\r
+ EI.m_uNode2 = uNode2;\r
+\r
+ if (tree.IsLeaf(uNode2))\r
+ {\r
+ EI.m_dMaxDistToLeaf = 0;\r
+ EI.m_dTotalDistToLeaves = 0;\r
+ EI.m_uMaxStep = NULL_NEIGHBOR;\r
+ EI.m_uMostDistantLeaf = uNode2;\r
+ EI.m_uLeafCount = 1;\r
+ EI.m_bSet = true;\r
+ return;\r
+ }\r
+\r
+ double dMaxDistToLeaf = -1e29;\r
+ double dTotalDistToLeaves = 0.0;\r
+ unsigned uLeafCount = 0;\r
+ unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
+ unsigned uMaxStep = NULL_NEIGHBOR;\r
+\r
+ const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);\r
+ for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+ {\r
+ const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);\r
+ if (uNode3 == uNode1)\r
+ continue;\r
+ const EdgeInfo &EINext = EIs[uNode2][uSub];\r
+ if (!EINext.m_bSet)\r
+ Quit("CalcInfo: internal error, dist %u->%u not known",\r
+ uNode2, uNode3);\r
+\r
+\r
+ uLeafCount += EINext.m_uLeafCount;\r
+\r
+ const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);\r
+ const double dTotalDist = EINext.m_dTotalDistToLeaves +\r
+ EINext.m_uLeafCount*dEdgeLength;\r
+ dTotalDistToLeaves += dTotalDist;\r
+\r
+ const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;\r
+ if (dDist > dMaxDistToLeaf)\r
+ {\r
+ dMaxDistToLeaf = dDist;\r
+ uMostDistantLeaf = EINext.m_uMostDistantLeaf;\r
+ uMaxStep = uNode3;\r
+ }\r
+ }\r
+ if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||\r
+ 0 == uLeafCount)\r
+ Quit("CalcInfo: internal error 2");\r
+\r
+ const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);\r
+ EI.m_dMaxDistToLeaf = dMaxDistToLeaf;\r
+ EI.m_dTotalDistToLeaves = dTotalDistToLeaves;\r
+ EI.m_uMaxStep = uMaxStep;\r
+ EI.m_uMostDistantLeaf = uMostDistantLeaf;\r
+ EI.m_uLeafCount = uLeafCount;\r
+ EI.m_bSet = true;\r
+ }\r
+\r
+static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
+ unsigned uNodeTo)\r
+ {\r
+ const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);\r
+ return EIs[uNodeFrom][uSub].m_bSet;\r
+ }\r
+\r
+static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
+ unsigned uNodeTo)\r
+ {\r
+ const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);\r
+ for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+ {\r
+ unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);\r
+ if (uNeighborIndex == uNodeFrom)\r
+ continue;\r
+ if (!EIs[uNodeTo][uSub].m_bSet)\r
+ return false;\r
+ }\r
+ return true;\r
+ }\r
+\r
+void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,\r
+ double *ptrdLength1, double *ptrdLength2,\r
+ ROOT RootMethod)\r
+ {\r
+#if TRACE\r
+ tree.LogMe();\r
+#endif\r
+ if (tree.IsRooted())\r
+ Quit("FindRoot: tree already rooted");\r
+\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+ const unsigned uLeafCount = tree.GetLeafCount();\r
+\r
+ if (uNodeCount < 2)\r
+ Quit("Root: don't support trees with < 2 edges");\r
+\r
+ EdgeInfo **EIs = new EdgeInfo *[uNodeCount];\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ EIs[uNodeIndex] = new EdgeInfo[3];\r
+\r
+ EdgeList Edges;\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ if (tree.IsLeaf(uNodeIndex))\r
+ {\r
+ unsigned uParent = tree.GetNeighbor1(uNodeIndex);\r
+ Edges.Add(uParent, uNodeIndex);\r
+ }\r
+\r
+#if TRACE\r
+ Log("Edges: ");\r
+ Edges.LogMe();\r
+#endif\r
+\r
+// Main loop: iterate until all distances known\r
+ double dAllMaxDist = -1e20;\r
+ unsigned uMaxFrom = NULL_NEIGHBOR;\r
+ unsigned uMaxTo = NULL_NEIGHBOR;\r
+ for (;;)\r
+ {\r
+ EdgeList NextEdges;\r
+\r
+#if TRACE\r
+ Log("\nTop of main loop\n");\r
+ Log("Edges: ");\r
+ Edges.LogMe();\r
+ Log("MDs:\n");\r
+ ListEIs(EIs, uNodeCount);\r
+#endif\r
+\r
+ // For all edges\r
+ const unsigned uEdgeCount = Edges.GetCount();\r
+ if (0 == uEdgeCount)\r
+ break;\r
+ for (unsigned n = 0; n < uEdgeCount; ++n)\r
+ {\r
+ unsigned uNodeFrom;\r
+ unsigned uNodeTo;\r
+ Edges.GetEdge(n, &uNodeFrom, &uNodeTo);\r
+\r
+ CalcInfo(tree, uNodeFrom, uNodeTo, EIs);\r
+#if TRACE\r
+ Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);\r
+#endif\r
+ const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);\r
+ for (unsigned i = 0; i < uNeighborCount; ++i)\r
+ {\r
+ const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);\r
+ if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&\r
+ AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))\r
+ NextEdges.Add(uNeighborIndex, uNodeFrom);\r
+ }\r
+ }\r
+ Edges.Copy(NextEdges);\r
+ }\r
+\r
+#if TRACE\r
+ ListEIs(EIs, uNodeCount);\r
+#endif\r
+\r
+ switch (RootMethod)\r
+ {\r
+ case ROOT_MidLongestSpan:\r
+ RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,\r
+ ptrdLength1, ptrdLength2);\r
+ break;\r
+\r
+ case ROOT_MinAvgLeafDist:\r
+ RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,\r
+ ptrdLength1, ptrdLength2);\r
+ break;\r
+\r
+ default:\r
+ Quit("Invalid RootMethod=%d", RootMethod);\r
+ }\r
+\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ delete[] EIs[uNodeIndex];\r
+ delete[] EIs;\r
+ }\r
+\r
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
+ unsigned *ptruNode1, unsigned *ptruNode2,\r
+ double *ptrdLength1, double *ptrdLength2)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+ unsigned uLeaf1 = NULL_NEIGHBOR;\r
+ unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
+ double dMaxDist = -VERY_LARGE_DOUBLE;\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ {\r
+ if (!tree.IsLeaf(uNodeIndex))\r
+ continue;\r
+\r
+ const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);\r
+ if (NULL_NEIGHBOR == uNode2)\r
+ Quit("RootByMidLongestSpan: internal error 0");\r
+ const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);\r
+ const EdgeInfo &EI = EIs[uNodeIndex][0];\r
+ if (!EI.m_bSet)\r
+ Quit("RootByMidLongestSpan: internal error 1");\r
+ if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)\r
+ Quit("RootByMidLongestSpan: internal error 2");\r
+ const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;\r
+ if (dSpanLength > dMaxDist)\r
+ {\r
+ dMaxDist = dSpanLength;\r
+ uLeaf1 = uNodeIndex;\r
+ uMostDistantLeaf = EI.m_uMostDistantLeaf;\r
+ }\r
+ }\r
+ \r
+ if (NULL_NEIGHBOR == uLeaf1)\r
+ Quit("RootByMidLongestSpan: internal error 3");\r
+\r
+ const double dTreeHeight = dMaxDist/2.0;\r
+ unsigned uNode1 = uLeaf1;\r
+ unsigned uNode2 = tree.GetNeighbor1(uLeaf1);\r
+ double dAccumSpanLength = 0;\r
+\r
+#if TRACE\r
+ Log("RootByMidLongestSpan: span=%u", uLeaf1);\r
+#endif\r
+\r
+ for (;;)\r
+ {\r
+ const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);\r
+#if TRACE\r
+ Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);\r
+#endif\r
+ if (dAccumSpanLength + dEdgeLength >= dTreeHeight)\r
+ {\r
+ *ptruNode1 = uNode1;\r
+ *ptruNode2 = uNode2;\r
+ *ptrdLength1 = dTreeHeight - dAccumSpanLength;\r
+ *ptrdLength2 = dEdgeLength - *ptrdLength1;\r
+#if TRACE\r
+ {\r
+ const EdgeInfo &EI = EIs[uLeaf1][0];\r
+ Log("...\n");\r
+ Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",\r
+ uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);\r
+ }\r
+#endif\r
+ return;\r
+ }\r
+\r
+ if (tree.IsLeaf(uNode2))\r
+ Quit("RootByMidLongestSpan: internal error 4");\r
+\r
+ dAccumSpanLength += dEdgeLength;\r
+ const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);\r
+ const EdgeInfo &EI = EIs[uNode1][uSub];\r
+ if (!EI.m_bSet)\r
+ Quit("RootByMidLongestSpan: internal error 5");\r
+\r
+ uNode1 = uNode2;\r
+ uNode2 = EI.m_uMaxStep;\r
+ }\r
+ }\r
+\r
+/***\r
+Root by balancing average distance to leaves.\r
+The root is a point p such that the average\r
+distance to leaves to the left of p is the\r
+same as the to the right.\r
+\r
+This is the method used by CLUSTALW, which\r
+was originally used in PROFILEWEIGHT:\r
+\r
+ Thompson et al. (1994) CABIOS (10) 1, 19-29.\r
+***/\r
+\r
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
+ unsigned *ptruNode1, unsigned *ptruNode2,\r
+ double *ptrdLength1, double *ptrdLength2)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+ const unsigned uLeafCount = tree.GetLeafCount();\r
+ unsigned uNode1 = NULL_NEIGHBOR;\r
+ unsigned uNode2 = NULL_NEIGHBOR;\r
+ double dMinHeight = VERY_LARGE_DOUBLE;\r
+ double dBestLength1 = VERY_LARGE_DOUBLE;\r
+ double dBestLength2 = VERY_LARGE_DOUBLE;\r
+\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ {\r
+ const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);\r
+ for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+ {\r
+ const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);\r
+\r
+ // Avoid visiting same edge a second time in reversed order.\r
+ if (uNeighborIndex < uNodeIndex)\r
+ continue;\r
+\r
+ const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);\r
+ if (NULL_NEIGHBOR == uSubRev)\r
+ Quit("RootByMinAvgLeafDist, internal error 1");\r
+\r
+ // Get info for edges Node1->Node2 and Node2->Node1 (reversed)\r
+ const EdgeInfo &EI = EIs[uNodeIndex][uSub];\r
+ const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];\r
+\r
+ if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||\r
+ EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)\r
+ Quit("RootByMinAvgLeafDist, internal error 2");\r
+ if (!EI.m_bSet)\r
+ Quit("RootByMinAvgLeafDist, internal error 3");\r
+ if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)\r
+ Quit("RootByMinAvgLeafDist, internal error 4");\r
+\r
+ const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);\r
+ if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))\r
+ Quit("RootByMinAvgLeafDist, internal error 5");\r
+\r
+ // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).\r
+ //\r
+ // ----- ----\r
+ // | |\r
+ // 1----p--2\r
+ // | |\r
+ // ----- ----\r
+ //\r
+ // Define:\r
+ // ADLp = average distance to leaves to left of point p.\r
+ // ADRp = average distance to leaves to right of point p.\r
+ // L = edge length = distance 12\r
+ // x = distance 1p\r
+ // So distance p2 = L - x.\r
+ // Average distance from p to leaves on left of p is:\r
+ // ADLp = ADL1 + x\r
+ // Average distance from p to leaves on right of p is:\r
+ // ADRp = ADR2 + (L - x)\r
+ // To be a root, we require these two distances to be equal,\r
+ // ADLp = ADRp\r
+ // ADL1 + x = ADR2 + (L - x)\r
+ // Solving for x,\r
+ // x = (ADR2 - ADL1 + L)/2\r
+ // If 0 <= x <= L, we can place the root on edge 12.\r
+\r
+ const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;\r
+ const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;\r
+\r
+ const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;\r
+ if (x >= 0 && x <= dEdgeLength)\r
+ {\r
+ const double dLength1 = x;\r
+ const double dLength2 = dEdgeLength - x;\r
+ const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;\r
+ const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;\r
+ const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;\r
+#if TRACE\r
+ Log("Candidate root Node1=%u Node2=%u Height=%g\n",\r
+ uNodeIndex, uNeighborIndex, dHeight);\r
+#endif\r
+ if (dHeight < dMinHeight)\r
+ {\r
+ uNode1 = uNodeIndex;\r
+ uNode2 = uNeighborIndex;\r
+ dBestLength1 = dLength1;\r
+ dBestLength2 = dLength2;\r
+ dMinHeight = dHeight;\r
+ }\r
+ }\r
+ }\r
+ }\r
+\r
+ if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)\r
+ Quit("RootByMinAvgLeafDist, internal error 6");\r
+\r
+#if TRACE\r
+ Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",\r
+ uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);\r
+#endif\r
+\r
+ *ptruNode1 = uNode1;\r
+ *ptruNode2 = uNode2;\r
+ *ptrdLength1 = dBestLength1;\r
+ *ptrdLength2 = dBestLength2;\r
+ }\r
+\r
+void FixRoot(Tree &tree, ROOT Method)\r
+ {\r
+ if (!tree.IsRooted())\r
+ Quit("FixRoot: expecting rooted tree");\r
+\r
+ // Pseudo-root: keep root assigned by clustering\r
+ if (ROOT_Pseudo == Method)\r
+ return;\r
+\r
+ tree.UnrootByDeletingRoot();\r
+ tree.RootUnrootedTree(Method);\r
+ }\r