--- /dev/null
+#include "muscle.h"\r
+#include "tree.h"\r
+#include <math.h>\r
+\r
+#define TRACE 0\r
+\r
+/***\r
+Sequence weights derived from a tree using Gotoh's\r
+three-way method.\r
+\r
+ Gotoh (1995) CABIOS 11(5), 543-51.\r
+\r
+Each edge e is assigned a weight w(e).\r
+\r
+Consider first a tree with three leaves A,B and C\r
+having branch lengths a, b and c, as follows.\r
+\r
+ B\r
+ |\r
+ b\r
+ |\r
+ A---a---R---c---C\r
+\r
+The internal node is denoted by R.\r
+\r
+Define:\r
+\r
+ S = (ab + ca + ab)\r
+ x = bc(a + b)(a + c)\r
+ y = a(b + c)FS\r
+\r
+Here F is a tunable normalization factor which is\r
+approximately 1.0. Then the edge weight for AR\r
+is computed as:\r
+\r
+ w(AR) = sqrt(x/y)\r
+\r
+Similar expressions for the other edges follow by\r
+symmetry.\r
+\r
+For a tree with more than three edges, the weight\r
+of an edge that ends in a leaf is computed from\r
+the three-way tree that includes the edge and\r
+its two neighbors. The weight of an internal edge\r
+is computed as the product of the weights for that\r
+edge derived from the two three-way subtrees that\r
+include that edge.\r
+\r
+For example, consider the following tree.\r
+\r
+ B\r
+ |\r
+ A--R--V--C\r
+ |\r
+ D\r
+\r
+Here, w(RV) is computed as the product of the\r
+two values for w(RV) derived from the three-way\r
+trees with leaves ABV and RCD respectively.\r
+\r
+The calculation is done using "Gotoh lengths",\r
+not the real edge lengths.\r
+\r
+The Gotoh length G of a directed edge is calculated\r
+recursively as:\r
+\r
+ G = d + LR/(L + R)\r
+\r
+where d is the length of the edge, and L and R are\r
+the Gotoh lengths of the left and right edges adjoining\r
+the terminal end of the edge. If the edge terminates on\r
+a leaf, then G=d.\r
+\r
+Pairwise sequence weights are computed as the\r
+product of edge weights on the path that connects\r
+their leaves.\r
+\r
+If the tree is split into two subtrees by deleting\r
+a given edge e, then the pairwise weights factorize.\r
+For operations on profiles formed from the two\r
+subtrees, it is possible to assign a weight to a\r
+sequence as the product of edge weights on a path\r
+from e to its leaf.\r
+***/\r
+\r
+// The xxxUnrooted functions present a rooted tree as\r
+// if it had been unrooted by deleting the root node.\r
+static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+ unsigned uNode2)\r
+ {\r
+ if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+ Quit("GetFirstNeighborUnrooted, should never be called with root");\r
+ if (!tree.IsEdge(uNode1, uNode2))\r
+ {\r
+ if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+ !tree.IsRoot(tree.GetParent(uNode2)))\r
+ Quit("GetFirstNeighborUnrooted, not edge");\r
+ const unsigned uRoot = tree.GetRootNodeIndex();\r
+ return tree.GetFirstNeighbor(uNode1, uRoot);\r
+ }\r
+\r
+ unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);\r
+ if (tree.IsRoot(uNeighbor))\r
+ return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+ return uNeighbor;\r
+ }\r
+\r
+static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+ unsigned uNode2)\r
+ {\r
+ if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+ Quit("GetFirstNeighborUnrooted, should never be called with root");\r
+ if (!tree.IsEdge(uNode1, uNode2))\r
+ {\r
+ if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+ !tree.IsRoot(tree.GetParent(uNode2)))\r
+ Quit("GetFirstNeighborUnrooted, not edge");\r
+ const unsigned uRoot = tree.GetRootNodeIndex();\r
+ return tree.GetSecondNeighbor(uNode1, uRoot);\r
+ }\r
+\r
+ unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);\r
+ if (tree.IsRoot(uNeighbor))\r
+ return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+ return uNeighbor;\r
+ }\r
+\r
+static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+ unsigned uSub)\r
+ {\r
+ unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);\r
+ if (tree.IsRoot(uNeighbor))\r
+ return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+ return uNeighbor;\r
+ }\r
+\r
+static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,\r
+ unsigned uNode2)\r
+ {\r
+ if (tree.IsEdge(uNode1, uNode2))\r
+ return tree.GetNeighborSubscript(uNode1, uNode2);\r
+ if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+ !tree.IsRoot(tree.GetParent(uNode2)))\r
+ Quit("GetNeighborSubscriptUnrooted, not edge");\r
+ for (unsigned uSub = 0; uSub < 3; ++uSub)\r
+ if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)\r
+ return uSub;\r
+ Quit("GetNeighborSubscriptUnrooted, not a neighbor");\r
+ return NULL_NEIGHBOR;\r
+ }\r
+\r
+static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,\r
+ unsigned uNode2)\r
+ {\r
+ if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+ Quit("GetEdgeLengthUnrooted, should never be called with root");\r
+ if (!tree.IsEdge(uNode1, uNode2))\r
+ {\r
+ if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+ !tree.IsRoot(tree.GetParent(uNode2)))\r
+ Quit("GetEdgeLengthUnrooted, not edge");\r
+\r
+ const unsigned uRoot = tree.GetRootNodeIndex();\r
+ return tree.GetEdgeLength(uNode1, uRoot) +\r
+ tree.GetEdgeLength(uNode2, uRoot);\r
+ }\r
+ return tree.GetEdgeLength(uNode1, uNode2);\r
+ }\r
+\r
+double GetGotohLength(const Tree &tree, unsigned R, unsigned A)\r
+ {\r
+ double dThis = GetEdgeLengthUnrooted(tree, R, A);\r
+\r
+// Enforce non-negative edge lengths\r
+ if (dThis < 0)\r
+ dThis = 0;\r
+\r
+ if (tree.IsLeaf(A))\r
+ return dThis;\r
+\r
+ const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);\r
+ const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);\r
+ const double dFirst = GetGotohLength(tree, A, uFirst);\r
+ const double dSecond = GetGotohLength(tree, A, uSecond);\r
+ const double dSum = dFirst + dSecond;\r
+ const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;\r
+ return dThis + dThird;\r
+ }\r
+\r
+// Return weight of edge A-R in three-way subtree that has\r
+// leaves A,B,C and internal node R.\r
+static double GotohWeightThreeWay(const Tree &tree, unsigned A,\r
+ unsigned B, unsigned C, unsigned R)\r
+ {\r
+ const double F = 1.0;\r
+\r
+ if (tree.IsLeaf(R))\r
+ Quit("GotohThreeWay: R must be internal node");\r
+\r
+ double a = GetGotohLength(tree, R, A);\r
+ double b = GetGotohLength(tree, R, B);\r
+ double c = GetGotohLength(tree, R, C);\r
+\r
+ double S = b*c + c*a + a*b;\r
+ double x = b*c*(a + b)*(a + c);\r
+ double y = a*(b + c)*F*S;\r
+\r
+// y is zero iff all three branch lengths are zero.\r
+ if (y < 0.001)\r
+ return 1.0;\r
+ return sqrt(x/y);\r
+ }\r
+\r
+static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,\r
+ unsigned uNodeIndex2)\r
+ {\r
+ double w1 = 1.0;\r
+ double w2 = 1.0;\r
+ if (!tree.IsLeaf(uNodeIndex1))\r
+ {\r
+ unsigned R = uNodeIndex1;\r
+ unsigned A = uNodeIndex2;\r
+ unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
+ unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
+ w1 = GotohWeightThreeWay(tree, A, B, C, R);\r
+ }\r
+ if (!tree.IsLeaf(uNodeIndex2))\r
+ {\r
+ unsigned R = uNodeIndex2;\r
+ unsigned A = uNodeIndex1;\r
+ unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
+ unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
+ w2 = GotohWeightThreeWay(tree, A, B, C, R);\r
+ }\r
+ return w1*w2;\r
+ }\r
+\r
+void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)\r
+ {\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+ for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
+ {\r
+ if (tree.IsRoot(uNodeIndex1))\r
+ continue;\r
+ for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
+ {\r
+ const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
+ if (NULL_NEIGHBOR == uNodeIndex2)\r
+ continue;\r
+\r
+ // Avoid computing same edge twice in reversed order\r
+ if (uNodeIndex2 < uNodeIndex1)\r
+ continue;\r
+\r
+ const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);\r
+ const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);\r
+#if DEBUG\r
+ {\r
+ assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));\r
+ assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));\r
+ const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);\r
+ if (!BTEq(w, wRev))\r
+ Quit("CalcThreeWayWeights: rev check failed %g %g",\r
+ w, wRev);\r
+ }\r
+#endif\r
+ EdgeWeights[uNodeIndex1][uSub1] = w;\r
+ EdgeWeights[uNodeIndex2][uSub2] = w;\r
+ }\r
+ }\r
+ }\r
+\r
+static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
+ double dPathWeight, WEIGHT *Weights)\r
+ {\r
+ if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+ Quit("SetSeqWeights, should never be called with root");\r
+\r
+ const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);\r
+ if (tree.IsLeaf(uNode2))\r
+ {\r
+ const unsigned Id = tree.GetLeafId(uNode2);\r
+ Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);\r
+ return;\r
+ }\r
+ const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);\r
+ const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);\r
+ dPathWeight *= dThisLength;\r
+ SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);\r
+ SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);\r
+ }\r
+\r
+void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
+ WEIGHT *Weights)\r
+ {\r
+#if TRACE\r
+ Log("CalcThreeWayEdgeWeights\n");\r
+ tree.LogMe();\r
+#endif\r
+\r
+ if (tree.IsRoot(uNode1))\r
+ uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);\r
+ else if (tree.IsRoot(uNode2))\r
+ uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);\r
+ const unsigned uNodeCount = tree.GetNodeCount();\r
+ WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ EdgeWeights[uNodeIndex] = new WEIGHT[3];\r
+\r
+ CalcThreeWayEdgeWeights(tree, EdgeWeights);\r
+\r
+#if TRACE\r
+ {\r
+ Log("Node1 Node2 Length Gotoh EdgeWt\n");\r
+ Log("----- ----- ------ ------ ------\n");\r
+ for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
+ {\r
+ if (tree.IsRoot(uNodeIndex1))\r
+ continue;\r
+ for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
+ {\r
+ const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
+ if (NULL_NEIGHBOR == uNodeIndex2)\r
+ continue;\r
+ if (uNodeIndex2 < uNodeIndex1)\r
+ continue;\r
+ const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];\r
+ const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);\r
+ const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);\r
+ Log("%5u %5u %6.3f %6.3f %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);\r
+ }\r
+ }\r
+ }\r
+#endif\r
+\r
+ SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);\r
+ SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);\r
+\r
+ for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+ delete[] EdgeWeights[uNodeIndex];\r
+ delete[] EdgeWeights;\r
+ }\r