+++ /dev/null
-#include "muscle.h"\r
-#include "tree.h"\r
-#include <math.h>\r
-\r
-#define TRACE 0\r
-\r
-/***\r
-Sequence weights derived from a tree using Gotoh's\r
-three-way method.\r
-\r
- Gotoh (1995) CABIOS 11(5), 543-51.\r
-\r
-Each edge e is assigned a weight w(e).\r
-\r
-Consider first a tree with three leaves A,B and C\r
-having branch lengths a, b and c, as follows.\r
-\r
- B\r
- |\r
- b\r
- |\r
- A---a---R---c---C\r
-\r
-The internal node is denoted by R.\r
-\r
-Define:\r
-\r
- S = (ab + ca + ab)\r
- x = bc(a + b)(a + c)\r
- y = a(b + c)FS\r
-\r
-Here F is a tunable normalization factor which is\r
-approximately 1.0. Then the edge weight for AR\r
-is computed as:\r
-\r
- w(AR) = sqrt(x/y)\r
-\r
-Similar expressions for the other edges follow by\r
-symmetry.\r
-\r
-For a tree with more than three edges, the weight\r
-of an edge that ends in a leaf is computed from\r
-the three-way tree that includes the edge and\r
-its two neighbors. The weight of an internal edge\r
-is computed as the product of the weights for that\r
-edge derived from the two three-way subtrees that\r
-include that edge.\r
-\r
-For example, consider the following tree.\r
-\r
- B\r
- |\r
- A--R--V--C\r
- |\r
- D\r
-\r
-Here, w(RV) is computed as the product of the\r
-two values for w(RV) derived from the three-way\r
-trees with leaves ABV and RCD respectively.\r
-\r
-The calculation is done using "Gotoh lengths",\r
-not the real edge lengths.\r
-\r
-The Gotoh length G of a directed edge is calculated\r
-recursively as:\r
-\r
- G = d + LR/(L + R)\r
-\r
-where d is the length of the edge, and L and R are\r
-the Gotoh lengths of the left and right edges adjoining\r
-the terminal end of the edge. If the edge terminates on\r
-a leaf, then G=d.\r
-\r
-Pairwise sequence weights are computed as the\r
-product of edge weights on the path that connects\r
-their leaves.\r
-\r
-If the tree is split into two subtrees by deleting\r
-a given edge e, then the pairwise weights factorize.\r
-For operations on profiles formed from the two\r
-subtrees, it is possible to assign a weight to a\r
-sequence as the product of edge weights on a path\r
-from e to its leaf.\r
-***/\r
-\r
-// The xxxUnrooted functions present a rooted tree as\r
-// if it had been unrooted by deleting the root node.\r
-static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
- unsigned uNode2)\r
- {\r
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
- Quit("GetFirstNeighborUnrooted, should never be called with root");\r
- if (!tree.IsEdge(uNode1, uNode2))\r
- {\r
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
- !tree.IsRoot(tree.GetParent(uNode2)))\r
- Quit("GetFirstNeighborUnrooted, not edge");\r
- const unsigned uRoot = tree.GetRootNodeIndex();\r
- return tree.GetFirstNeighbor(uNode1, uRoot);\r
- }\r
-\r
- unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);\r
- if (tree.IsRoot(uNeighbor))\r
- return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
- return uNeighbor;\r
- }\r
-\r
-static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
- unsigned uNode2)\r
- {\r
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
- Quit("GetFirstNeighborUnrooted, should never be called with root");\r
- if (!tree.IsEdge(uNode1, uNode2))\r
- {\r
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
- !tree.IsRoot(tree.GetParent(uNode2)))\r
- Quit("GetFirstNeighborUnrooted, not edge");\r
- const unsigned uRoot = tree.GetRootNodeIndex();\r
- return tree.GetSecondNeighbor(uNode1, uRoot);\r
- }\r
-\r
- unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);\r
- if (tree.IsRoot(uNeighbor))\r
- return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
- return uNeighbor;\r
- }\r
-\r
-static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
- unsigned uSub)\r
- {\r
- unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);\r
- if (tree.IsRoot(uNeighbor))\r
- return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
- return uNeighbor;\r
- }\r
-\r
-static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,\r
- unsigned uNode2)\r
- {\r
- if (tree.IsEdge(uNode1, uNode2))\r
- return tree.GetNeighborSubscript(uNode1, uNode2);\r
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
- !tree.IsRoot(tree.GetParent(uNode2)))\r
- Quit("GetNeighborSubscriptUnrooted, not edge");\r
- for (unsigned uSub = 0; uSub < 3; ++uSub)\r
- if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)\r
- return uSub;\r
- Quit("GetNeighborSubscriptUnrooted, not a neighbor");\r
- return NULL_NEIGHBOR;\r
- }\r
-\r
-static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,\r
- unsigned uNode2)\r
- {\r
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
- Quit("GetEdgeLengthUnrooted, should never be called with root");\r
- if (!tree.IsEdge(uNode1, uNode2))\r
- {\r
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
- !tree.IsRoot(tree.GetParent(uNode2)))\r
- Quit("GetEdgeLengthUnrooted, not edge");\r
-\r
- const unsigned uRoot = tree.GetRootNodeIndex();\r
- return tree.GetEdgeLength(uNode1, uRoot) +\r
- tree.GetEdgeLength(uNode2, uRoot);\r
- }\r
- return tree.GetEdgeLength(uNode1, uNode2);\r
- }\r
-\r
-double GetGotohLength(const Tree &tree, unsigned R, unsigned A)\r
- {\r
- double dThis = GetEdgeLengthUnrooted(tree, R, A);\r
-\r
-// Enforce non-negative edge lengths\r
- if (dThis < 0)\r
- dThis = 0;\r
-\r
- if (tree.IsLeaf(A))\r
- return dThis;\r
-\r
- const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);\r
- const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);\r
- const double dFirst = GetGotohLength(tree, A, uFirst);\r
- const double dSecond = GetGotohLength(tree, A, uSecond);\r
- const double dSum = dFirst + dSecond;\r
- const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;\r
- return dThis + dThird;\r
- }\r
-\r
-// Return weight of edge A-R in three-way subtree that has\r
-// leaves A,B,C and internal node R.\r
-static double GotohWeightThreeWay(const Tree &tree, unsigned A,\r
- unsigned B, unsigned C, unsigned R)\r
- {\r
- const double F = 1.0;\r
-\r
- if (tree.IsLeaf(R))\r
- Quit("GotohThreeWay: R must be internal node");\r
-\r
- double a = GetGotohLength(tree, R, A);\r
- double b = GetGotohLength(tree, R, B);\r
- double c = GetGotohLength(tree, R, C);\r
-\r
- double S = b*c + c*a + a*b;\r
- double x = b*c*(a + b)*(a + c);\r
- double y = a*(b + c)*F*S;\r
-\r
-// y is zero iff all three branch lengths are zero.\r
- if (y < 0.001)\r
- return 1.0;\r
- return sqrt(x/y);\r
- }\r
-\r
-static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,\r
- unsigned uNodeIndex2)\r
- {\r
- double w1 = 1.0;\r
- double w2 = 1.0;\r
- if (!tree.IsLeaf(uNodeIndex1))\r
- {\r
- unsigned R = uNodeIndex1;\r
- unsigned A = uNodeIndex2;\r
- unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
- unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
- w1 = GotohWeightThreeWay(tree, A, B, C, R);\r
- }\r
- if (!tree.IsLeaf(uNodeIndex2))\r
- {\r
- unsigned R = uNodeIndex2;\r
- unsigned A = uNodeIndex1;\r
- unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
- unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
- w2 = GotohWeightThreeWay(tree, A, B, C, R);\r
- }\r
- return w1*w2;\r
- }\r
-\r
-void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)\r
- {\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
- for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
- {\r
- if (tree.IsRoot(uNodeIndex1))\r
- continue;\r
- for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
- {\r
- const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
- if (NULL_NEIGHBOR == uNodeIndex2)\r
- continue;\r
-\r
- // Avoid computing same edge twice in reversed order\r
- if (uNodeIndex2 < uNodeIndex1)\r
- continue;\r
-\r
- const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);\r
- const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);\r
-#if DEBUG\r
- {\r
- assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));\r
- assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));\r
- const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);\r
- if (!BTEq(w, wRev))\r
- Quit("CalcThreeWayWeights: rev check failed %g %g",\r
- w, wRev);\r
- }\r
-#endif\r
- EdgeWeights[uNodeIndex1][uSub1] = w;\r
- EdgeWeights[uNodeIndex2][uSub2] = w;\r
- }\r
- }\r
- }\r
-\r
-static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
- double dPathWeight, WEIGHT *Weights)\r
- {\r
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
- Quit("SetSeqWeights, should never be called with root");\r
-\r
- const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);\r
- if (tree.IsLeaf(uNode2))\r
- {\r
- const unsigned Id = tree.GetLeafId(uNode2);\r
- Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);\r
- return;\r
- }\r
- const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);\r
- const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);\r
- dPathWeight *= dThisLength;\r
- SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);\r
- SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);\r
- }\r
-\r
-void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
- WEIGHT *Weights)\r
- {\r
-#if TRACE\r
- Log("CalcThreeWayEdgeWeights\n");\r
- tree.LogMe();\r
-#endif\r
-\r
- if (tree.IsRoot(uNode1))\r
- uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);\r
- else if (tree.IsRoot(uNode2))\r
- uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
- WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- EdgeWeights[uNodeIndex] = new WEIGHT[3];\r
-\r
- CalcThreeWayEdgeWeights(tree, EdgeWeights);\r
-\r
-#if TRACE\r
- {\r
- Log("Node1 Node2 Length Gotoh EdgeWt\n");\r
- Log("----- ----- ------ ------ ------\n");\r
- for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
- {\r
- if (tree.IsRoot(uNodeIndex1))\r
- continue;\r
- for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
- {\r
- const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
- if (NULL_NEIGHBOR == uNodeIndex2)\r
- continue;\r
- if (uNodeIndex2 < uNodeIndex1)\r
- continue;\r
- const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];\r
- const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);\r
- const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);\r
- Log("%5u %5u %6.3f %6.3f %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);\r
- }\r
- }\r
- }\r
-#endif\r
-\r
- SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);\r
- SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);\r
-\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- delete[] EdgeWeights[uNodeIndex];\r
- delete[] EdgeWeights;\r
- }\r