Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / threewaywt.cpp
diff --git a/website/archive/binaries/mac/src/muscle/threewaywt.cpp b/website/archive/binaries/mac/src/muscle/threewaywt.cpp
new file mode 100644 (file)
index 0000000..cf4c0ce
--- /dev/null
@@ -0,0 +1,342 @@
+#include "muscle.h"\r
+#include "tree.h"\r
+#include <math.h>\r
+\r
+#define TRACE  0\r
+\r
+/***\r
+Sequence weights derived from a tree using Gotoh's\r
+three-way method.\r
+\r
+       Gotoh (1995) CABIOS 11(5), 543-51.\r
+\r
+Each edge e is assigned a weight w(e).\r
+\r
+Consider first a tree with three leaves A,B and C\r
+having branch lengths a, b and c, as follows.\r
+\r
+            B\r
+            |\r
+            b\r
+            |\r
+    A---a---R---c---C\r
+\r
+The internal node is denoted by R.\r
+\r
+Define:\r
+\r
+       S = (ab + ca + ab)\r
+       x = bc(a + b)(a + c)\r
+       y = a(b + c)FS\r
+\r
+Here F is a tunable normalization factor which is\r
+approximately 1.0. Then the edge weight for AR\r
+is computed as:\r
+\r
+       w(AR) = sqrt(x/y)\r
+\r
+Similar expressions for the other edges follow by\r
+symmetry.\r
+\r
+For a tree with more than three edges, the weight\r
+of an edge that ends in a leaf is computed from\r
+the three-way tree that includes the edge and\r
+its two neighbors. The weight of an internal edge\r
+is computed as the product of the weights for that\r
+edge derived from the two three-way subtrees that\r
+include that edge.\r
+\r
+For example, consider the following tree.\r
+\r
+       B\r
+       |\r
+    A--R--V--C\r
+          |\r
+          D\r
+\r
+Here, w(RV) is computed as the product of the\r
+two values for w(RV) derived from the three-way\r
+trees with leaves ABV and RCD respectively.\r
+\r
+The calculation is done using "Gotoh lengths",\r
+not the real edge lengths.\r
+\r
+The Gotoh length G of a directed edge is calculated\r
+recursively as:\r
+\r
+       G = d + LR/(L + R)\r
+\r
+where d is the length of the edge, and L and R are\r
+the Gotoh lengths of the left and right edges adjoining\r
+the terminal end of the edge. If the edge terminates on\r
+a leaf, then G=d.\r
+\r
+Pairwise sequence weights are computed as the\r
+product of edge weights on the path that connects\r
+their leaves.\r
+\r
+If the tree is split into two subtrees by deleting\r
+a given edge e, then the pairwise weights factorize.\r
+For operations on profiles formed from the two\r
+subtrees, it is possible to assign a weight to a\r
+sequence as the product of edge weights on a path\r
+from e to its leaf.\r
+***/\r
+\r
+// The xxxUnrooted functions present a rooted tree as\r
+// if it had been unrooted by deleting the root node.\r
+static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+  unsigned uNode2)\r
+       {\r
+       if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+               Quit("GetFirstNeighborUnrooted, should never be called with root");\r
+       if (!tree.IsEdge(uNode1, uNode2))\r
+               {\r
+               if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+                 !tree.IsRoot(tree.GetParent(uNode2)))\r
+                       Quit("GetFirstNeighborUnrooted, not edge");\r
+               const unsigned uRoot = tree.GetRootNodeIndex();\r
+               return tree.GetFirstNeighbor(uNode1, uRoot);\r
+               }\r
+\r
+       unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);\r
+       if (tree.IsRoot(uNeighbor))\r
+               return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+       return uNeighbor;\r
+       }\r
+\r
+static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+  unsigned uNode2)\r
+       {\r
+       if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+               Quit("GetFirstNeighborUnrooted, should never be called with root");\r
+       if (!tree.IsEdge(uNode1, uNode2))\r
+               {\r
+               if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+                 !tree.IsRoot(tree.GetParent(uNode2)))\r
+                       Quit("GetFirstNeighborUnrooted, not edge");\r
+               const unsigned uRoot = tree.GetRootNodeIndex();\r
+               return tree.GetSecondNeighbor(uNode1, uRoot);\r
+               }\r
+\r
+       unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);\r
+       if (tree.IsRoot(uNeighbor))\r
+               return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+       return uNeighbor;\r
+       }\r
+\r
+static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
+  unsigned uSub)\r
+       {\r
+       unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);\r
+       if (tree.IsRoot(uNeighbor))\r
+               return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
+       return uNeighbor;\r
+       }\r
+\r
+static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,\r
+  unsigned uNode2)\r
+       {\r
+       if (tree.IsEdge(uNode1, uNode2))\r
+               return tree.GetNeighborSubscript(uNode1, uNode2);\r
+       if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+         !tree.IsRoot(tree.GetParent(uNode2)))\r
+               Quit("GetNeighborSubscriptUnrooted, not edge");\r
+       for (unsigned uSub = 0; uSub < 3; ++uSub)\r
+               if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)\r
+                       return uSub;\r
+       Quit("GetNeighborSubscriptUnrooted, not a neighbor");\r
+       return NULL_NEIGHBOR;\r
+       }\r
+\r
+static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,\r
+  unsigned uNode2)\r
+       {\r
+       if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+               Quit("GetEdgeLengthUnrooted, should never be called with root");\r
+       if (!tree.IsEdge(uNode1, uNode2))\r
+               {\r
+               if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
+                 !tree.IsRoot(tree.GetParent(uNode2)))\r
+                       Quit("GetEdgeLengthUnrooted, not edge");\r
+\r
+               const unsigned uRoot = tree.GetRootNodeIndex();\r
+               return tree.GetEdgeLength(uNode1, uRoot) +\r
+                 tree.GetEdgeLength(uNode2, uRoot);\r
+               }\r
+       return tree.GetEdgeLength(uNode1, uNode2);\r
+       }\r
+\r
+double GetGotohLength(const Tree &tree, unsigned R, unsigned A)\r
+       {\r
+       double dThis = GetEdgeLengthUnrooted(tree, R, A);\r
+\r
+// Enforce non-negative edge lengths\r
+       if (dThis < 0)\r
+               dThis = 0;\r
+\r
+       if (tree.IsLeaf(A))\r
+               return dThis;\r
+\r
+       const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);\r
+       const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);\r
+       const double dFirst = GetGotohLength(tree, A, uFirst);\r
+       const double dSecond = GetGotohLength(tree, A, uSecond);\r
+       const double dSum = dFirst + dSecond;\r
+       const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;\r
+       return dThis + dThird;\r
+       }\r
+\r
+// Return weight of edge A-R in three-way subtree that has\r
+// leaves A,B,C and internal node R.\r
+static double GotohWeightThreeWay(const Tree &tree, unsigned A,\r
+  unsigned B, unsigned C, unsigned R)\r
+       {\r
+       const double F = 1.0;\r
+\r
+       if (tree.IsLeaf(R))\r
+               Quit("GotohThreeWay: R must be internal node");\r
+\r
+       double a = GetGotohLength(tree, R, A);\r
+       double b = GetGotohLength(tree, R, B);\r
+       double c = GetGotohLength(tree, R, C);\r
+\r
+       double S = b*c + c*a + a*b;\r
+       double x = b*c*(a + b)*(a + c);\r
+       double y = a*(b + c)*F*S;\r
+\r
+// y is zero iff all three branch lengths are zero.\r
+       if (y < 0.001)\r
+               return 1.0;\r
+       return sqrt(x/y);\r
+       }\r
+\r
+static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,\r
+  unsigned uNodeIndex2)\r
+       {\r
+       double w1 = 1.0;\r
+       double w2 = 1.0;\r
+       if (!tree.IsLeaf(uNodeIndex1))\r
+               {\r
+               unsigned R = uNodeIndex1;\r
+               unsigned A = uNodeIndex2;\r
+               unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
+               unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
+               w1 = GotohWeightThreeWay(tree, A, B, C, R);\r
+               }\r
+       if (!tree.IsLeaf(uNodeIndex2))\r
+               {\r
+               unsigned R = uNodeIndex2;\r
+               unsigned A = uNodeIndex1;\r
+               unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
+               unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
+               w2 = GotohWeightThreeWay(tree, A, B, C, R);\r
+               }\r
+       return w1*w2;\r
+       }\r
+\r
+void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+       for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
+               {\r
+               if (tree.IsRoot(uNodeIndex1))\r
+                       continue;\r
+               for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
+                       {\r
+                       const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
+                       if (NULL_NEIGHBOR == uNodeIndex2)\r
+                               continue;\r
+\r
+               // Avoid computing same edge twice in reversed order\r
+                       if (uNodeIndex2 < uNodeIndex1)\r
+                               continue;\r
+\r
+                       const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);\r
+                       const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);\r
+#if    DEBUG\r
+                       {\r
+                       assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));\r
+                       assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));\r
+                       const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);\r
+                       if (!BTEq(w, wRev))\r
+                               Quit("CalcThreeWayWeights: rev check failed %g %g",\r
+                                 w, wRev);\r
+                       }\r
+#endif\r
+                       EdgeWeights[uNodeIndex1][uSub1] = w;\r
+                       EdgeWeights[uNodeIndex2][uSub2] = w;\r
+                       }\r
+               }\r
+       }\r
+\r
+static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
+  double dPathWeight, WEIGHT *Weights)\r
+       {\r
+       if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
+               Quit("SetSeqWeights, should never be called with root");\r
+\r
+       const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);\r
+       if (tree.IsLeaf(uNode2))\r
+               {\r
+               const unsigned Id = tree.GetLeafId(uNode2);\r
+               Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);\r
+               return;\r
+               }\r
+       const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);\r
+       const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);\r
+       dPathWeight *= dThisLength;\r
+       SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);\r
+       SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);\r
+       }\r
+\r
+void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
+  WEIGHT *Weights)\r
+       {\r
+#if    TRACE\r
+       Log("CalcThreeWayEdgeWeights\n");\r
+       tree.LogMe();\r
+#endif\r
+\r
+       if (tree.IsRoot(uNode1))\r
+               uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);\r
+       else if (tree.IsRoot(uNode2))\r
+               uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+       WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               EdgeWeights[uNodeIndex] = new WEIGHT[3];\r
+\r
+       CalcThreeWayEdgeWeights(tree, EdgeWeights);\r
+\r
+#if    TRACE\r
+       {\r
+       Log("Node1  Node2  Length   Gotoh  EdgeWt\n");\r
+       Log("-----  -----  ------  ------  ------\n");\r
+       for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
+               {\r
+               if (tree.IsRoot(uNodeIndex1))\r
+                       continue;\r
+               for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
+                       {\r
+                       const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
+                       if (NULL_NEIGHBOR == uNodeIndex2)\r
+                               continue;\r
+                       if (uNodeIndex2 < uNodeIndex1)\r
+                               continue;\r
+                       const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];\r
+                       const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);\r
+                       const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);\r
+                       Log("%5u  %5u  %6.3f  %6.3f  %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);\r
+                       }\r
+               }\r
+       }\r
+#endif\r
+\r
+       SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);\r
+       SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);\r
+\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               delete[] EdgeWeights[uNodeIndex];\r
+       delete[] EdgeWeights;\r
+       }\r