Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / phy3.cpp
diff --git a/website/archive/binaries/mac/src/muscle/phy3.cpp b/website/archive/binaries/mac/src/muscle/phy3.cpp
new file mode 100644 (file)
index 0000000..615372e
--- /dev/null
@@ -0,0 +1,469 @@
+#include "muscle.h"\r
+#include "tree.h"\r
+#include "edgelist.h"\r
+\r
+#define TRACE  0\r
+\r
+struct EdgeInfo\r
+       {\r
+       EdgeInfo()\r
+               {\r
+               m_bSet = false;\r
+               }\r
+// Is data in this structure valid (i.e, has been set)?\r
+       bool m_bSet;\r
+\r
+// Node at start of this edge\r
+       unsigned m_uNode1;\r
+\r
+// Node at end of this edge\r
+       unsigned m_uNode2;\r
+\r
+// Maximum distance from Node2 to a leaf\r
+       double m_dMaxDistToLeaf;\r
+\r
+// Sum of distances from Node2 to all leaves under Node2\r
+       double m_dTotalDistToLeaves;\r
+\r
+// Next node on path from Node2 to most distant leaf\r
+       unsigned m_uMaxStep;\r
+\r
+// Most distant leaf from Node2 (used for debugging only)\r
+       unsigned m_uMostDistantLeaf;\r
+\r
+// Number of leaves under Node2\r
+       unsigned m_uLeafCount;\r
+       };\r
+\r
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
+  unsigned *ptruNode1, unsigned *ptruNode2,\r
+  double *ptrdLength1, double *ptrdLength2);\r
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
+  unsigned *ptruNode1, unsigned *ptruNode2,\r
+  double *ptrdLength1, double *ptrdLength2);\r
+\r
+static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)\r
+       {\r
+       Log("Node1  Node2  MaxDist  TotDist  MostDist  LeafCount  Step\n");\r
+       Log("-----  -----  -------  -------  --------  ---------  ----\n");\r
+       //    12345  12345  1234567  1234567  12345678  123456789\r
+\r
+       for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)\r
+               for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)\r
+                       {\r
+                       const EdgeInfo &EI = EIs[uNode][uNeighbor];\r
+                       if (!EI.m_bSet)\r
+                               continue;\r
+                       Log("%5u  %5u  %7.3g  %7.3g  %8u  %9u",\r
+                         EI.m_uNode1,\r
+                         EI.m_uNode2,\r
+                         EI.m_dMaxDistToLeaf,\r
+                         EI.m_dTotalDistToLeaves,\r
+                         EI.m_uMostDistantLeaf,\r
+                         EI.m_uLeafCount);\r
+                       if (NULL_NEIGHBOR != EI.m_uMaxStep)\r
+                               Log("  %4u", EI.m_uMaxStep);\r
+                       Log("\n");\r
+                       }\r
+       }\r
+\r
+static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)\r
+       {\r
+       const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);\r
+       EdgeInfo &EI = EIs[uNode1][uNeighborIndex];\r
+       EI.m_uNode1 = uNode1;\r
+       EI.m_uNode2 = uNode2;\r
+\r
+       if (tree.IsLeaf(uNode2))\r
+               {\r
+               EI.m_dMaxDistToLeaf = 0;\r
+               EI.m_dTotalDistToLeaves = 0;\r
+               EI.m_uMaxStep = NULL_NEIGHBOR;\r
+               EI.m_uMostDistantLeaf = uNode2;\r
+               EI.m_uLeafCount = 1;\r
+               EI.m_bSet = true;\r
+               return;\r
+               }\r
+\r
+       double dMaxDistToLeaf = -1e29;\r
+       double dTotalDistToLeaves = 0.0;\r
+       unsigned uLeafCount = 0;\r
+       unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
+       unsigned uMaxStep = NULL_NEIGHBOR;\r
+\r
+       const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);\r
+       for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+               {\r
+               const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);\r
+               if (uNode3 == uNode1)\r
+                       continue;\r
+               const EdgeInfo &EINext = EIs[uNode2][uSub];\r
+               if (!EINext.m_bSet)\r
+                       Quit("CalcInfo: internal error, dist %u->%u not known",\r
+                               uNode2, uNode3);\r
+\r
+\r
+               uLeafCount += EINext.m_uLeafCount;\r
+\r
+               const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);\r
+               const double dTotalDist = EINext.m_dTotalDistToLeaves +\r
+                 EINext.m_uLeafCount*dEdgeLength;\r
+               dTotalDistToLeaves += dTotalDist;\r
+\r
+               const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;\r
+               if (dDist > dMaxDistToLeaf)\r
+                       {\r
+                       dMaxDistToLeaf = dDist;\r
+                       uMostDistantLeaf = EINext.m_uMostDistantLeaf;\r
+                       uMaxStep = uNode3;\r
+                       }\r
+               }\r
+       if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||\r
+         0 == uLeafCount)\r
+               Quit("CalcInfo: internal error 2");\r
+\r
+       const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);\r
+       EI.m_dMaxDistToLeaf = dMaxDistToLeaf;\r
+       EI.m_dTotalDistToLeaves = dTotalDistToLeaves;\r
+       EI.m_uMaxStep = uMaxStep;\r
+       EI.m_uMostDistantLeaf = uMostDistantLeaf;\r
+       EI.m_uLeafCount = uLeafCount;\r
+       EI.m_bSet = true;\r
+       }\r
+\r
+static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
+  unsigned uNodeTo)\r
+       {\r
+       const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);\r
+       return EIs[uNodeFrom][uSub].m_bSet;\r
+       }\r
+\r
+static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
+  unsigned uNodeTo)\r
+       {\r
+       const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);\r
+       for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+               {\r
+               unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);\r
+               if (uNeighborIndex == uNodeFrom)\r
+                       continue;\r
+               if (!EIs[uNodeTo][uSub].m_bSet)\r
+                       return false;\r
+               }\r
+       return true;\r
+       }\r
+\r
+void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,\r
+  double *ptrdLength1, double *ptrdLength2,\r
+  ROOT RootMethod)\r
+       {\r
+#if    TRACE\r
+       tree.LogMe();\r
+#endif\r
+       if (tree.IsRooted())\r
+               Quit("FindRoot: tree already rooted");\r
+\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+       const unsigned uLeafCount = tree.GetLeafCount();\r
+\r
+       if (uNodeCount < 2)\r
+               Quit("Root: don't support trees with < 2 edges");\r
+\r
+       EdgeInfo **EIs = new EdgeInfo *[uNodeCount];\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               EIs[uNodeIndex] = new EdgeInfo[3];\r
+\r
+       EdgeList Edges;\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               if (tree.IsLeaf(uNodeIndex))\r
+                       {\r
+                       unsigned uParent = tree.GetNeighbor1(uNodeIndex);\r
+                       Edges.Add(uParent, uNodeIndex);\r
+                       }\r
+\r
+#if    TRACE\r
+       Log("Edges: ");\r
+       Edges.LogMe();\r
+#endif\r
+\r
+// Main loop: iterate until all distances known\r
+       double dAllMaxDist = -1e20;\r
+       unsigned uMaxFrom = NULL_NEIGHBOR;\r
+       unsigned uMaxTo = NULL_NEIGHBOR;\r
+       for (;;)\r
+               {\r
+               EdgeList NextEdges;\r
+\r
+#if    TRACE\r
+               Log("\nTop of main loop\n");\r
+               Log("Edges: ");\r
+               Edges.LogMe();\r
+               Log("MDs:\n");\r
+               ListEIs(EIs, uNodeCount);\r
+#endif\r
+\r
+       // For all edges\r
+               const unsigned uEdgeCount = Edges.GetCount();\r
+               if (0 == uEdgeCount)\r
+                       break;\r
+               for (unsigned n = 0; n < uEdgeCount; ++n)\r
+                       {\r
+                       unsigned uNodeFrom;\r
+                       unsigned uNodeTo;\r
+                       Edges.GetEdge(n, &uNodeFrom, &uNodeTo);\r
+\r
+                       CalcInfo(tree, uNodeFrom, uNodeTo, EIs);\r
+#if    TRACE\r
+                       Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);\r
+#endif\r
+                       const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);\r
+                       for (unsigned i = 0; i < uNeighborCount; ++i)\r
+                               {\r
+                               const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);\r
+                               if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&\r
+                                 AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))\r
+                                       NextEdges.Add(uNeighborIndex, uNodeFrom);\r
+                               }\r
+                       }\r
+               Edges.Copy(NextEdges);\r
+               }\r
+\r
+#if    TRACE\r
+       ListEIs(EIs, uNodeCount);\r
+#endif\r
+\r
+       switch (RootMethod)\r
+               {\r
+       case ROOT_MidLongestSpan:\r
+               RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,\r
+                 ptrdLength1, ptrdLength2);\r
+               break;\r
+\r
+       case ROOT_MinAvgLeafDist:\r
+               RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,\r
+                 ptrdLength1, ptrdLength2);\r
+               break;\r
+\r
+       default:\r
+               Quit("Invalid RootMethod=%d", RootMethod);\r
+               }\r
+\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               delete[] EIs[uNodeIndex];\r
+       delete[] EIs;\r
+       }\r
+\r
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
+  unsigned *ptruNode1, unsigned *ptruNode2,\r
+  double *ptrdLength1, double *ptrdLength2)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+\r
+       unsigned uLeaf1 = NULL_NEIGHBOR;\r
+       unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
+       double dMaxDist = -VERY_LARGE_DOUBLE;\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               {\r
+               if (!tree.IsLeaf(uNodeIndex))\r
+                       continue;\r
+\r
+               const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);\r
+               if (NULL_NEIGHBOR == uNode2)\r
+                       Quit("RootByMidLongestSpan: internal error 0");\r
+               const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);\r
+               const EdgeInfo &EI = EIs[uNodeIndex][0];\r
+               if (!EI.m_bSet)\r
+                       Quit("RootByMidLongestSpan: internal error 1");\r
+               if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)\r
+                       Quit("RootByMidLongestSpan: internal error 2");\r
+               const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;\r
+               if (dSpanLength > dMaxDist)\r
+                       {\r
+                       dMaxDist = dSpanLength;\r
+                       uLeaf1 = uNodeIndex;\r
+                       uMostDistantLeaf = EI.m_uMostDistantLeaf;\r
+                       }\r
+               }\r
+       \r
+       if (NULL_NEIGHBOR == uLeaf1)\r
+               Quit("RootByMidLongestSpan: internal error 3");\r
+\r
+       const double dTreeHeight = dMaxDist/2.0;\r
+       unsigned uNode1 = uLeaf1;\r
+       unsigned uNode2 = tree.GetNeighbor1(uLeaf1);\r
+       double dAccumSpanLength = 0;\r
+\r
+#if    TRACE\r
+       Log("RootByMidLongestSpan: span=%u", uLeaf1);\r
+#endif\r
+\r
+       for (;;)\r
+               {\r
+               const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);\r
+#if    TRACE\r
+               Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);\r
+#endif\r
+               if (dAccumSpanLength + dEdgeLength >= dTreeHeight)\r
+                       {\r
+                       *ptruNode1 = uNode1;\r
+                       *ptruNode2 = uNode2;\r
+                       *ptrdLength1 = dTreeHeight - dAccumSpanLength;\r
+                       *ptrdLength2 = dEdgeLength - *ptrdLength1;\r
+#if    TRACE\r
+                       {\r
+                       const EdgeInfo &EI = EIs[uLeaf1][0];\r
+                       Log("...\n");\r
+                       Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",\r
+                         uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);\r
+                       }\r
+#endif\r
+                       return;\r
+                       }\r
+\r
+               if (tree.IsLeaf(uNode2))\r
+                       Quit("RootByMidLongestSpan: internal error 4");\r
+\r
+               dAccumSpanLength += dEdgeLength;\r
+               const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);\r
+               const EdgeInfo &EI = EIs[uNode1][uSub];\r
+               if (!EI.m_bSet)\r
+                       Quit("RootByMidLongestSpan: internal error 5");\r
+\r
+               uNode1 = uNode2;\r
+               uNode2 = EI.m_uMaxStep;\r
+               }\r
+       }\r
+\r
+/***\r
+Root by balancing average distance to leaves.\r
+The root is a point p such that the average\r
+distance to leaves to the left of p is the\r
+same as the to the right.\r
+\r
+This is the method used by CLUSTALW, which\r
+was originally used in PROFILEWEIGHT:\r
+\r
+       Thompson et al. (1994) CABIOS (10) 1, 19-29.\r
+***/\r
+\r
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
+  unsigned *ptruNode1, unsigned *ptruNode2,\r
+  double *ptrdLength1, double *ptrdLength2)\r
+       {\r
+       const unsigned uNodeCount = tree.GetNodeCount();\r
+       const unsigned uLeafCount = tree.GetLeafCount();\r
+       unsigned uNode1 = NULL_NEIGHBOR;\r
+       unsigned uNode2 = NULL_NEIGHBOR;\r
+       double dMinHeight = VERY_LARGE_DOUBLE;\r
+       double dBestLength1 = VERY_LARGE_DOUBLE;\r
+       double dBestLength2 = VERY_LARGE_DOUBLE;\r
+\r
+       for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
+               {\r
+               const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);\r
+               for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
+                       {\r
+                       const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);\r
+\r
+               // Avoid visiting same edge a second time in reversed order.\r
+                       if (uNeighborIndex < uNodeIndex)\r
+                               continue;\r
+\r
+                       const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);\r
+                       if (NULL_NEIGHBOR == uSubRev)\r
+                               Quit("RootByMinAvgLeafDist, internal error 1");\r
+\r
+               // Get info for edges Node1->Node2 and Node2->Node1 (reversed)\r
+                       const EdgeInfo &EI = EIs[uNodeIndex][uSub];\r
+                       const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];\r
+\r
+                       if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||\r
+                         EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)\r
+                               Quit("RootByMinAvgLeafDist, internal error 2");\r
+                       if (!EI.m_bSet)\r
+                               Quit("RootByMinAvgLeafDist, internal error 3");\r
+                       if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)\r
+                               Quit("RootByMinAvgLeafDist, internal error 4");\r
+\r
+                       const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);\r
+                       if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))\r
+                               Quit("RootByMinAvgLeafDist, internal error 5");\r
+\r
+               // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).\r
+               //\r
+        //     -----         ----\r
+        //          |       |\r
+        //          1----p--2\r
+        //          |       |\r
+        //     -----         ----\r
+               //\r
+               // Define:\r
+               //    ADLp = average distance to leaves to left of point p.\r
+               //        ADRp = average distance to leaves to right of point p.\r
+               //        L = edge length = distance 12\r
+               //    x = distance 1p\r
+               // So distance p2 = L - x.\r
+               // Average distance from p to leaves on left of p is:\r
+               //              ADLp = ADL1 + x\r
+               // Average distance from p to leaves on right of p is:\r
+               //              ADRp = ADR2 + (L - x)\r
+               // To be a root, we require these two distances to be equal,\r
+               //              ADLp = ADRp\r
+               //              ADL1 + x = ADR2 + (L - x)\r
+               // Solving for x,\r
+               //              x = (ADR2 - ADL1 + L)/2\r
+               // If 0 <= x <= L, we can place the root on edge 12.\r
+\r
+                       const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;\r
+                       const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;\r
+\r
+                       const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;\r
+                       if (x >= 0 && x <= dEdgeLength)\r
+                               {\r
+                               const double dLength1 = x;\r
+                               const double dLength2 = dEdgeLength - x;\r
+                               const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;\r
+                               const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;\r
+                               const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;\r
+#if    TRACE\r
+                               Log("Candidate root Node1=%u Node2=%u Height=%g\n",\r
+                                 uNodeIndex, uNeighborIndex, dHeight);\r
+#endif\r
+                               if (dHeight < dMinHeight)\r
+                                       {\r
+                                       uNode1 = uNodeIndex;\r
+                                       uNode2 = uNeighborIndex;\r
+                                       dBestLength1 = dLength1;\r
+                                       dBestLength2 = dLength2;\r
+                                       dMinHeight = dHeight;\r
+                                       }\r
+                               }\r
+                       }\r
+               }\r
+\r
+       if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)\r
+               Quit("RootByMinAvgLeafDist, internal error 6");\r
+\r
+#if    TRACE\r
+       Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",\r
+         uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);\r
+#endif\r
+\r
+       *ptruNode1 = uNode1;\r
+       *ptruNode2 = uNode2;\r
+       *ptrdLength1 = dBestLength1;\r
+       *ptrdLength2 = dBestLength2;\r
+       }\r
+\r
+void FixRoot(Tree &tree, ROOT Method)\r
+       {\r
+       if (!tree.IsRooted())\r
+               Quit("FixRoot: expecting rooted tree");\r
+\r
+       // Pseudo-root: keep root assigned by clustering\r
+       if (ROOT_Pseudo == Method)\r
+               return;\r
+\r
+       tree.UnrootByDeletingRoot();\r
+       tree.RootUnrootedTree(Method);\r
+       }\r