+++ /dev/null
-#include "muscle.h"\r
-#include "tree.h"\r
-#include "msa.h"\r
-\r
-/***\r
-Compute weights by the CLUSTALW method.\r
-Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;\r
-see also CLUSTALW paper.\r
-\r
-Weights are computed from the edge lengths of a rooted tree.\r
-\r
-Define the strength of an edge to be its length divided by the number\r
-of leaves under that edge. The weight of a sequence is then the sum\r
-of edge strengths on the path from the root to the leaf.\r
-\r
-Example.\r
-\r
- 0.2\r
- -----A 0.1\r
- -x ------- B 0.7\r
- --------y ----------- C\r
- 0.3 ----------z\r
- 0.4 -------------- D\r
- 0.8\r
-\r
-Edge Length Leaves Strength\r
----- ----- ------ --------\r
-xy 0.3 3 0.1\r
-xA 0.2 1 0.2\r
-yz 0.4 2 0.2\r
-yB 0.1 1 0.1\r
-zC 0.7 1 0.7\r
-zD 0.8 1 0.8\r
-\r
-Leaf Path Strengths Weight\r
----- ---- --------- ------\r
-A xA 0.2 0.2\r
-B xy-yB 0.1 + 0.1 0.2\r
-C xy-yz-zC 0.1 + 0.2 + 0.7 1.0\r
-D xy-yz-zD 0.1 + 0.2 + 0.8 1.1\r
-\r
-***/\r
-\r
-#define TRACE 0\r
-\r
-static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,\r
- unsigned LeavesUnderNode[])\r
- {\r
- if (tree.IsLeaf(uNodeIndex))\r
- {\r
- LeavesUnderNode[uNodeIndex] = 1;\r
- return 1;\r
- }\r
-\r
- const unsigned uLeft = tree.GetLeft(uNodeIndex);\r
- const unsigned uRight = tree.GetRight(uNodeIndex);\r
- const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);\r
- const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);\r
- const unsigned uCount = uRightCount + uLeftCount;\r
- LeavesUnderNode[uNodeIndex] = uCount;\r
- return uCount;\r
- }\r
-\r
-void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])\r
- {\r
-#if TRACE\r
- Log("CalcClustalWWeights\n");\r
- tree.LogMe();\r
-#endif\r
-\r
- const unsigned uLeafCount = tree.GetLeafCount();\r
- if (0 == uLeafCount)\r
- return;\r
- else if (1 == uLeafCount)\r
- {\r
- Weights[0] = (WEIGHT) 1.0;\r
- return;\r
- }\r
- else if (2 == uLeafCount)\r
- {\r
- Weights[0] = (WEIGHT) 0.5;\r
- Weights[1] = (WEIGHT) 0.5;\r
- return;\r
- }\r
-\r
- if (!tree.IsRooted())\r
- Quit("CalcClustalWWeights requires rooted tree");\r
-\r
- const unsigned uNodeCount = tree.GetNodeCount();\r
- unsigned *LeavesUnderNode = new unsigned[uNodeCount];\r
- memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));\r
-\r
- const unsigned uRootNodeIndex = tree.GetRootNodeIndex();\r
- unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);\r
- if (uLeavesUnderRoot != uLeafCount)\r
- Quit("WeightsFromTreee: Internal error, root count %u %u",\r
- uLeavesUnderRoot, uLeafCount);\r
-\r
-#if TRACE\r
- Log("Node Leaves Length Strength\n");\r
- Log("---- ------ -------- --------\n");\r
- // 1234 123456 12345678 12345678\r
-#endif\r
-\r
- double *Strengths = new double[uNodeCount];\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- if (tree.IsRoot(uNodeIndex))\r
- {\r
- Strengths[uNodeIndex] = 0.0;\r
- continue;\r
- }\r
- const unsigned uParent = tree.GetParent(uNodeIndex);\r
- const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);\r
- const unsigned uLeaves = LeavesUnderNode[uNodeIndex];\r
- const double dStrength = dLength / (double) uLeaves;\r
- Strengths[uNodeIndex] = dStrength;\r
-#if TRACE\r
- Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);\r
-#endif\r
- }\r
-\r
-#if TRACE\r
- Log("\n");\r
- Log(" Seq Path..Weight\n");\r
- Log("-------------------- ------------\n");\r
-#endif\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- {\r
- const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);\r
-#if TRACE\r
- Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);\r
-#endif\r
- if (!tree.IsLeaf(uLeafNodeIndex))\r
- Quit("CalcClustalWWeights: leaf");\r
-\r
- double dWeight = 0;\r
- unsigned uNode = uLeafNodeIndex;\r
- while (!tree.IsRoot(uNode))\r
- {\r
- dWeight += Strengths[uNode];\r
- uNode = tree.GetParent(uNode);\r
-#if TRACE\r
- Log("->%u(%g)", uNode, Strengths[uNode]);\r
-#endif\r
- }\r
- if (dWeight < 0.0001)\r
- {\r
-#if TRACE\r
- Log("zero->one");\r
-#endif\r
- dWeight = 1.0;\r
- }\r
- Weights[n] = (WEIGHT) dWeight;\r
-#if TRACE\r
- Log(" = %g\n", dWeight);\r
-#endif\r
- }\r
-\r
- delete[] Strengths;\r
- delete[] LeavesUnderNode;\r
-\r
- Normalize(Weights, uLeafCount);\r
- }\r
-\r
-void MSA::SetClustalWWeights(const Tree &tree)\r
- {\r
- const unsigned uSeqCount = GetSeqCount();\r
- const unsigned uLeafCount = tree.GetLeafCount();\r
-\r
- WEIGHT *Weights = new WEIGHT[uSeqCount];\r
-\r
- CalcClustalWWeights(tree, Weights);\r
-\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- {\r
- const WEIGHT w = Weights[n];\r
- const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);\r
- const unsigned uId = tree.GetLeafId(uLeafNodeIndex);\r
- const unsigned uSeqIndex = GetSeqIndex(uId);\r
-#if DEBUG\r
- if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))\r
- Quit("MSA::SetClustalWWeights: names don't match");\r
-#endif\r
- SetSeqWeight(uSeqIndex, w);\r
- }\r
- NormalizeWeights((WEIGHT) 1.0);\r
-\r
- delete[] Weights;\r
- }\r