6 Compute weights by the CLUSTALW method.
\r
7 Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
\r
8 see also CLUSTALW paper.
\r
10 Weights are computed from the edge lengths of a rooted tree.
\r
12 Define the strength of an edge to be its length divided by the number
\r
13 of leaves under that edge. The weight of a sequence is then the sum
\r
14 of edge strengths on the path from the root to the leaf.
\r
21 --------y ----------- C
\r
23 0.4 -------------- D
\r
26 Edge Length Leaves Strength
\r
27 ---- ----- ------ --------
\r
35 Leaf Path Strengths Weight
\r
36 ---- ---- --------- ------
\r
38 B xy-yB 0.1 + 0.1 0.2
\r
39 C xy-yz-zC 0.1 + 0.2 + 0.7 1.0
\r
40 D xy-yz-zD 0.1 + 0.2 + 0.8 1.1
\r
46 static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
\r
47 unsigned LeavesUnderNode[])
\r
49 if (tree.IsLeaf(uNodeIndex))
\r
51 LeavesUnderNode[uNodeIndex] = 1;
\r
55 const unsigned uLeft = tree.GetLeft(uNodeIndex);
\r
56 const unsigned uRight = tree.GetRight(uNodeIndex);
\r
57 const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
\r
58 const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
\r
59 const unsigned uCount = uRightCount + uLeftCount;
\r
60 LeavesUnderNode[uNodeIndex] = uCount;
\r
64 void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
\r
67 Log("CalcClustalWWeights\n");
\r
71 const unsigned uLeafCount = tree.GetLeafCount();
\r
72 if (0 == uLeafCount)
\r
74 else if (1 == uLeafCount)
\r
76 Weights[0] = (WEIGHT) 1.0;
\r
79 else if (2 == uLeafCount)
\r
81 Weights[0] = (WEIGHT) 0.5;
\r
82 Weights[1] = (WEIGHT) 0.5;
\r
86 if (!tree.IsRooted())
\r
87 Quit("CalcClustalWWeights requires rooted tree");
\r
89 const unsigned uNodeCount = tree.GetNodeCount();
\r
90 unsigned *LeavesUnderNode = new unsigned[uNodeCount];
\r
91 memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
\r
93 const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
\r
94 unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
\r
95 if (uLeavesUnderRoot != uLeafCount)
\r
96 Quit("WeightsFromTreee: Internal error, root count %u %u",
\r
97 uLeavesUnderRoot, uLeafCount);
\r
100 Log("Node Leaves Length Strength\n");
\r
101 Log("---- ------ -------- --------\n");
\r
102 // 1234 123456 12345678 12345678
\r
105 double *Strengths = new double[uNodeCount];
\r
106 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
108 if (tree.IsRoot(uNodeIndex))
\r
110 Strengths[uNodeIndex] = 0.0;
\r
113 const unsigned uParent = tree.GetParent(uNodeIndex);
\r
114 const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
\r
115 const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
\r
116 const double dStrength = dLength / (double) uLeaves;
\r
117 Strengths[uNodeIndex] = dStrength;
\r
119 Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
\r
125 Log(" Seq Path..Weight\n");
\r
126 Log("-------------------- ------------\n");
\r
128 for (unsigned n = 0; n < uLeafCount; ++n)
\r
130 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
\r
132 Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
\r
134 if (!tree.IsLeaf(uLeafNodeIndex))
\r
135 Quit("CalcClustalWWeights: leaf");
\r
137 double dWeight = 0;
\r
138 unsigned uNode = uLeafNodeIndex;
\r
139 while (!tree.IsRoot(uNode))
\r
141 dWeight += Strengths[uNode];
\r
142 uNode = tree.GetParent(uNode);
\r
144 Log("->%u(%g)", uNode, Strengths[uNode]);
\r
147 if (dWeight < 0.0001)
\r
154 Weights[n] = (WEIGHT) dWeight;
\r
156 Log(" = %g\n", dWeight);
\r
160 delete[] Strengths;
\r
161 delete[] LeavesUnderNode;
\r
163 Normalize(Weights, uLeafCount);
\r
166 void MSA::SetClustalWWeights(const Tree &tree)
\r
168 const unsigned uSeqCount = GetSeqCount();
\r
169 const unsigned uLeafCount = tree.GetLeafCount();
\r
171 WEIGHT *Weights = new WEIGHT[uSeqCount];
\r
173 CalcClustalWWeights(tree, Weights);
\r
175 for (unsigned n = 0; n < uLeafCount; ++n)
\r
177 const WEIGHT w = Weights[n];
\r
178 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
\r
179 const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
\r
180 const unsigned uSeqIndex = GetSeqIndex(uId);
\r
182 if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
\r
183 Quit("MSA::SetClustalWWeights: names don't match");
\r
185 SetSeqWeight(uSeqIndex, w);
\r
187 NormalizeWeights((WEIGHT) 1.0);
\r