8 Sequence weights derived from a tree using Gotoh's
\r
11 Gotoh (1995) CABIOS 11(5), 543-51.
\r
13 Each edge e is assigned a weight w(e).
\r
15 Consider first a tree with three leaves A,B and C
\r
16 having branch lengths a, b and c, as follows.
\r
24 The internal node is denoted by R.
\r
29 x = bc(a + b)(a + c)
\r
32 Here F is a tunable normalization factor which is
\r
33 approximately 1.0. Then the edge weight for AR
\r
38 Similar expressions for the other edges follow by
\r
41 For a tree with more than three edges, the weight
\r
42 of an edge that ends in a leaf is computed from
\r
43 the three-way tree that includes the edge and
\r
44 its two neighbors. The weight of an internal edge
\r
45 is computed as the product of the weights for that
\r
46 edge derived from the two three-way subtrees that
\r
49 For example, consider the following tree.
\r
57 Here, w(RV) is computed as the product of the
\r
58 two values for w(RV) derived from the three-way
\r
59 trees with leaves ABV and RCD respectively.
\r
61 The calculation is done using "Gotoh lengths",
\r
62 not the real edge lengths.
\r
64 The Gotoh length G of a directed edge is calculated
\r
69 where d is the length of the edge, and L and R are
\r
70 the Gotoh lengths of the left and right edges adjoining
\r
71 the terminal end of the edge. If the edge terminates on
\r
74 Pairwise sequence weights are computed as the
\r
75 product of edge weights on the path that connects
\r
78 If the tree is split into two subtrees by deleting
\r
79 a given edge e, then the pairwise weights factorize.
\r
80 For operations on profiles formed from the two
\r
81 subtrees, it is possible to assign a weight to a
\r
82 sequence as the product of edge weights on a path
\r
86 // The xxxUnrooted functions present a rooted tree as
\r
87 // if it had been unrooted by deleting the root node.
\r
88 static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,
\r
91 if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
\r
92 Quit("GetFirstNeighborUnrooted, should never be called with root");
\r
93 if (!tree.IsEdge(uNode1, uNode2))
\r
95 if (!tree.IsRoot(tree.GetParent(uNode1)) ||
\r
96 !tree.IsRoot(tree.GetParent(uNode2)))
\r
97 Quit("GetFirstNeighborUnrooted, not edge");
\r
98 const unsigned uRoot = tree.GetRootNodeIndex();
\r
99 return tree.GetFirstNeighbor(uNode1, uRoot);
\r
102 unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);
\r
103 if (tree.IsRoot(uNeighbor))
\r
104 return tree.GetFirstNeighbor(uNeighbor, uNode1);
\r
108 static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,
\r
111 if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
\r
112 Quit("GetFirstNeighborUnrooted, should never be called with root");
\r
113 if (!tree.IsEdge(uNode1, uNode2))
\r
115 if (!tree.IsRoot(tree.GetParent(uNode1)) ||
\r
116 !tree.IsRoot(tree.GetParent(uNode2)))
\r
117 Quit("GetFirstNeighborUnrooted, not edge");
\r
118 const unsigned uRoot = tree.GetRootNodeIndex();
\r
119 return tree.GetSecondNeighbor(uNode1, uRoot);
\r
122 unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);
\r
123 if (tree.IsRoot(uNeighbor))
\r
124 return tree.GetFirstNeighbor(uNeighbor, uNode1);
\r
128 static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,
\r
131 unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);
\r
132 if (tree.IsRoot(uNeighbor))
\r
133 return tree.GetFirstNeighbor(uNeighbor, uNode1);
\r
137 static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,
\r
140 if (tree.IsEdge(uNode1, uNode2))
\r
141 return tree.GetNeighborSubscript(uNode1, uNode2);
\r
142 if (!tree.IsRoot(tree.GetParent(uNode1)) ||
\r
143 !tree.IsRoot(tree.GetParent(uNode2)))
\r
144 Quit("GetNeighborSubscriptUnrooted, not edge");
\r
145 for (unsigned uSub = 0; uSub < 3; ++uSub)
\r
146 if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)
\r
148 Quit("GetNeighborSubscriptUnrooted, not a neighbor");
\r
149 return NULL_NEIGHBOR;
\r
152 static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,
\r
155 if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
\r
156 Quit("GetEdgeLengthUnrooted, should never be called with root");
\r
157 if (!tree.IsEdge(uNode1, uNode2))
\r
159 if (!tree.IsRoot(tree.GetParent(uNode1)) ||
\r
160 !tree.IsRoot(tree.GetParent(uNode2)))
\r
161 Quit("GetEdgeLengthUnrooted, not edge");
\r
163 const unsigned uRoot = tree.GetRootNodeIndex();
\r
164 return tree.GetEdgeLength(uNode1, uRoot) +
\r
165 tree.GetEdgeLength(uNode2, uRoot);
\r
167 return tree.GetEdgeLength(uNode1, uNode2);
\r
170 double GetGotohLength(const Tree &tree, unsigned R, unsigned A)
\r
172 double dThis = GetEdgeLengthUnrooted(tree, R, A);
\r
174 // Enforce non-negative edge lengths
\r
178 if (tree.IsLeaf(A))
\r
181 const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);
\r
182 const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);
\r
183 const double dFirst = GetGotohLength(tree, A, uFirst);
\r
184 const double dSecond = GetGotohLength(tree, A, uSecond);
\r
185 const double dSum = dFirst + dSecond;
\r
186 const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;
\r
187 return dThis + dThird;
\r
190 // Return weight of edge A-R in three-way subtree that has
\r
191 // leaves A,B,C and internal node R.
\r
192 static double GotohWeightThreeWay(const Tree &tree, unsigned A,
\r
193 unsigned B, unsigned C, unsigned R)
\r
195 const double F = 1.0;
\r
197 if (tree.IsLeaf(R))
\r
198 Quit("GotohThreeWay: R must be internal node");
\r
200 double a = GetGotohLength(tree, R, A);
\r
201 double b = GetGotohLength(tree, R, B);
\r
202 double c = GetGotohLength(tree, R, C);
\r
204 double S = b*c + c*a + a*b;
\r
205 double x = b*c*(a + b)*(a + c);
\r
206 double y = a*(b + c)*F*S;
\r
208 // y is zero iff all three branch lengths are zero.
\r
214 static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,
\r
215 unsigned uNodeIndex2)
\r
219 if (!tree.IsLeaf(uNodeIndex1))
\r
221 unsigned R = uNodeIndex1;
\r
222 unsigned A = uNodeIndex2;
\r
223 unsigned B = GetFirstNeighborUnrooted(tree, R, A);
\r
224 unsigned C = GetSecondNeighborUnrooted(tree, R, A);
\r
225 w1 = GotohWeightThreeWay(tree, A, B, C, R);
\r
227 if (!tree.IsLeaf(uNodeIndex2))
\r
229 unsigned R = uNodeIndex2;
\r
230 unsigned A = uNodeIndex1;
\r
231 unsigned B = GetFirstNeighborUnrooted(tree, R, A);
\r
232 unsigned C = GetSecondNeighborUnrooted(tree, R, A);
\r
233 w2 = GotohWeightThreeWay(tree, A, B, C, R);
\r
238 void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)
\r
240 const unsigned uNodeCount = tree.GetNodeCount();
\r
241 for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
\r
243 if (tree.IsRoot(uNodeIndex1))
\r
245 for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
\r
247 const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
\r
248 if (NULL_NEIGHBOR == uNodeIndex2)
\r
251 // Avoid computing same edge twice in reversed order
\r
252 if (uNodeIndex2 < uNodeIndex1)
\r
255 const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);
\r
256 const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);
\r
259 assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));
\r
260 assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));
\r
261 const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);
\r
262 if (!BTEq(w, wRev))
\r
263 Quit("CalcThreeWayWeights: rev check failed %g %g",
\r
267 EdgeWeights[uNodeIndex1][uSub1] = w;
\r
268 EdgeWeights[uNodeIndex2][uSub2] = w;
\r
273 static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
\r
274 double dPathWeight, WEIGHT *Weights)
\r
276 if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
\r
277 Quit("SetSeqWeights, should never be called with root");
\r
279 const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);
\r
280 if (tree.IsLeaf(uNode2))
\r
282 const unsigned Id = tree.GetLeafId(uNode2);
\r
283 Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);
\r
286 const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);
\r
287 const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);
\r
288 dPathWeight *= dThisLength;
\r
289 SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);
\r
290 SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);
\r
293 void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
\r
297 Log("CalcThreeWayEdgeWeights\n");
\r
301 if (tree.IsRoot(uNode1))
\r
302 uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);
\r
303 else if (tree.IsRoot(uNode2))
\r
304 uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);
\r
305 const unsigned uNodeCount = tree.GetNodeCount();
\r
306 WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];
\r
307 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
308 EdgeWeights[uNodeIndex] = new WEIGHT[3];
\r
310 CalcThreeWayEdgeWeights(tree, EdgeWeights);
\r
314 Log("Node1 Node2 Length Gotoh EdgeWt\n");
\r
315 Log("----- ----- ------ ------ ------\n");
\r
316 for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
\r
318 if (tree.IsRoot(uNodeIndex1))
\r
320 for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
\r
322 const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
\r
323 if (NULL_NEIGHBOR == uNodeIndex2)
\r
325 if (uNodeIndex2 < uNodeIndex1)
\r
327 const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];
\r
328 const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);
\r
329 const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);
\r
330 Log("%5u %5u %6.3f %6.3f %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);
\r
336 SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);
\r
337 SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);
\r
339 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
340 delete[] EdgeWeights[uNodeIndex];
\r
341 delete[] EdgeWeights;
\r