Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / muscle / threewaywt.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 #include <math.h>\r
4 \r
5 #define TRACE   0\r
6 \r
7 /***\r
8 Sequence weights derived from a tree using Gotoh's\r
9 three-way method.\r
10 \r
11         Gotoh (1995) CABIOS 11(5), 543-51.\r
12 \r
13 Each edge e is assigned a weight w(e).\r
14 \r
15 Consider first a tree with three leaves A,B and C\r
16 having branch lengths a, b and c, as follows.\r
17 \r
18             B\r
19             |\r
20             b\r
21             |\r
22     A---a---R---c---C\r
23 \r
24 The internal node is denoted by R.\r
25 \r
26 Define:\r
27 \r
28         S = (ab + ca + ab)\r
29         x = bc(a + b)(a + c)\r
30         y = a(b + c)FS\r
31 \r
32 Here F is a tunable normalization factor which is\r
33 approximately 1.0. Then the edge weight for AR\r
34 is computed as:\r
35 \r
36         w(AR) = sqrt(x/y)\r
37 \r
38 Similar expressions for the other edges follow by\r
39 symmetry.\r
40 \r
41 For a tree with more than three edges, the weight\r
42 of an edge that ends in a leaf is computed from\r
43 the three-way tree that includes the edge and\r
44 its two neighbors. The weight of an internal edge\r
45 is computed as the product of the weights for that\r
46 edge derived from the two three-way subtrees that\r
47 include that edge.\r
48 \r
49 For example, consider the following tree.\r
50 \r
51        B\r
52        |\r
53     A--R--V--C\r
54           |\r
55           D\r
56 \r
57 Here, w(RV) is computed as the product of the\r
58 two values for w(RV) derived from the three-way\r
59 trees with leaves ABV and RCD respectively.\r
60 \r
61 The calculation is done using "Gotoh lengths",\r
62 not the real edge lengths.\r
63 \r
64 The Gotoh length G of a directed edge is calculated\r
65 recursively as:\r
66 \r
67         G = d + LR/(L + R)\r
68 \r
69 where d is the length of the edge, and L and R are\r
70 the Gotoh lengths of the left and right edges adjoining\r
71 the terminal end of the edge. If the edge terminates on\r
72 a leaf, then G=d.\r
73 \r
74 Pairwise sequence weights are computed as the\r
75 product of edge weights on the path that connects\r
76 their leaves.\r
77 \r
78 If the tree is split into two subtrees by deleting\r
79 a given edge e, then the pairwise weights factorize.\r
80 For operations on profiles formed from the two\r
81 subtrees, it is possible to assign a weight to a\r
82 sequence as the product of edge weights on a path\r
83 from e to its leaf.\r
84 ***/\r
85 \r
86 // The xxxUnrooted functions present a rooted tree as\r
87 // if it had been unrooted by deleting the root node.\r
88 static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
89   unsigned uNode2)\r
90         {\r
91         if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
92                 Quit("GetFirstNeighborUnrooted, should never be called with root");\r
93         if (!tree.IsEdge(uNode1, uNode2))\r
94                 {\r
95                 if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
96                   !tree.IsRoot(tree.GetParent(uNode2)))\r
97                         Quit("GetFirstNeighborUnrooted, not edge");\r
98                 const unsigned uRoot = tree.GetRootNodeIndex();\r
99                 return tree.GetFirstNeighbor(uNode1, uRoot);\r
100                 }\r
101 \r
102         unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);\r
103         if (tree.IsRoot(uNeighbor))\r
104                 return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
105         return uNeighbor;\r
106         }\r
107 \r
108 static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
109   unsigned uNode2)\r
110         {\r
111         if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
112                 Quit("GetFirstNeighborUnrooted, should never be called with root");\r
113         if (!tree.IsEdge(uNode1, uNode2))\r
114                 {\r
115                 if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
116                   !tree.IsRoot(tree.GetParent(uNode2)))\r
117                         Quit("GetFirstNeighborUnrooted, not edge");\r
118                 const unsigned uRoot = tree.GetRootNodeIndex();\r
119                 return tree.GetSecondNeighbor(uNode1, uRoot);\r
120                 }\r
121 \r
122         unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);\r
123         if (tree.IsRoot(uNeighbor))\r
124                 return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
125         return uNeighbor;\r
126         }\r
127 \r
128 static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,\r
129   unsigned uSub)\r
130         {\r
131         unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);\r
132         if (tree.IsRoot(uNeighbor))\r
133                 return tree.GetFirstNeighbor(uNeighbor, uNode1);\r
134         return uNeighbor;\r
135         }\r
136 \r
137 static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,\r
138   unsigned uNode2)\r
139         {\r
140         if (tree.IsEdge(uNode1, uNode2))\r
141                 return tree.GetNeighborSubscript(uNode1, uNode2);\r
142         if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
143           !tree.IsRoot(tree.GetParent(uNode2)))\r
144                 Quit("GetNeighborSubscriptUnrooted, not edge");\r
145         for (unsigned uSub = 0; uSub < 3; ++uSub)\r
146                 if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)\r
147                         return uSub;\r
148         Quit("GetNeighborSubscriptUnrooted, not a neighbor");\r
149         return NULL_NEIGHBOR;\r
150         }\r
151 \r
152 static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,\r
153   unsigned uNode2)\r
154         {\r
155         if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
156                 Quit("GetEdgeLengthUnrooted, should never be called with root");\r
157         if (!tree.IsEdge(uNode1, uNode2))\r
158                 {\r
159                 if (!tree.IsRoot(tree.GetParent(uNode1)) ||\r
160                   !tree.IsRoot(tree.GetParent(uNode2)))\r
161                         Quit("GetEdgeLengthUnrooted, not edge");\r
162 \r
163                 const unsigned uRoot = tree.GetRootNodeIndex();\r
164                 return tree.GetEdgeLength(uNode1, uRoot) +\r
165                   tree.GetEdgeLength(uNode2, uRoot);\r
166                 }\r
167         return tree.GetEdgeLength(uNode1, uNode2);\r
168         }\r
169 \r
170 double GetGotohLength(const Tree &tree, unsigned R, unsigned A)\r
171         {\r
172         double dThis = GetEdgeLengthUnrooted(tree, R, A);\r
173 \r
174 // Enforce non-negative edge lengths\r
175         if (dThis < 0)\r
176                 dThis = 0;\r
177 \r
178         if (tree.IsLeaf(A))\r
179                 return dThis;\r
180 \r
181         const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);\r
182         const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);\r
183         const double dFirst = GetGotohLength(tree, A, uFirst);\r
184         const double dSecond = GetGotohLength(tree, A, uSecond);\r
185         const double dSum = dFirst + dSecond;\r
186         const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;\r
187         return dThis + dThird;\r
188         }\r
189 \r
190 // Return weight of edge A-R in three-way subtree that has\r
191 // leaves A,B,C and internal node R.\r
192 static double GotohWeightThreeWay(const Tree &tree, unsigned A,\r
193   unsigned B, unsigned C, unsigned R)\r
194         {\r
195         const double F = 1.0;\r
196 \r
197         if (tree.IsLeaf(R))\r
198                 Quit("GotohThreeWay: R must be internal node");\r
199 \r
200         double a = GetGotohLength(tree, R, A);\r
201         double b = GetGotohLength(tree, R, B);\r
202         double c = GetGotohLength(tree, R, C);\r
203 \r
204         double S = b*c + c*a + a*b;\r
205         double x = b*c*(a + b)*(a + c);\r
206         double y = a*(b + c)*F*S;\r
207 \r
208 // y is zero iff all three branch lengths are zero.\r
209         if (y < 0.001)\r
210                 return 1.0;\r
211         return sqrt(x/y);\r
212         }\r
213 \r
214 static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,\r
215   unsigned uNodeIndex2)\r
216         {\r
217         double w1 = 1.0;\r
218         double w2 = 1.0;\r
219         if (!tree.IsLeaf(uNodeIndex1))\r
220                 {\r
221                 unsigned R = uNodeIndex1;\r
222                 unsigned A = uNodeIndex2;\r
223                 unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
224                 unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
225                 w1 = GotohWeightThreeWay(tree, A, B, C, R);\r
226                 }\r
227         if (!tree.IsLeaf(uNodeIndex2))\r
228                 {\r
229                 unsigned R = uNodeIndex2;\r
230                 unsigned A = uNodeIndex1;\r
231                 unsigned B = GetFirstNeighborUnrooted(tree, R, A);\r
232                 unsigned C = GetSecondNeighborUnrooted(tree, R, A);\r
233                 w2 = GotohWeightThreeWay(tree, A, B, C, R);\r
234                 }\r
235         return w1*w2;\r
236         }\r
237 \r
238 void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)\r
239         {\r
240         const unsigned uNodeCount = tree.GetNodeCount();\r
241         for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
242                 {\r
243                 if (tree.IsRoot(uNodeIndex1))\r
244                         continue;\r
245                 for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
246                         {\r
247                         const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
248                         if (NULL_NEIGHBOR == uNodeIndex2)\r
249                                 continue;\r
250 \r
251                 // Avoid computing same edge twice in reversed order\r
252                         if (uNodeIndex2 < uNodeIndex1)\r
253                                 continue;\r
254 \r
255                         const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);\r
256                         const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);\r
257 #if     DEBUG\r
258                         {\r
259                         assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));\r
260                         assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));\r
261                         const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);\r
262                         if (!BTEq(w, wRev))\r
263                                 Quit("CalcThreeWayWeights: rev check failed %g %g",\r
264                                   w, wRev);\r
265                         }\r
266 #endif\r
267                         EdgeWeights[uNodeIndex1][uSub1] = w;\r
268                         EdgeWeights[uNodeIndex2][uSub2] = w;\r
269                         }\r
270                 }\r
271         }\r
272 \r
273 static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
274   double dPathWeight, WEIGHT *Weights)\r
275         {\r
276         if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))\r
277                 Quit("SetSeqWeights, should never be called with root");\r
278 \r
279         const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);\r
280         if (tree.IsLeaf(uNode2))\r
281                 {\r
282                 const unsigned Id = tree.GetLeafId(uNode2);\r
283                 Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);\r
284                 return;\r
285                 }\r
286         const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);\r
287         const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);\r
288         dPathWeight *= dThisLength;\r
289         SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);\r
290         SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);\r
291         }\r
292 \r
293 void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,\r
294   WEIGHT *Weights)\r
295         {\r
296 #if     TRACE\r
297         Log("CalcThreeWayEdgeWeights\n");\r
298         tree.LogMe();\r
299 #endif\r
300 \r
301         if (tree.IsRoot(uNode1))\r
302                 uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);\r
303         else if (tree.IsRoot(uNode2))\r
304                 uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);\r
305         const unsigned uNodeCount = tree.GetNodeCount();\r
306         WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];\r
307         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
308                 EdgeWeights[uNodeIndex] = new WEIGHT[3];\r
309 \r
310         CalcThreeWayEdgeWeights(tree, EdgeWeights);\r
311 \r
312 #if     TRACE\r
313         {\r
314         Log("Node1  Node2  Length   Gotoh  EdgeWt\n");\r
315         Log("-----  -----  ------  ------  ------\n");\r
316         for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)\r
317                 {\r
318                 if (tree.IsRoot(uNodeIndex1))\r
319                         continue;\r
320                 for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)\r
321                         {\r
322                         const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);\r
323                         if (NULL_NEIGHBOR == uNodeIndex2)\r
324                                 continue;\r
325                         if (uNodeIndex2 < uNodeIndex1)\r
326                                 continue;\r
327                         const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];\r
328                         const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);\r
329                         const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);\r
330                         Log("%5u  %5u  %6.3f  %6.3f  %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);\r
331                         }\r
332                 }\r
333         }\r
334 #endif\r
335 \r
336         SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);\r
337         SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);\r
338 \r
339         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
340                 delete[] EdgeWeights[uNodeIndex];\r
341         delete[] EdgeWeights;\r
342         }\r