Next version of JABA
[jabaws.git] / binaries / src / muscle / phy3.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 #include "edgelist.h"\r
4 \r
5 #define TRACE   0\r
6 \r
7 struct EdgeInfo\r
8         {\r
9         EdgeInfo()\r
10                 {\r
11                 m_bSet = false;\r
12                 }\r
13 // Is data in this structure valid (i.e, has been set)?\r
14         bool m_bSet;\r
15 \r
16 // Node at start of this edge\r
17         unsigned m_uNode1;\r
18 \r
19 // Node at end of this edge\r
20         unsigned m_uNode2;\r
21 \r
22 // Maximum distance from Node2 to a leaf\r
23         double m_dMaxDistToLeaf;\r
24 \r
25 // Sum of distances from Node2 to all leaves under Node2\r
26         double m_dTotalDistToLeaves;\r
27 \r
28 // Next node on path from Node2 to most distant leaf\r
29         unsigned m_uMaxStep;\r
30 \r
31 // Most distant leaf from Node2 (used for debugging only)\r
32         unsigned m_uMostDistantLeaf;\r
33 \r
34 // Number of leaves under Node2\r
35         unsigned m_uLeafCount;\r
36         };\r
37 \r
38 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
39   unsigned *ptruNode1, unsigned *ptruNode2,\r
40   double *ptrdLength1, double *ptrdLength2);\r
41 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
42   unsigned *ptruNode1, unsigned *ptruNode2,\r
43   double *ptrdLength1, double *ptrdLength2);\r
44 \r
45 static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)\r
46         {\r
47         Log("Node1  Node2  MaxDist  TotDist  MostDist  LeafCount  Step\n");\r
48         Log("-----  -----  -------  -------  --------  ---------  ----\n");\r
49         //    12345  12345  1234567  1234567  12345678  123456789\r
50 \r
51         for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)\r
52                 for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)\r
53                         {\r
54                         const EdgeInfo &EI = EIs[uNode][uNeighbor];\r
55                         if (!EI.m_bSet)\r
56                                 continue;\r
57                         Log("%5u  %5u  %7.3g  %7.3g  %8u  %9u",\r
58                           EI.m_uNode1,\r
59                           EI.m_uNode2,\r
60                           EI.m_dMaxDistToLeaf,\r
61                           EI.m_dTotalDistToLeaves,\r
62                           EI.m_uMostDistantLeaf,\r
63                           EI.m_uLeafCount);\r
64                         if (NULL_NEIGHBOR != EI.m_uMaxStep)\r
65                                 Log("  %4u", EI.m_uMaxStep);\r
66                         Log("\n");\r
67                         }\r
68         }\r
69 \r
70 static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)\r
71         {\r
72         const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);\r
73         EdgeInfo &EI = EIs[uNode1][uNeighborIndex];\r
74         EI.m_uNode1 = uNode1;\r
75         EI.m_uNode2 = uNode2;\r
76 \r
77         if (tree.IsLeaf(uNode2))\r
78                 {\r
79                 EI.m_dMaxDistToLeaf = 0;\r
80                 EI.m_dTotalDistToLeaves = 0;\r
81                 EI.m_uMaxStep = NULL_NEIGHBOR;\r
82                 EI.m_uMostDistantLeaf = uNode2;\r
83                 EI.m_uLeafCount = 1;\r
84                 EI.m_bSet = true;\r
85                 return;\r
86                 }\r
87 \r
88         double dMaxDistToLeaf = -1e29;\r
89         double dTotalDistToLeaves = 0.0;\r
90         unsigned uLeafCount = 0;\r
91         unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
92         unsigned uMaxStep = NULL_NEIGHBOR;\r
93 \r
94         const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);\r
95         for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
96                 {\r
97                 const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);\r
98                 if (uNode3 == uNode1)\r
99                         continue;\r
100                 const EdgeInfo &EINext = EIs[uNode2][uSub];\r
101                 if (!EINext.m_bSet)\r
102                         Quit("CalcInfo: internal error, dist %u->%u not known",\r
103                                 uNode2, uNode3);\r
104 \r
105 \r
106                 uLeafCount += EINext.m_uLeafCount;\r
107 \r
108                 const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);\r
109                 const double dTotalDist = EINext.m_dTotalDistToLeaves +\r
110                   EINext.m_uLeafCount*dEdgeLength;\r
111                 dTotalDistToLeaves += dTotalDist;\r
112 \r
113                 const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;\r
114                 if (dDist > dMaxDistToLeaf)\r
115                         {\r
116                         dMaxDistToLeaf = dDist;\r
117                         uMostDistantLeaf = EINext.m_uMostDistantLeaf;\r
118                         uMaxStep = uNode3;\r
119                         }\r
120                 }\r
121         if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||\r
122           0 == uLeafCount)\r
123                 Quit("CalcInfo: internal error 2");\r
124 \r
125         const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);\r
126         EI.m_dMaxDistToLeaf = dMaxDistToLeaf;\r
127         EI.m_dTotalDistToLeaves = dTotalDistToLeaves;\r
128         EI.m_uMaxStep = uMaxStep;\r
129         EI.m_uMostDistantLeaf = uMostDistantLeaf;\r
130         EI.m_uLeafCount = uLeafCount;\r
131         EI.m_bSet = true;\r
132         }\r
133 \r
134 static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
135   unsigned uNodeTo)\r
136         {\r
137         const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);\r
138         return EIs[uNodeFrom][uSub].m_bSet;\r
139         }\r
140 \r
141 static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,\r
142   unsigned uNodeTo)\r
143         {\r
144         const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);\r
145         for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
146                 {\r
147                 unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);\r
148                 if (uNeighborIndex == uNodeFrom)\r
149                         continue;\r
150                 if (!EIs[uNodeTo][uSub].m_bSet)\r
151                         return false;\r
152                 }\r
153         return true;\r
154         }\r
155 \r
156 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,\r
157   double *ptrdLength1, double *ptrdLength2,\r
158   ROOT RootMethod)\r
159         {\r
160 #if     TRACE\r
161         tree.LogMe();\r
162 #endif\r
163         if (tree.IsRooted())\r
164                 Quit("FindRoot: tree already rooted");\r
165 \r
166         const unsigned uNodeCount = tree.GetNodeCount();\r
167         const unsigned uLeafCount = tree.GetLeafCount();\r
168 \r
169         if (uNodeCount < 2)\r
170                 Quit("Root: don't support trees with < 2 edges");\r
171 \r
172         EdgeInfo **EIs = new EdgeInfo *[uNodeCount];\r
173         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
174                 EIs[uNodeIndex] = new EdgeInfo[3];\r
175 \r
176         EdgeList Edges;\r
177         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
178                 if (tree.IsLeaf(uNodeIndex))\r
179                         {\r
180                         unsigned uParent = tree.GetNeighbor1(uNodeIndex);\r
181                         Edges.Add(uParent, uNodeIndex);\r
182                         }\r
183 \r
184 #if     TRACE\r
185         Log("Edges: ");\r
186         Edges.LogMe();\r
187 #endif\r
188 \r
189 // Main loop: iterate until all distances known\r
190         double dAllMaxDist = -1e20;\r
191         unsigned uMaxFrom = NULL_NEIGHBOR;\r
192         unsigned uMaxTo = NULL_NEIGHBOR;\r
193         for (;;)\r
194                 {\r
195                 EdgeList NextEdges;\r
196 \r
197 #if     TRACE\r
198                 Log("\nTop of main loop\n");\r
199                 Log("Edges: ");\r
200                 Edges.LogMe();\r
201                 Log("MDs:\n");\r
202                 ListEIs(EIs, uNodeCount);\r
203 #endif\r
204 \r
205         // For all edges\r
206                 const unsigned uEdgeCount = Edges.GetCount();\r
207                 if (0 == uEdgeCount)\r
208                         break;\r
209                 for (unsigned n = 0; n < uEdgeCount; ++n)\r
210                         {\r
211                         unsigned uNodeFrom;\r
212                         unsigned uNodeTo;\r
213                         Edges.GetEdge(n, &uNodeFrom, &uNodeTo);\r
214 \r
215                         CalcInfo(tree, uNodeFrom, uNodeTo, EIs);\r
216 #if     TRACE\r
217                         Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);\r
218 #endif\r
219                         const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);\r
220                         for (unsigned i = 0; i < uNeighborCount; ++i)\r
221                                 {\r
222                                 const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);\r
223                                 if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&\r
224                                   AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))\r
225                                         NextEdges.Add(uNeighborIndex, uNodeFrom);\r
226                                 }\r
227                         }\r
228                 Edges.Copy(NextEdges);\r
229                 }\r
230 \r
231 #if     TRACE\r
232         ListEIs(EIs, uNodeCount);\r
233 #endif\r
234 \r
235         switch (RootMethod)\r
236                 {\r
237         case ROOT_MidLongestSpan:\r
238                 RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,\r
239                   ptrdLength1, ptrdLength2);\r
240                 break;\r
241 \r
242         case ROOT_MinAvgLeafDist:\r
243                 RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,\r
244                   ptrdLength1, ptrdLength2);\r
245                 break;\r
246 \r
247         default:\r
248                 Quit("Invalid RootMethod=%d", RootMethod);\r
249                 }\r
250 \r
251         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
252                 delete[] EIs[uNodeIndex];\r
253         delete[] EIs;\r
254         }\r
255 \r
256 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,\r
257   unsigned *ptruNode1, unsigned *ptruNode2,\r
258   double *ptrdLength1, double *ptrdLength2)\r
259         {\r
260         const unsigned uNodeCount = tree.GetNodeCount();\r
261 \r
262         unsigned uLeaf1 = NULL_NEIGHBOR;\r
263         unsigned uMostDistantLeaf = NULL_NEIGHBOR;\r
264         double dMaxDist = -VERY_LARGE_DOUBLE;\r
265         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
266                 {\r
267                 if (!tree.IsLeaf(uNodeIndex))\r
268                         continue;\r
269 \r
270                 const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);\r
271                 if (NULL_NEIGHBOR == uNode2)\r
272                         Quit("RootByMidLongestSpan: internal error 0");\r
273                 const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);\r
274                 const EdgeInfo &EI = EIs[uNodeIndex][0];\r
275                 if (!EI.m_bSet)\r
276                         Quit("RootByMidLongestSpan: internal error 1");\r
277                 if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)\r
278                         Quit("RootByMidLongestSpan: internal error 2");\r
279                 const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;\r
280                 if (dSpanLength > dMaxDist)\r
281                         {\r
282                         dMaxDist = dSpanLength;\r
283                         uLeaf1 = uNodeIndex;\r
284                         uMostDistantLeaf = EI.m_uMostDistantLeaf;\r
285                         }\r
286                 }\r
287         \r
288         if (NULL_NEIGHBOR == uLeaf1)\r
289                 Quit("RootByMidLongestSpan: internal error 3");\r
290 \r
291         const double dTreeHeight = dMaxDist/2.0;\r
292         unsigned uNode1 = uLeaf1;\r
293         unsigned uNode2 = tree.GetNeighbor1(uLeaf1);\r
294         double dAccumSpanLength = 0;\r
295 \r
296 #if     TRACE\r
297         Log("RootByMidLongestSpan: span=%u", uLeaf1);\r
298 #endif\r
299 \r
300         for (;;)\r
301                 {\r
302                 const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);\r
303 #if     TRACE\r
304                 Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);\r
305 #endif\r
306                 if (dAccumSpanLength + dEdgeLength >= dTreeHeight)\r
307                         {\r
308                         *ptruNode1 = uNode1;\r
309                         *ptruNode2 = uNode2;\r
310                         *ptrdLength1 = dTreeHeight - dAccumSpanLength;\r
311                         *ptrdLength2 = dEdgeLength - *ptrdLength1;\r
312 #if     TRACE\r
313                         {\r
314                         const EdgeInfo &EI = EIs[uLeaf1][0];\r
315                         Log("...\n");\r
316                         Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",\r
317                           uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);\r
318                         }\r
319 #endif\r
320                         return;\r
321                         }\r
322 \r
323                 if (tree.IsLeaf(uNode2))\r
324                         Quit("RootByMidLongestSpan: internal error 4");\r
325 \r
326                 dAccumSpanLength += dEdgeLength;\r
327                 const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);\r
328                 const EdgeInfo &EI = EIs[uNode1][uSub];\r
329                 if (!EI.m_bSet)\r
330                         Quit("RootByMidLongestSpan: internal error 5");\r
331 \r
332                 uNode1 = uNode2;\r
333                 uNode2 = EI.m_uMaxStep;\r
334                 }\r
335         }\r
336 \r
337 /***\r
338 Root by balancing average distance to leaves.\r
339 The root is a point p such that the average\r
340 distance to leaves to the left of p is the\r
341 same as the to the right.\r
342 \r
343 This is the method used by CLUSTALW, which\r
344 was originally used in PROFILEWEIGHT:\r
345 \r
346         Thompson et al. (1994) CABIOS (10) 1, 19-29.\r
347 ***/\r
348 \r
349 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,\r
350   unsigned *ptruNode1, unsigned *ptruNode2,\r
351   double *ptrdLength1, double *ptrdLength2)\r
352         {\r
353         const unsigned uNodeCount = tree.GetNodeCount();\r
354         const unsigned uLeafCount = tree.GetLeafCount();\r
355         unsigned uNode1 = NULL_NEIGHBOR;\r
356         unsigned uNode2 = NULL_NEIGHBOR;\r
357         double dMinHeight = VERY_LARGE_DOUBLE;\r
358         double dBestLength1 = VERY_LARGE_DOUBLE;\r
359         double dBestLength2 = VERY_LARGE_DOUBLE;\r
360 \r
361         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
362                 {\r
363                 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);\r
364                 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)\r
365                         {\r
366                         const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);\r
367 \r
368                 // Avoid visiting same edge a second time in reversed order.\r
369                         if (uNeighborIndex < uNodeIndex)\r
370                                 continue;\r
371 \r
372                         const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);\r
373                         if (NULL_NEIGHBOR == uSubRev)\r
374                                 Quit("RootByMinAvgLeafDist, internal error 1");\r
375 \r
376                 // Get info for edges Node1->Node2 and Node2->Node1 (reversed)\r
377                         const EdgeInfo &EI = EIs[uNodeIndex][uSub];\r
378                         const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];\r
379 \r
380                         if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||\r
381                           EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)\r
382                                 Quit("RootByMinAvgLeafDist, internal error 2");\r
383                         if (!EI.m_bSet)\r
384                                 Quit("RootByMinAvgLeafDist, internal error 3");\r
385                         if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)\r
386                                 Quit("RootByMinAvgLeafDist, internal error 4");\r
387 \r
388                         const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);\r
389                         if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))\r
390                                 Quit("RootByMinAvgLeafDist, internal error 5");\r
391 \r
392                 // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).\r
393                 //\r
394         //      -----         ----\r
395         //           |       |\r
396         //           1----p--2\r
397         //           |       |\r
398         //      -----         ----\r
399                 //\r
400                 // Define:\r
401                 //    ADLp = average distance to leaves to left of point p.\r
402                 //        ADRp = average distance to leaves to right of point p.\r
403                 //        L = edge length = distance 12\r
404                 //    x = distance 1p\r
405                 // So distance p2 = L - x.\r
406                 // Average distance from p to leaves on left of p is:\r
407                 //              ADLp = ADL1 + x\r
408                 // Average distance from p to leaves on right of p is:\r
409                 //              ADRp = ADR2 + (L - x)\r
410                 // To be a root, we require these two distances to be equal,\r
411                 //              ADLp = ADRp\r
412                 //              ADL1 + x = ADR2 + (L - x)\r
413                 // Solving for x,\r
414                 //              x = (ADR2 - ADL1 + L)/2\r
415                 // If 0 <= x <= L, we can place the root on edge 12.\r
416 \r
417                         const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;\r
418                         const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;\r
419 \r
420                         const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;\r
421                         if (x >= 0 && x <= dEdgeLength)\r
422                                 {\r
423                                 const double dLength1 = x;\r
424                                 const double dLength2 = dEdgeLength - x;\r
425                                 const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;\r
426                                 const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;\r
427                                 const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;\r
428 #if     TRACE\r
429                                 Log("Candidate root Node1=%u Node2=%u Height=%g\n",\r
430                                   uNodeIndex, uNeighborIndex, dHeight);\r
431 #endif\r
432                                 if (dHeight < dMinHeight)\r
433                                         {\r
434                                         uNode1 = uNodeIndex;\r
435                                         uNode2 = uNeighborIndex;\r
436                                         dBestLength1 = dLength1;\r
437                                         dBestLength2 = dLength2;\r
438                                         dMinHeight = dHeight;\r
439                                         }\r
440                                 }\r
441                         }\r
442                 }\r
443 \r
444         if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)\r
445                 Quit("RootByMinAvgLeafDist, internal error 6");\r
446 \r
447 #if     TRACE\r
448         Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",\r
449           uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);\r
450 #endif\r
451 \r
452         *ptruNode1 = uNode1;\r
453         *ptruNode2 = uNode2;\r
454         *ptrdLength1 = dBestLength1;\r
455         *ptrdLength2 = dBestLength2;\r
456         }\r
457 \r
458 void FixRoot(Tree &tree, ROOT Method)\r
459         {\r
460         if (!tree.IsRooted())\r
461                 Quit("FixRoot: expecting rooted tree");\r
462 \r
463         // Pseudo-root: keep root assigned by clustering\r
464         if (ROOT_Pseudo == Method)\r
465                 return;\r
466 \r
467         tree.UnrootByDeletingRoot();\r
468         tree.RootUnrootedTree(Method);\r
469         }\r