3 #include "edgelist.h"
\r
13 // Is data in this structure valid (i.e, has been set)?
\r
16 // Node at start of this edge
\r
19 // Node at end of this edge
\r
22 // Maximum distance from Node2 to a leaf
\r
23 double m_dMaxDistToLeaf;
\r
25 // Sum of distances from Node2 to all leaves under Node2
\r
26 double m_dTotalDistToLeaves;
\r
28 // Next node on path from Node2 to most distant leaf
\r
29 unsigned m_uMaxStep;
\r
31 // Most distant leaf from Node2 (used for debugging only)
\r
32 unsigned m_uMostDistantLeaf;
\r
34 // Number of leaves under Node2
\r
35 unsigned m_uLeafCount;
\r
38 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
\r
39 unsigned *ptruNode1, unsigned *ptruNode2,
\r
40 double *ptrdLength1, double *ptrdLength2);
\r
41 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
\r
42 unsigned *ptruNode1, unsigned *ptruNode2,
\r
43 double *ptrdLength1, double *ptrdLength2);
\r
45 static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)
\r
47 Log("Node1 Node2 MaxDist TotDist MostDist LeafCount Step\n");
\r
48 Log("----- ----- ------- ------- -------- --------- ----\n");
\r
49 // 12345 12345 1234567 1234567 12345678 123456789
\r
51 for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
\r
52 for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)
\r
54 const EdgeInfo &EI = EIs[uNode][uNeighbor];
\r
57 Log("%5u %5u %7.3g %7.3g %8u %9u",
\r
60 EI.m_dMaxDistToLeaf,
\r
61 EI.m_dTotalDistToLeaves,
\r
62 EI.m_uMostDistantLeaf,
\r
64 if (NULL_NEIGHBOR != EI.m_uMaxStep)
\r
65 Log(" %4u", EI.m_uMaxStep);
\r
70 static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
\r
72 const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
\r
73 EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
\r
74 EI.m_uNode1 = uNode1;
\r
75 EI.m_uNode2 = uNode2;
\r
77 if (tree.IsLeaf(uNode2))
\r
79 EI.m_dMaxDistToLeaf = 0;
\r
80 EI.m_dTotalDistToLeaves = 0;
\r
81 EI.m_uMaxStep = NULL_NEIGHBOR;
\r
82 EI.m_uMostDistantLeaf = uNode2;
\r
83 EI.m_uLeafCount = 1;
\r
88 double dMaxDistToLeaf = -1e29;
\r
89 double dTotalDistToLeaves = 0.0;
\r
90 unsigned uLeafCount = 0;
\r
91 unsigned uMostDistantLeaf = NULL_NEIGHBOR;
\r
92 unsigned uMaxStep = NULL_NEIGHBOR;
\r
94 const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
\r
95 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
\r
97 const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
\r
98 if (uNode3 == uNode1)
\r
100 const EdgeInfo &EINext = EIs[uNode2][uSub];
\r
101 if (!EINext.m_bSet)
\r
102 Quit("CalcInfo: internal error, dist %u->%u not known",
\r
106 uLeafCount += EINext.m_uLeafCount;
\r
108 const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
\r
109 const double dTotalDist = EINext.m_dTotalDistToLeaves +
\r
110 EINext.m_uLeafCount*dEdgeLength;
\r
111 dTotalDistToLeaves += dTotalDist;
\r
113 const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
\r
114 if (dDist > dMaxDistToLeaf)
\r
116 dMaxDistToLeaf = dDist;
\r
117 uMostDistantLeaf = EINext.m_uMostDistantLeaf;
\r
121 if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
\r
123 Quit("CalcInfo: internal error 2");
\r
125 const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
\r
126 EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
\r
127 EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
\r
128 EI.m_uMaxStep = uMaxStep;
\r
129 EI.m_uMostDistantLeaf = uMostDistantLeaf;
\r
130 EI.m_uLeafCount = uLeafCount;
\r
134 static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
\r
137 const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);
\r
138 return EIs[uNodeFrom][uSub].m_bSet;
\r
141 static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
\r
144 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);
\r
145 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
\r
147 unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);
\r
148 if (uNeighborIndex == uNodeFrom)
\r
150 if (!EIs[uNodeTo][uSub].m_bSet)
\r
156 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
\r
157 double *ptrdLength1, double *ptrdLength2,
\r
163 if (tree.IsRooted())
\r
164 Quit("FindRoot: tree already rooted");
\r
166 const unsigned uNodeCount = tree.GetNodeCount();
\r
167 const unsigned uLeafCount = tree.GetLeafCount();
\r
169 if (uNodeCount < 2)
\r
170 Quit("Root: don't support trees with < 2 edges");
\r
172 EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
\r
173 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
174 EIs[uNodeIndex] = new EdgeInfo[3];
\r
177 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
178 if (tree.IsLeaf(uNodeIndex))
\r
180 unsigned uParent = tree.GetNeighbor1(uNodeIndex);
\r
181 Edges.Add(uParent, uNodeIndex);
\r
189 // Main loop: iterate until all distances known
\r
190 double dAllMaxDist = -1e20;
\r
191 unsigned uMaxFrom = NULL_NEIGHBOR;
\r
192 unsigned uMaxTo = NULL_NEIGHBOR;
\r
195 EdgeList NextEdges;
\r
198 Log("\nTop of main loop\n");
\r
202 ListEIs(EIs, uNodeCount);
\r
206 const unsigned uEdgeCount = Edges.GetCount();
\r
207 if (0 == uEdgeCount)
\r
209 for (unsigned n = 0; n < uEdgeCount; ++n)
\r
211 unsigned uNodeFrom;
\r
213 Edges.GetEdge(n, &uNodeFrom, &uNodeTo);
\r
215 CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
\r
217 Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
\r
219 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
\r
220 for (unsigned i = 0; i < uNeighborCount; ++i)
\r
222 const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
\r
223 if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
\r
224 AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
\r
225 NextEdges.Add(uNeighborIndex, uNodeFrom);
\r
228 Edges.Copy(NextEdges);
\r
232 ListEIs(EIs, uNodeCount);
\r
235 switch (RootMethod)
\r
237 case ROOT_MidLongestSpan:
\r
238 RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
\r
239 ptrdLength1, ptrdLength2);
\r
242 case ROOT_MinAvgLeafDist:
\r
243 RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
\r
244 ptrdLength1, ptrdLength2);
\r
248 Quit("Invalid RootMethod=%d", RootMethod);
\r
251 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
252 delete[] EIs[uNodeIndex];
\r
256 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
\r
257 unsigned *ptruNode1, unsigned *ptruNode2,
\r
258 double *ptrdLength1, double *ptrdLength2)
\r
260 const unsigned uNodeCount = tree.GetNodeCount();
\r
262 unsigned uLeaf1 = NULL_NEIGHBOR;
\r
263 unsigned uMostDistantLeaf = NULL_NEIGHBOR;
\r
264 double dMaxDist = -VERY_LARGE_DOUBLE;
\r
265 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
267 if (!tree.IsLeaf(uNodeIndex))
\r
270 const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
\r
271 if (NULL_NEIGHBOR == uNode2)
\r
272 Quit("RootByMidLongestSpan: internal error 0");
\r
273 const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
\r
274 const EdgeInfo &EI = EIs[uNodeIndex][0];
\r
276 Quit("RootByMidLongestSpan: internal error 1");
\r
277 if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
\r
278 Quit("RootByMidLongestSpan: internal error 2");
\r
279 const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
\r
280 if (dSpanLength > dMaxDist)
\r
282 dMaxDist = dSpanLength;
\r
283 uLeaf1 = uNodeIndex;
\r
284 uMostDistantLeaf = EI.m_uMostDistantLeaf;
\r
288 if (NULL_NEIGHBOR == uLeaf1)
\r
289 Quit("RootByMidLongestSpan: internal error 3");
\r
291 const double dTreeHeight = dMaxDist/2.0;
\r
292 unsigned uNode1 = uLeaf1;
\r
293 unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
\r
294 double dAccumSpanLength = 0;
\r
297 Log("RootByMidLongestSpan: span=%u", uLeaf1);
\r
302 const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
\r
304 Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
\r
306 if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
\r
308 *ptruNode1 = uNode1;
\r
309 *ptruNode2 = uNode2;
\r
310 *ptrdLength1 = dTreeHeight - dAccumSpanLength;
\r
311 *ptrdLength2 = dEdgeLength - *ptrdLength1;
\r
314 const EdgeInfo &EI = EIs[uLeaf1][0];
\r
316 Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
\r
317 uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
\r
323 if (tree.IsLeaf(uNode2))
\r
324 Quit("RootByMidLongestSpan: internal error 4");
\r
326 dAccumSpanLength += dEdgeLength;
\r
327 const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
\r
328 const EdgeInfo &EI = EIs[uNode1][uSub];
\r
330 Quit("RootByMidLongestSpan: internal error 5");
\r
333 uNode2 = EI.m_uMaxStep;
\r
338 Root by balancing average distance to leaves.
\r
339 The root is a point p such that the average
\r
340 distance to leaves to the left of p is the
\r
341 same as the to the right.
\r
343 This is the method used by CLUSTALW, which
\r
344 was originally used in PROFILEWEIGHT:
\r
346 Thompson et al. (1994) CABIOS (10) 1, 19-29.
\r
349 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
\r
350 unsigned *ptruNode1, unsigned *ptruNode2,
\r
351 double *ptrdLength1, double *ptrdLength2)
\r
353 const unsigned uNodeCount = tree.GetNodeCount();
\r
354 const unsigned uLeafCount = tree.GetLeafCount();
\r
355 unsigned uNode1 = NULL_NEIGHBOR;
\r
356 unsigned uNode2 = NULL_NEIGHBOR;
\r
357 double dMinHeight = VERY_LARGE_DOUBLE;
\r
358 double dBestLength1 = VERY_LARGE_DOUBLE;
\r
359 double dBestLength2 = VERY_LARGE_DOUBLE;
\r
361 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
363 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
\r
364 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
\r
366 const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);
\r
368 // Avoid visiting same edge a second time in reversed order.
\r
369 if (uNeighborIndex < uNodeIndex)
\r
372 const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
\r
373 if (NULL_NEIGHBOR == uSubRev)
\r
374 Quit("RootByMinAvgLeafDist, internal error 1");
\r
376 // Get info for edges Node1->Node2 and Node2->Node1 (reversed)
\r
377 const EdgeInfo &EI = EIs[uNodeIndex][uSub];
\r
378 const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];
\r
380 if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
\r
381 EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
\r
382 Quit("RootByMinAvgLeafDist, internal error 2");
\r
384 Quit("RootByMinAvgLeafDist, internal error 3");
\r
385 if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
\r
386 Quit("RootByMinAvgLeafDist, internal error 4");
\r
388 const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
\r
389 if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
\r
390 Quit("RootByMinAvgLeafDist, internal error 5");
\r
392 // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
\r
401 // ADLp = average distance to leaves to left of point p.
\r
402 // ADRp = average distance to leaves to right of point p.
\r
403 // L = edge length = distance 12
\r
405 // So distance p2 = L - x.
\r
406 // Average distance from p to leaves on left of p is:
\r
408 // Average distance from p to leaves on right of p is:
\r
409 // ADRp = ADR2 + (L - x)
\r
410 // To be a root, we require these two distances to be equal,
\r
412 // ADL1 + x = ADR2 + (L - x)
\r
414 // x = (ADR2 - ADL1 + L)/2
\r
415 // If 0 <= x <= L, we can place the root on edge 12.
\r
417 const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
\r
418 const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;
\r
420 const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
\r
421 if (x >= 0 && x <= dEdgeLength)
\r
423 const double dLength1 = x;
\r
424 const double dLength2 = dEdgeLength - x;
\r
425 const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;
\r
426 const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;
\r
427 const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
\r
429 Log("Candidate root Node1=%u Node2=%u Height=%g\n",
\r
430 uNodeIndex, uNeighborIndex, dHeight);
\r
432 if (dHeight < dMinHeight)
\r
434 uNode1 = uNodeIndex;
\r
435 uNode2 = uNeighborIndex;
\r
436 dBestLength1 = dLength1;
\r
437 dBestLength2 = dLength2;
\r
438 dMinHeight = dHeight;
\r
444 if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
\r
445 Quit("RootByMinAvgLeafDist, internal error 6");
\r
448 Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
\r
449 uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
\r
452 *ptruNode1 = uNode1;
\r
453 *ptruNode2 = uNode2;
\r
454 *ptrdLength1 = dBestLength1;
\r
455 *ptrdLength2 = dBestLength2;
\r
458 void FixRoot(Tree &tree, ROOT Method)
\r
460 if (!tree.IsRooted())
\r
461 Quit("FixRoot: expecting rooted tree");
\r
463 // Pseudo-root: keep root assigned by clustering
\r
464 if (ROOT_Pseudo == Method)
\r
467 tree.UnrootByDeletingRoot();
\r
468 tree.RootUnrootedTree(Method);
\r