Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / muscle / difftrees.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 \r
4 #define TRACE   0\r
5 \r
6 /***\r
7 Algorithm to compare two trees, X and Y.\r
8 \r
9 A node x in X and node y in Y are defined to be\r
10 similar iff the set of leaves in the subtree under\r
11 x is identical to the set of leaves under y.\r
12 \r
13 A node is defined to be dissimilar iff it is not\r
14 similar to any node in the other tree.\r
15 \r
16 Nodes x and y are defined to be married iff every\r
17 node in the subtree under x is similar to a node\r
18 in the subtree under y. Married nodes are considered\r
19 to be equal. The subtrees under two married nodes can\r
20 at most differ by exchanges of left and right branches,\r
21 which we do not consider to be significant here.\r
22 \r
23 A node is defined to be a bachelor iff it is not\r
24 married. If a node is a bachelor, then it has a\r
25 dissimilar node in its subtree, and it follows\r
26 immediately from the definition of marriage that its\r
27 parent is also a bachelor. Hence all nodes on the path\r
28 from a bachelor node to the root are bachelors.\r
29 \r
30 We assume the trees have the same set of leaves, so\r
31 every leaf is trivially both similar and married to\r
32 the same leaf in the opposite tree. Bachelor nodes\r
33 are therefore always internal (i.e., non-leaf) nodes.\r
34 \r
35 A node is defined to be a diff iff (a) it is married\r
36 and (b) its parent is a bachelor. The subtree under\r
37 a diff is maximally similar to the other tree. (In\r
38 other words, you cannot extend the subtree without\r
39 adding a bachelor). \r
40 \r
41 The set of diffs is the subset of the two trees that\r
42 we consider to be identical.\r
43 \r
44 Example:\r
45 \r
46               -----A\r
47         -----k\r
48    ----j      -----B\r
49 --i     -----C\r
50    ------D\r
51 \r
52 \r
53               -----A\r
54         -----p\r
55    ----n      -----B\r
56 --m     -----D\r
57    ------C\r
58 \r
59 \r
60 The following pairs of internal nodes are similar.\r
61 \r
62         Nodes   Set of leaves\r
63         -----   -------------\r
64         k,p             A,B\r
65         i,m             A,B,C,D\r
66 \r
67 Bachelors in the first tree are i and j, bachelors\r
68 in the second tree are m and n.\r
69 \r
70 Node k and p are married, but i and m are not (because j\r
71 and n are bachelors). The diffs are C, D and k.\r
72 \r
73 The set of bachelor nodes can be viewed as the internal\r
74 nodes of a tree, the leaves of which are diffs. (To see\r
75 that there can't be disjoint subtrees, note that the path\r
76 from a diff to a root is all bachelor nodes, so there is\r
77 always a path between two diffs that goes through the root).\r
78 We call this tree the "diffs tree".\r
79 \r
80 There is a simple O(N) algorithm to build the diffs tree.\r
81 To achieve O(N) we avoid traversing a given subtree multiple\r
82 times and also avoid comparing lists of leaves. \r
83 \r
84 We visit nodes in depth-first order (i.e., a node is visited\r
85 before its parent).\r
86 \r
87 If either child of a node is a bachelor, we flag it as\r
88 a bachelor.\r
89 \r
90 If both children of the node we are visiting are married,\r
91 we check whether the spouses of those children have the\r
92 same parent in the other tree. If the parents are different,\r
93 the current node is a bachelor. If they have the same parent,\r
94 then the node we are visiting is the spouse of that parent.\r
95 We assign this newly identified married couple a unique integer\r
96 id. The id of a node is in one-to-one correspondence with the\r
97 set of leaves in its subtree. Two nodes have the same set of\r
98 leaves iff they have the same id. Bachelor nodes do not get\r
99 an id.\r
100 ***/\r
101 \r
102 static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,\r
103   const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,\r
104   unsigned IdToDiffsLeafNodeIndex[])\r
105         {\r
106 #if     TRACE\r
107         Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",\r
108           uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));\r
109 #endif\r
110         if (bIsDiff[uTreeNodeIndex])\r
111                 {\r
112                 unsigned uLeafCount = tree.GetLeafCount();\r
113                 unsigned *Leaves = new unsigned[uLeafCount];\r
114                 GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);\r
115                 for (unsigned n = 0; n < uLeafCount; ++n)\r
116                         {\r
117                         const unsigned uLeafNodeIndex = Leaves[n];\r
118                         const unsigned uId = tree.GetLeafId(uLeafNodeIndex);\r
119                         if (uId >= tree.GetLeafCount())\r
120                                 Quit("BuildDiffs, id out of range");\r
121                         IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;\r
122 #if     TRACE\r
123                         Log("  Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);\r
124 #endif\r
125                         }\r
126                 delete[] Leaves;\r
127                 return;\r
128                 }\r
129 \r
130         if (tree.IsLeaf(uTreeNodeIndex))\r
131                 Quit("BuildDiffs: should never reach leaf");\r
132 \r
133         const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);\r
134         const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);\r
135 \r
136         const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);\r
137         const unsigned uDiffsRight = uDiffsLeft + 1;\r
138 \r
139         BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);\r
140         BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);\r
141         }\r
142 \r
143 void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,\r
144   unsigned IdToDiffsLeafNodeIndex[])\r
145         {\r
146 #if     TRACE\r
147         Log("Tree1:\n");\r
148         Tree1.LogMe();\r
149         Log("\n");\r
150         Log("Tree2:\n");\r
151         Tree2.LogMe();\r
152 #endif\r
153 \r
154         if (!Tree1.IsRooted() || !Tree2.IsRooted())\r
155                 Quit("DiffTrees: requires rooted trees");\r
156 \r
157         const unsigned uNodeCount = Tree1.GetNodeCount();\r
158         const unsigned uNodeCount2 = Tree2.GetNodeCount();\r
159         \r
160         const unsigned uLeafCount = Tree1.GetLeafCount();\r
161         const unsigned uLeafCount2 = Tree2.GetLeafCount();\r
162         assert(uLeafCount == uLeafCount2);\r
163 \r
164         if (uNodeCount != uNodeCount2)\r
165                 Quit("DiffTrees: different node counts");\r
166 \r
167 // Allocate tables so we can convert tree node index to\r
168 // and from the unique id with a O(1) lookup.\r
169         unsigned *NodeIndexToId1 = new unsigned[uNodeCount];\r
170         unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];\r
171 \r
172         bool *bIsBachelor1 = new bool[uNodeCount];\r
173         bool *bIsDiff1 = new bool[uNodeCount];\r
174 \r
175         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
176                 {\r
177                 NodeIndexToId1[uNodeIndex] = uNodeCount;\r
178                 bIsBachelor1[uNodeIndex] = false;\r
179                 bIsDiff1[uNodeIndex] = false;\r
180 \r
181         // Use uNodeCount as value meaning "not set".\r
182                 IdToNodeIndex2[uNodeIndex] = uNodeCount;\r
183                 }\r
184 \r
185 // Initialize node index <-> id lookup tables\r
186         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
187                 {\r
188                 if (Tree1.IsLeaf(uNodeIndex))\r
189                         {\r
190                         const unsigned uId = Tree1.GetLeafId(uNodeIndex);\r
191                         if (uId >= uNodeCount)\r
192                                 Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");\r
193                         NodeIndexToId1[uNodeIndex] = uId;\r
194                         }\r
195 \r
196                 if (Tree2.IsLeaf(uNodeIndex))\r
197                         {\r
198                         const unsigned uId = Tree2.GetLeafId(uNodeIndex);\r
199                         if (uId >= uNodeCount)\r
200                                 Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");\r
201                         IdToNodeIndex2[uId] = uNodeIndex;\r
202                         }\r
203                 }\r
204 \r
205 // Validity check. This verifies that the ids\r
206 // pre-assigned to the leaves in Tree1 are unique\r
207 // (note that the id<N check above does not rule\r
208 // out two leaves having duplicate ids).\r
209         for (unsigned uId = 0; uId < uLeafCount; ++uId)\r
210                 {\r
211                 unsigned uNodeIndex2 = IdToNodeIndex2[uId];\r
212                 if (uNodeCount == uNodeIndex2)\r
213                         Quit("DiffTrees, check 2");\r
214                 }\r
215 \r
216 // Ids assigned to internal nodes are N, N+1 ...\r
217 // An internal node id uniquely identifies a set\r
218 // of two or more leaves.\r
219         unsigned uInternalNodeId = uLeafCount;\r
220 \r
221 // Depth-first traversal of tree.\r
222 // The order guarantees that a node is visited before\r
223 // its parent is visited.\r
224         for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();\r
225           NULL_NEIGHBOR != uNodeIndex1;\r
226           uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))\r
227                 {\r
228 #if     TRACE\r
229                 Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",\r
230                   uNodeIndex1,\r
231                   Tree1.IsLeaf(uNodeIndex1),\r
232                   bIsBachelor1[uNodeIndex1]);\r
233 #endif\r
234 \r
235         // Leaves are trivial; nothing to do.\r
236                 if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])\r
237                         continue;\r
238 \r
239         // If either child is a bachelor, flag\r
240         // this node as a bachelor and continue.\r
241                 unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);\r
242                 if (bIsBachelor1[uLeft1])\r
243                         {\r
244                         bIsBachelor1[uNodeIndex1] = true;\r
245                         continue;\r
246                         }\r
247 \r
248                 unsigned uRight1 = Tree1.GetRight(uNodeIndex1);\r
249                 if (bIsBachelor1[uRight1])\r
250                         {\r
251                         bIsBachelor1[uNodeIndex1] = true;\r
252                         continue;\r
253                         }\r
254 \r
255         // Both children are married.\r
256         // Married nodes are guaranteed to have an id.\r
257                 unsigned uIdLeft = NodeIndexToId1[uLeft1];\r
258                 unsigned uIdRight = NodeIndexToId1[uRight1];\r
259 \r
260                 if (uIdLeft == uNodeCount || uIdRight == uNodeCount)\r
261                         Quit("DiffTrees, check 5");\r
262 \r
263         // uLeft2 is the spouse of uLeft1, and similarly for uRight2.\r
264                 unsigned uLeft2 = IdToNodeIndex2[uIdLeft];\r
265                 unsigned uRight2 = IdToNodeIndex2[uIdRight];\r
266 \r
267                 if (uLeft2 == uNodeCount || uRight2 == uNodeCount)\r
268                         Quit("DiffTrees, check 6");\r
269 \r
270         // If the spouses of uLeft1 and uRight1 have the same\r
271         // parent, then this parent is the spouse of uNodeIndex1.\r
272         // Otherwise, uNodeIndex1 is a diff.\r
273                 unsigned uParentLeft2 = Tree2.GetParent(uLeft2);\r
274                 unsigned uParentRight2 = Tree2.GetParent(uRight2);\r
275 \r
276 #if     TRACE\r
277                 Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",\r
278                   uLeft1,\r
279                   uRight1,\r
280                   uLeft2,\r
281                   uRight2,\r
282                   uParentLeft2,\r
283                   uParentRight2);\r
284 #endif\r
285 \r
286                 if (uParentLeft2 == uParentRight2)\r
287                         {\r
288                         NodeIndexToId1[uNodeIndex1] = uInternalNodeId;\r
289                         IdToNodeIndex2[uInternalNodeId] = uParentLeft2;\r
290                         ++uInternalNodeId;\r
291                         }\r
292                 else\r
293                         bIsBachelor1[uNodeIndex1] = true;\r
294                 }\r
295 \r
296         unsigned uDiffCount = 0;\r
297         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
298                 {\r
299                 if (bIsBachelor1[uNodeIndex])\r
300                         continue;\r
301                 if (Tree1.IsRoot(uNodeIndex))\r
302                         {\r
303                 // Special case: if no bachelors, consider the\r
304                 // root a diff.\r
305                         if (!bIsBachelor1[uNodeIndex])\r
306                                 bIsDiff1[uNodeIndex] = true;\r
307                         continue;\r
308                         }\r
309                 const unsigned uParent = Tree1.GetParent(uNodeIndex);\r
310                 if (bIsBachelor1[uParent])\r
311                         {\r
312                         bIsDiff1[uNodeIndex] = true;\r
313                         ++uDiffCount;\r
314                         }\r
315                 }\r
316 \r
317 #if     TRACE\r
318         Log("Tree1:\n");\r
319         Log("Node    Id  Bach  Diff  Name\n");\r
320         Log("----  ----  ----  ----  ----\n");\r
321         for (unsigned n = 0; n < uNodeCount; ++n)\r
322                 {\r
323                 Log("%4u  %4u     %d     %d",\r
324                   n,\r
325                   NodeIndexToId1[n],\r
326                   bIsBachelor1[n],\r
327                   bIsDiff1[n]);\r
328                 if (Tree1.IsLeaf(n))\r
329                         Log("  %s", Tree1.GetLeafName(n));\r
330                 Log("\n");\r
331                 }\r
332         Log("\n");\r
333         Log("Tree2:\n");\r
334         Log("Node    Id              Name\n");\r
335         Log("----  ----              ----\n");\r
336         for (unsigned n = 0; n < uNodeCount; ++n)\r
337                 {\r
338                 Log("%4u                  ", n);\r
339                 if (Tree2.IsLeaf(n))\r
340                         Log("  %s", Tree2.GetLeafName(n));\r
341                 Log("\n");\r
342                 }\r
343 #endif\r
344 \r
345         Diffs.CreateRooted();\r
346         const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();\r
347         const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();\r
348 \r
349         for (unsigned n = 0; n < uLeafCount; ++n)\r
350                 IdToDiffsLeafNodeIndex[n] = uNodeCount;\r
351 \r
352         BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,\r
353           IdToDiffsLeafNodeIndex);\r
354 \r
355 #if TRACE\r
356         Log("\n");\r
357         Log("Diffs:\n");\r
358         Diffs.LogMe();\r
359         Log("\n");\r
360         Log("IdToDiffsLeafNodeIndex:");\r
361         for (unsigned n = 0; n < uLeafCount; ++n)\r
362                 {\r
363                 if (n%16 == 0)\r
364                         Log("\n");\r
365                 else\r
366                         Log(" ");\r
367                 Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);\r
368                 }\r
369         Log("\n");\r
370 #endif\r
371 \r
372         for (unsigned n = 0; n < uLeafCount; ++n)\r
373                 if (IdToDiffsLeafNodeIndex[n] == uNodeCount)\r
374                         Quit("TreeDiffs check 7");\r
375 \r
376         delete[] NodeIndexToId1;\r
377         delete[] IdToNodeIndex2;\r
378 \r
379         delete[] bIsBachelor1;\r
380         delete[] bIsDiff1;\r
381         }\r