7 Algorithm to compare two trees, X and Y.
\r
9 A node x in X and node y in Y are defined to be
\r
10 similar iff the set of leaves in the subtree under
\r
11 x is identical to the set of leaves under y.
\r
13 A node is defined to be dissimilar iff it is not
\r
14 similar to any node in the other tree.
\r
16 Nodes x and y are defined to be married iff every
\r
17 node in the subtree under x is similar to a node
\r
18 in the subtree under y. Married nodes are considered
\r
19 to be equal. The subtrees under two married nodes can
\r
20 at most differ by exchanges of left and right branches,
\r
21 which we do not consider to be significant here.
\r
23 A node is defined to be a bachelor iff it is not
\r
24 married. If a node is a bachelor, then it has a
\r
25 dissimilar node in its subtree, and it follows
\r
26 immediately from the definition of marriage that its
\r
27 parent is also a bachelor. Hence all nodes on the path
\r
28 from a bachelor node to the root are bachelors.
\r
30 We assume the trees have the same set of leaves, so
\r
31 every leaf is trivially both similar and married to
\r
32 the same leaf in the opposite tree. Bachelor nodes
\r
33 are therefore always internal (i.e., non-leaf) nodes.
\r
35 A node is defined to be a diff iff (a) it is married
\r
36 and (b) its parent is a bachelor. The subtree under
\r
37 a diff is maximally similar to the other tree. (In
\r
38 other words, you cannot extend the subtree without
\r
39 adding a bachelor).
\r
41 The set of diffs is the subset of the two trees that
\r
42 we consider to be identical.
\r
60 The following pairs of internal nodes are similar.
\r
67 Bachelors in the first tree are i and j, bachelors
\r
68 in the second tree are m and n.
\r
70 Node k and p are married, but i and m are not (because j
\r
71 and n are bachelors). The diffs are C, D and k.
\r
73 The set of bachelor nodes can be viewed as the internal
\r
74 nodes of a tree, the leaves of which are diffs. (To see
\r
75 that there can't be disjoint subtrees, note that the path
\r
76 from a diff to a root is all bachelor nodes, so there is
\r
77 always a path between two diffs that goes through the root).
\r
78 We call this tree the "diffs tree".
\r
80 There is a simple O(N) algorithm to build the diffs tree.
\r
81 To achieve O(N) we avoid traversing a given subtree multiple
\r
82 times and also avoid comparing lists of leaves.
\r
84 We visit nodes in depth-first order (i.e., a node is visited
\r
87 If either child of a node is a bachelor, we flag it as
\r
90 If both children of the node we are visiting are married,
\r
91 we check whether the spouses of those children have the
\r
92 same parent in the other tree. If the parents are different,
\r
93 the current node is a bachelor. If they have the same parent,
\r
94 then the node we are visiting is the spouse of that parent.
\r
95 We assign this newly identified married couple a unique integer
\r
96 id. The id of a node is in one-to-one correspondence with the
\r
97 set of leaves in its subtree. Two nodes have the same set of
\r
98 leaves iff they have the same id. Bachelor nodes do not get
\r
102 static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
\r
103 const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
\r
104 unsigned IdToDiffsLeafNodeIndex[])
\r
107 Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
\r
108 uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
\r
110 if (bIsDiff[uTreeNodeIndex])
\r
112 unsigned uLeafCount = tree.GetLeafCount();
\r
113 unsigned *Leaves = new unsigned[uLeafCount];
\r
114 GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
\r
115 for (unsigned n = 0; n < uLeafCount; ++n)
\r
117 const unsigned uLeafNodeIndex = Leaves[n];
\r
118 const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
\r
119 if (uId >= tree.GetLeafCount())
\r
120 Quit("BuildDiffs, id out of range");
\r
121 IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
\r
123 Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
\r
130 if (tree.IsLeaf(uTreeNodeIndex))
\r
131 Quit("BuildDiffs: should never reach leaf");
\r
133 const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
\r
134 const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);
\r
136 const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
\r
137 const unsigned uDiffsRight = uDiffsLeft + 1;
\r
139 BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
\r
140 BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
\r
143 void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
\r
144 unsigned IdToDiffsLeafNodeIndex[])
\r
154 if (!Tree1.IsRooted() || !Tree2.IsRooted())
\r
155 Quit("DiffTrees: requires rooted trees");
\r
157 const unsigned uNodeCount = Tree1.GetNodeCount();
\r
158 const unsigned uNodeCount2 = Tree2.GetNodeCount();
\r
160 const unsigned uLeafCount = Tree1.GetLeafCount();
\r
161 const unsigned uLeafCount2 = Tree2.GetLeafCount();
\r
162 assert(uLeafCount == uLeafCount2);
\r
164 if (uNodeCount != uNodeCount2)
\r
165 Quit("DiffTrees: different node counts");
\r
167 // Allocate tables so we can convert tree node index to
\r
168 // and from the unique id with a O(1) lookup.
\r
169 unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
\r
170 unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
\r
172 bool *bIsBachelor1 = new bool[uNodeCount];
\r
173 bool *bIsDiff1 = new bool[uNodeCount];
\r
175 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
177 NodeIndexToId1[uNodeIndex] = uNodeCount;
\r
178 bIsBachelor1[uNodeIndex] = false;
\r
179 bIsDiff1[uNodeIndex] = false;
\r
181 // Use uNodeCount as value meaning "not set".
\r
182 IdToNodeIndex2[uNodeIndex] = uNodeCount;
\r
185 // Initialize node index <-> id lookup tables
\r
186 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
188 if (Tree1.IsLeaf(uNodeIndex))
\r
190 const unsigned uId = Tree1.GetLeafId(uNodeIndex);
\r
191 if (uId >= uNodeCount)
\r
192 Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
\r
193 NodeIndexToId1[uNodeIndex] = uId;
\r
196 if (Tree2.IsLeaf(uNodeIndex))
\r
198 const unsigned uId = Tree2.GetLeafId(uNodeIndex);
\r
199 if (uId >= uNodeCount)
\r
200 Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
\r
201 IdToNodeIndex2[uId] = uNodeIndex;
\r
205 // Validity check. This verifies that the ids
\r
206 // pre-assigned to the leaves in Tree1 are unique
\r
207 // (note that the id<N check above does not rule
\r
208 // out two leaves having duplicate ids).
\r
209 for (unsigned uId = 0; uId < uLeafCount; ++uId)
\r
211 unsigned uNodeIndex2 = IdToNodeIndex2[uId];
\r
212 if (uNodeCount == uNodeIndex2)
\r
213 Quit("DiffTrees, check 2");
\r
216 // Ids assigned to internal nodes are N, N+1 ...
\r
217 // An internal node id uniquely identifies a set
\r
218 // of two or more leaves.
\r
219 unsigned uInternalNodeId = uLeafCount;
\r
221 // Depth-first traversal of tree.
\r
222 // The order guarantees that a node is visited before
\r
223 // its parent is visited.
\r
224 for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
\r
225 NULL_NEIGHBOR != uNodeIndex1;
\r
226 uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
\r
229 Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
\r
231 Tree1.IsLeaf(uNodeIndex1),
\r
232 bIsBachelor1[uNodeIndex1]);
\r
235 // Leaves are trivial; nothing to do.
\r
236 if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
\r
239 // If either child is a bachelor, flag
\r
240 // this node as a bachelor and continue.
\r
241 unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
\r
242 if (bIsBachelor1[uLeft1])
\r
244 bIsBachelor1[uNodeIndex1] = true;
\r
248 unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
\r
249 if (bIsBachelor1[uRight1])
\r
251 bIsBachelor1[uNodeIndex1] = true;
\r
255 // Both children are married.
\r
256 // Married nodes are guaranteed to have an id.
\r
257 unsigned uIdLeft = NodeIndexToId1[uLeft1];
\r
258 unsigned uIdRight = NodeIndexToId1[uRight1];
\r
260 if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
\r
261 Quit("DiffTrees, check 5");
\r
263 // uLeft2 is the spouse of uLeft1, and similarly for uRight2.
\r
264 unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
\r
265 unsigned uRight2 = IdToNodeIndex2[uIdRight];
\r
267 if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
\r
268 Quit("DiffTrees, check 6");
\r
270 // If the spouses of uLeft1 and uRight1 have the same
\r
271 // parent, then this parent is the spouse of uNodeIndex1.
\r
272 // Otherwise, uNodeIndex1 is a diff.
\r
273 unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
\r
274 unsigned uParentRight2 = Tree2.GetParent(uRight2);
\r
277 Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
\r
286 if (uParentLeft2 == uParentRight2)
\r
288 NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
\r
289 IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
\r
293 bIsBachelor1[uNodeIndex1] = true;
\r
296 unsigned uDiffCount = 0;
\r
297 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
\r
299 if (bIsBachelor1[uNodeIndex])
\r
301 if (Tree1.IsRoot(uNodeIndex))
\r
303 // Special case: if no bachelors, consider the
\r
305 if (!bIsBachelor1[uNodeIndex])
\r
306 bIsDiff1[uNodeIndex] = true;
\r
309 const unsigned uParent = Tree1.GetParent(uNodeIndex);
\r
310 if (bIsBachelor1[uParent])
\r
312 bIsDiff1[uNodeIndex] = true;
\r
319 Log("Node Id Bach Diff Name\n");
\r
320 Log("---- ---- ---- ---- ----\n");
\r
321 for (unsigned n = 0; n < uNodeCount; ++n)
\r
323 Log("%4u %4u %d %d",
\r
328 if (Tree1.IsLeaf(n))
\r
329 Log(" %s", Tree1.GetLeafName(n));
\r
334 Log("Node Id Name\n");
\r
335 Log("---- ---- ----\n");
\r
336 for (unsigned n = 0; n < uNodeCount; ++n)
\r
339 if (Tree2.IsLeaf(n))
\r
340 Log(" %s", Tree2.GetLeafName(n));
\r
345 Diffs.CreateRooted();
\r
346 const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
\r
347 const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
\r
349 for (unsigned n = 0; n < uLeafCount; ++n)
\r
350 IdToDiffsLeafNodeIndex[n] = uNodeCount;
\r
352 BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
\r
353 IdToDiffsLeafNodeIndex);
\r
360 Log("IdToDiffsLeafNodeIndex:");
\r
361 for (unsigned n = 0; n < uLeafCount; ++n)
\r
367 Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
\r
372 for (unsigned n = 0; n < uLeafCount; ++n)
\r
373 if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
\r
374 Quit("TreeDiffs check 7");
\r
376 delete[] NodeIndexToId1;
\r
377 delete[] IdToNodeIndex2;
\r
379 delete[] bIsBachelor1;
\r