+++ /dev/null
-#include "muscle.h"\r
-#include "tree.h"\r
-\r
-#define TRACE 0\r
-\r
-/***\r
-Algorithm to compare two trees, X and Y.\r
-\r
-A node x in X and node y in Y are defined to be\r
-similar iff the set of leaves in the subtree under\r
-x is identical to the set of leaves under y.\r
-\r
-A node is defined to be dissimilar iff it is not\r
-similar to any node in the other tree.\r
-\r
-Nodes x and y are defined to be married iff every\r
-node in the subtree under x is similar to a node\r
-in the subtree under y. Married nodes are considered\r
-to be equal. The subtrees under two married nodes can\r
-at most differ by exchanges of left and right branches,\r
-which we do not consider to be significant here.\r
-\r
-A node is defined to be a bachelor iff it is not\r
-married. If a node is a bachelor, then it has a\r
-dissimilar node in its subtree, and it follows\r
-immediately from the definition of marriage that its\r
-parent is also a bachelor. Hence all nodes on the path\r
-from a bachelor node to the root are bachelors.\r
-\r
-We assume the trees have the same set of leaves, so\r
-every leaf is trivially both similar and married to\r
-the same leaf in the opposite tree. Bachelor nodes\r
-are therefore always internal (i.e., non-leaf) nodes.\r
-\r
-A node is defined to be a diff iff (a) it is married\r
-and (b) its parent is a bachelor. The subtree under\r
-a diff is maximally similar to the other tree. (In\r
-other words, you cannot extend the subtree without\r
-adding a bachelor). \r
-\r
-The set of diffs is the subset of the two trees that\r
-we consider to be identical.\r
-\r
-Example:\r
-\r
- -----A\r
- -----k\r
- ----j -----B\r
---i -----C\r
- ------D\r
-\r
-\r
- -----A\r
- -----p\r
- ----n -----B\r
---m -----D\r
- ------C\r
-\r
-\r
-The following pairs of internal nodes are similar.\r
-\r
- Nodes Set of leaves\r
- ----- -------------\r
- k,p A,B\r
- i,m A,B,C,D\r
-\r
-Bachelors in the first tree are i and j, bachelors\r
-in the second tree are m and n.\r
-\r
-Node k and p are married, but i and m are not (because j\r
-and n are bachelors). The diffs are C, D and k.\r
-\r
-The set of bachelor nodes can be viewed as the internal\r
-nodes of a tree, the leaves of which are diffs. (To see\r
-that there can't be disjoint subtrees, note that the path\r
-from a diff to a root is all bachelor nodes, so there is\r
-always a path between two diffs that goes through the root).\r
-We call this tree the "diffs tree".\r
-\r
-There is a simple O(N) algorithm to build the diffs tree.\r
-To achieve O(N) we avoid traversing a given subtree multiple\r
-times and also avoid comparing lists of leaves. \r
-\r
-We visit nodes in depth-first order (i.e., a node is visited\r
-before its parent).\r
-\r
-If either child of a node is a bachelor, we flag it as\r
-a bachelor.\r
-\r
-If both children of the node we are visiting are married,\r
-we check whether the spouses of those children have the\r
-same parent in the other tree. If the parents are different,\r
-the current node is a bachelor. If they have the same parent,\r
-then the node we are visiting is the spouse of that parent.\r
-We assign this newly identified married couple a unique integer\r
-id. The id of a node is in one-to-one correspondence with the\r
-set of leaves in its subtree. Two nodes have the same set of\r
-leaves iff they have the same id. Bachelor nodes do not get\r
-an id.\r
-***/\r
-\r
-static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,\r
- const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,\r
- unsigned IdToDiffsLeafNodeIndex[])\r
- {\r
-#if TRACE\r
- Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",\r
- uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));\r
-#endif\r
- if (bIsDiff[uTreeNodeIndex])\r
- {\r
- unsigned uLeafCount = tree.GetLeafCount();\r
- unsigned *Leaves = new unsigned[uLeafCount];\r
- GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- {\r
- const unsigned uLeafNodeIndex = Leaves[n];\r
- const unsigned uId = tree.GetLeafId(uLeafNodeIndex);\r
- if (uId >= tree.GetLeafCount())\r
- Quit("BuildDiffs, id out of range");\r
- IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;\r
-#if TRACE\r
- Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);\r
-#endif\r
- }\r
- delete[] Leaves;\r
- return;\r
- }\r
-\r
- if (tree.IsLeaf(uTreeNodeIndex))\r
- Quit("BuildDiffs: should never reach leaf");\r
-\r
- const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);\r
- const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);\r
-\r
- const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);\r
- const unsigned uDiffsRight = uDiffsLeft + 1;\r
-\r
- BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);\r
- BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);\r
- }\r
-\r
-void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,\r
- unsigned IdToDiffsLeafNodeIndex[])\r
- {\r
-#if TRACE\r
- Log("Tree1:\n");\r
- Tree1.LogMe();\r
- Log("\n");\r
- Log("Tree2:\n");\r
- Tree2.LogMe();\r
-#endif\r
-\r
- if (!Tree1.IsRooted() || !Tree2.IsRooted())\r
- Quit("DiffTrees: requires rooted trees");\r
-\r
- const unsigned uNodeCount = Tree1.GetNodeCount();\r
- const unsigned uNodeCount2 = Tree2.GetNodeCount();\r
- \r
- const unsigned uLeafCount = Tree1.GetLeafCount();\r
- const unsigned uLeafCount2 = Tree2.GetLeafCount();\r
- assert(uLeafCount == uLeafCount2);\r
-\r
- if (uNodeCount != uNodeCount2)\r
- Quit("DiffTrees: different node counts");\r
-\r
-// Allocate tables so we can convert tree node index to\r
-// and from the unique id with a O(1) lookup.\r
- unsigned *NodeIndexToId1 = new unsigned[uNodeCount];\r
- unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];\r
-\r
- bool *bIsBachelor1 = new bool[uNodeCount];\r
- bool *bIsDiff1 = new bool[uNodeCount];\r
-\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- NodeIndexToId1[uNodeIndex] = uNodeCount;\r
- bIsBachelor1[uNodeIndex] = false;\r
- bIsDiff1[uNodeIndex] = false;\r
-\r
- // Use uNodeCount as value meaning "not set".\r
- IdToNodeIndex2[uNodeIndex] = uNodeCount;\r
- }\r
-\r
-// Initialize node index <-> id lookup tables\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- if (Tree1.IsLeaf(uNodeIndex))\r
- {\r
- const unsigned uId = Tree1.GetLeafId(uNodeIndex);\r
- if (uId >= uNodeCount)\r
- Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");\r
- NodeIndexToId1[uNodeIndex] = uId;\r
- }\r
-\r
- if (Tree2.IsLeaf(uNodeIndex))\r
- {\r
- const unsigned uId = Tree2.GetLeafId(uNodeIndex);\r
- if (uId >= uNodeCount)\r
- Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");\r
- IdToNodeIndex2[uId] = uNodeIndex;\r
- }\r
- }\r
-\r
-// Validity check. This verifies that the ids\r
-// pre-assigned to the leaves in Tree1 are unique\r
-// (note that the id<N check above does not rule\r
-// out two leaves having duplicate ids).\r
- for (unsigned uId = 0; uId < uLeafCount; ++uId)\r
- {\r
- unsigned uNodeIndex2 = IdToNodeIndex2[uId];\r
- if (uNodeCount == uNodeIndex2)\r
- Quit("DiffTrees, check 2");\r
- }\r
-\r
-// Ids assigned to internal nodes are N, N+1 ...\r
-// An internal node id uniquely identifies a set\r
-// of two or more leaves.\r
- unsigned uInternalNodeId = uLeafCount;\r
-\r
-// Depth-first traversal of tree.\r
-// The order guarantees that a node is visited before\r
-// its parent is visited.\r
- for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();\r
- NULL_NEIGHBOR != uNodeIndex1;\r
- uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))\r
- {\r
-#if TRACE\r
- Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",\r
- uNodeIndex1,\r
- Tree1.IsLeaf(uNodeIndex1),\r
- bIsBachelor1[uNodeIndex1]);\r
-#endif\r
-\r
- // Leaves are trivial; nothing to do.\r
- if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])\r
- continue;\r
-\r
- // If either child is a bachelor, flag\r
- // this node as a bachelor and continue.\r
- unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);\r
- if (bIsBachelor1[uLeft1])\r
- {\r
- bIsBachelor1[uNodeIndex1] = true;\r
- continue;\r
- }\r
-\r
- unsigned uRight1 = Tree1.GetRight(uNodeIndex1);\r
- if (bIsBachelor1[uRight1])\r
- {\r
- bIsBachelor1[uNodeIndex1] = true;\r
- continue;\r
- }\r
-\r
- // Both children are married.\r
- // Married nodes are guaranteed to have an id.\r
- unsigned uIdLeft = NodeIndexToId1[uLeft1];\r
- unsigned uIdRight = NodeIndexToId1[uRight1];\r
-\r
- if (uIdLeft == uNodeCount || uIdRight == uNodeCount)\r
- Quit("DiffTrees, check 5");\r
-\r
- // uLeft2 is the spouse of uLeft1, and similarly for uRight2.\r
- unsigned uLeft2 = IdToNodeIndex2[uIdLeft];\r
- unsigned uRight2 = IdToNodeIndex2[uIdRight];\r
-\r
- if (uLeft2 == uNodeCount || uRight2 == uNodeCount)\r
- Quit("DiffTrees, check 6");\r
-\r
- // If the spouses of uLeft1 and uRight1 have the same\r
- // parent, then this parent is the spouse of uNodeIndex1.\r
- // Otherwise, uNodeIndex1 is a diff.\r
- unsigned uParentLeft2 = Tree2.GetParent(uLeft2);\r
- unsigned uParentRight2 = Tree2.GetParent(uRight2);\r
-\r
-#if TRACE\r
- Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",\r
- uLeft1,\r
- uRight1,\r
- uLeft2,\r
- uRight2,\r
- uParentLeft2,\r
- uParentRight2);\r
-#endif\r
-\r
- if (uParentLeft2 == uParentRight2)\r
- {\r
- NodeIndexToId1[uNodeIndex1] = uInternalNodeId;\r
- IdToNodeIndex2[uInternalNodeId] = uParentLeft2;\r
- ++uInternalNodeId;\r
- }\r
- else\r
- bIsBachelor1[uNodeIndex1] = true;\r
- }\r
-\r
- unsigned uDiffCount = 0;\r
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
- {\r
- if (bIsBachelor1[uNodeIndex])\r
- continue;\r
- if (Tree1.IsRoot(uNodeIndex))\r
- {\r
- // Special case: if no bachelors, consider the\r
- // root a diff.\r
- if (!bIsBachelor1[uNodeIndex])\r
- bIsDiff1[uNodeIndex] = true;\r
- continue;\r
- }\r
- const unsigned uParent = Tree1.GetParent(uNodeIndex);\r
- if (bIsBachelor1[uParent])\r
- {\r
- bIsDiff1[uNodeIndex] = true;\r
- ++uDiffCount;\r
- }\r
- }\r
-\r
-#if TRACE\r
- Log("Tree1:\n");\r
- Log("Node Id Bach Diff Name\n");\r
- Log("---- ---- ---- ---- ----\n");\r
- for (unsigned n = 0; n < uNodeCount; ++n)\r
- {\r
- Log("%4u %4u %d %d",\r
- n,\r
- NodeIndexToId1[n],\r
- bIsBachelor1[n],\r
- bIsDiff1[n]);\r
- if (Tree1.IsLeaf(n))\r
- Log(" %s", Tree1.GetLeafName(n));\r
- Log("\n");\r
- }\r
- Log("\n");\r
- Log("Tree2:\n");\r
- Log("Node Id Name\n");\r
- Log("---- ---- ----\n");\r
- for (unsigned n = 0; n < uNodeCount; ++n)\r
- {\r
- Log("%4u ", n);\r
- if (Tree2.IsLeaf(n))\r
- Log(" %s", Tree2.GetLeafName(n));\r
- Log("\n");\r
- }\r
-#endif\r
-\r
- Diffs.CreateRooted();\r
- const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();\r
- const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();\r
-\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- IdToDiffsLeafNodeIndex[n] = uNodeCount;\r
-\r
- BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,\r
- IdToDiffsLeafNodeIndex);\r
-\r
-#if TRACE\r
- Log("\n");\r
- Log("Diffs:\n");\r
- Diffs.LogMe();\r
- Log("\n");\r
- Log("IdToDiffsLeafNodeIndex:");\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- {\r
- if (n%16 == 0)\r
- Log("\n");\r
- else\r
- Log(" ");\r
- Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);\r
- }\r
- Log("\n");\r
-#endif\r
-\r
- for (unsigned n = 0; n < uLeafCount; ++n)\r
- if (IdToDiffsLeafNodeIndex[n] == uNodeCount)\r
- Quit("TreeDiffs check 7");\r
-\r
- delete[] NodeIndexToId1;\r
- delete[] IdToNodeIndex2;\r
-\r
- delete[] bIsBachelor1;\r
- delete[] bIsDiff1;\r
- }\r