Next version of JABA
[jabaws.git] / binaries / src / muscle / difftreese.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 \r
4 #define TRACE   0\r
5 \r
6 /***\r
7 Algorithm to compare two trees, X and Y.\r
8 \r
9 A node x in X and node y in Y are defined to be\r
10 similar iff the set of leaves in the subtree under\r
11 x is identical to the set of leaves under y.\r
12 \r
13 A node is defined to be changed iff it is not\r
14 similar to any node in the other tree.\r
15 \r
16 Nodes x and y are defined to be married iff every\r
17 node in the subtree under x is similar to a node\r
18 in the subtree under y. Married nodes are considered\r
19 to be equal. The subtrees under two married nodes can\r
20 at most differ by exchanges of left and right branches,\r
21 which we do not consider to be significant here.\r
22 \r
23 A node is changed iff it is not married. If a node is\r
24 changed, then it has a dissimilar node in its subtree,\r
25 and it follows immediately from the definition of marriage\r
26 that its parent is also a bachelor. Hence all nodes on the\r
27 path from a changed node to the root are changed.\r
28 \r
29 We assume the trees have the same set of leaves, so\r
30 every leaf is trivially both similar and married to\r
31 the same leaf in the opposite tree. Changed nodes\r
32 are therefore always internal (i.e., non-leaf) nodes.\r
33 \r
34 Example:\r
35 \r
36               -----A\r
37         -----k\r
38    ----j      -----B\r
39 --i     -----C\r
40    ------D\r
41 \r
42 \r
43               -----A\r
44         -----p\r
45    ----n      -----B\r
46 --m     -----D\r
47    ------C\r
48 \r
49 \r
50 The following pairs of internal nodes are similar.\r
51 \r
52         Nodes   Set of leaves\r
53         -----   -------------\r
54         k,p             A,B\r
55         i,m             A,B,C,D\r
56 \r
57 Changed nodes in the first tree are i and j, changed nodes\r
58 in the second tree are m and n.\r
59 \r
60 Node k and p are married, but i and m are not (because j\r
61 and n are changed). The diffs are C, D and k.\r
62 \r
63 To achieve O(N) we avoid traversing a given subtree multiple\r
64 times and also avoid comparing lists of leaves. \r
65 \r
66 We visit nodes in depth-first order (i.e., a node is visited\r
67 before its parent).\r
68 \r
69 If either child of a node is changed, we flag it as changed.\r
70 \r
71 If both children of the node we are visiting are married,\r
72 we check whether the spouses of those children have the\r
73 same parent in the other tree. If the parents are different,\r
74 the current node is a bachelor. If they have the same parent,\r
75 then the node we are visiting is the spouse of that parent.\r
76 We assign this newly identified married couple a unique integer\r
77 id. The id of a node is in one-to-one correspondence with the\r
78 set of leaves in its subtree. Two nodes have the same set of\r
79 leaves iff they have the same id. Changed nodes do not get\r
80 an id.\r
81 ***/\r
82 \r
83 void DiffTreesE(const Tree &NewTree, const Tree &OldTree,\r
84   unsigned NewNodeIndexToOldNodeIndex[])\r
85         {\r
86 #if     TRACE\r
87         Log("DiffTreesE NewTree:\n");\r
88         NewTree.LogMe();\r
89         Log("\n");\r
90         Log("OldTree:\n");\r
91         OldTree.LogMe();\r
92 #endif\r
93 \r
94         if (!NewTree.IsRooted() || !OldTree.IsRooted())\r
95                 Quit("DiffTrees: requires rooted trees");\r
96 \r
97         const unsigned uNodeCount = NewTree.GetNodeCount();\r
98         const unsigned uOldNodeCount = OldTree.GetNodeCount();\r
99         const unsigned uLeafCount = NewTree.GetLeafCount();\r
100         const unsigned uOldLeafCount = OldTree.GetLeafCount();\r
101         if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)\r
102                 Quit("DiffTreesE: different node counts");\r
103 \r
104         {\r
105         unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];\r
106         for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)\r
107                 {\r
108                 if (OldTree.IsLeaf(uOldNodeIndex))\r
109                         {\r
110                         unsigned Id = OldTree.GetLeafId(uOldNodeIndex);\r
111                         IdToOldNodeIndex[Id] = uOldNodeIndex;\r
112                         }\r
113                 }\r
114 \r
115 // Initialize NewNodeIndexToOldNodeIndex[]\r
116 // All internal nodes are marked as changed, but may be updated later.\r
117         for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)\r
118                 {\r
119                 if (NewTree.IsLeaf(uNewNodeIndex))\r
120                         {\r
121                         unsigned uId = NewTree.GetLeafId(uNewNodeIndex);\r
122                         assert(uId < uLeafCount);\r
123 \r
124                         unsigned uOldNodeIndex = IdToOldNodeIndex[uId];\r
125                         assert(uOldNodeIndex < uNodeCount);\r
126 \r
127                         NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;\r
128                         }\r
129                 else\r
130                         NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;\r
131                 }\r
132         delete[] IdToOldNodeIndex;\r
133         }\r
134 \r
135 // Depth-first traversal of tree.\r
136 // The order guarantees that a node is visited before\r
137 // its parent is visited.\r
138         for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();\r
139           NULL_NEIGHBOR != uNewNodeIndex;\r
140           uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))\r
141                 {\r
142                 if (NewTree.IsLeaf(uNewNodeIndex))\r
143                         continue;\r
144 \r
145         // If either child is changed, flag this node as changed and continue.\r
146                 unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);\r
147                 unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];\r
148                 if (NODE_CHANGED == uOldLeft)\r
149                         {\r
150                         NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;\r
151                         continue;\r
152                         }\r
153 \r
154                 unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);\r
155                 unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];\r
156                 if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])\r
157                         {\r
158                         NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;\r
159                         continue;\r
160                         }\r
161 \r
162                 unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);\r
163                 unsigned uOldParentRight = OldTree.GetParent(uOldRight);\r
164                 if (uOldParentLeft == uOldParentRight)\r
165                         NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;\r
166                 else\r
167                         NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;\r
168                 }\r
169 \r
170 #if TRACE\r
171         {\r
172         Log("NewToOld ");\r
173         for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)\r
174                 {\r
175                 Log(" [%3u]=", uNewNodeIndex);\r
176                 if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])\r
177                         Log("  X");\r
178                 else\r
179                         Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);\r
180                 if ((uNewNodeIndex+1)%8 == 0)\r
181                         Log("\n         ");\r
182                 }\r
183         Log("\n");\r
184         }\r
185 #endif\r
186 \r
187 #if     DEBUG\r
188         {\r
189         for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)\r
190                 {\r
191                 unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];\r
192                 if (NewTree.IsLeaf(uNewNodeIndex))\r
193                         {\r
194                         if (uOld >= uNodeCount)\r
195                                 {\r
196                                 Log("NewNode=%u uOld=%u > uNodeCount=%u\n",\r
197                                   uNewNodeIndex, uOld, uNodeCount);\r
198                                 Quit("Diff check failed");\r
199                                 }\r
200                         unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);\r
201                         unsigned uIdOld = OldTree.GetLeafId(uOld);\r
202                         if (uIdNew != uIdOld)\r
203                                 {\r
204                                 Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",\r
205                                   uNewNodeIndex, uOld, uIdNew, uIdOld);\r
206                                 Quit("Diff check failed");\r
207                                 }\r
208                         continue;\r
209                         }\r
210 \r
211                 if (NODE_CHANGED == uOld)\r
212                         continue;\r
213 \r
214                 unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);\r
215                 unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);\r
216 \r
217                 unsigned uOldLeft = OldTree.GetLeft(uOld);\r
218                 unsigned uOldRight = OldTree.GetRight(uOld);\r
219 \r
220                 unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];\r
221                 unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];\r
222 \r
223                 bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);\r
224                 bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);\r
225                 if (!bSameNotRotated && !bSameRotated)\r
226                         {\r
227                         Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);\r
228                         Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);\r
229                         Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);\r
230                         Quit("Diff check failed");\r
231                         }\r
232                 }\r
233         }\r
234 #endif\r
235         }\r