Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / cluster.cpp
1 #include "muscle.h"\r
2 #include "cluster.h"\r
3 #include "distfunc.h"\r
4 \r
5 static inline float Min(float d1, float d2)\r
6         {\r
7         return d1 < d2 ? d1 : d2;\r
8         }\r
9 \r
10 static inline float Max(float d1, float d2)\r
11         {\r
12         return d1 > d2 ? d1 : d2;\r
13         }\r
14 \r
15 static inline float Mean(float d1, float d2)\r
16         {\r
17         return (float) ((d1 + d2)/2.0);\r
18         }\r
19 \r
20 #if     _DEBUG\r
21 void ClusterTree::Validate(unsigned uNodeCount)\r
22         {\r
23         unsigned n;\r
24         ClusterNode *pNode;\r
25         unsigned uDisjointListCount = 0;\r
26         for (pNode = m_ptrDisjoints; pNode; pNode = pNode->GetNextDisjoint())\r
27                 {\r
28                 ClusterNode *pPrev = pNode->GetPrevDisjoint();\r
29                 ClusterNode *pNext = pNode->GetNextDisjoint();\r
30                 if (0 != pPrev)\r
31                         {\r
32                         if (pPrev->GetNextDisjoint() != pNode)\r
33                                 {\r
34                                 Log("Prev->This mismatch, prev=\n");\r
35                                 pPrev->LogMe();\r
36                                 Log("This=\n");\r
37                                 pNode->LogMe();\r
38                                 Quit("ClusterTree::Validate()");\r
39                                 }\r
40                         }\r
41                 else\r
42                         {\r
43                         if (pNode != m_ptrDisjoints)\r
44                                 {\r
45                                 Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",\r
46                                   pNode->GetIndex(),\r
47                                   m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);\r
48                                 pNode->LogMe();\r
49                                 Quit("ClusterTree::Validate()");\r
50                                 }\r
51                         }\r
52                 if (0 != pNext)\r
53                         {\r
54                         if (pNext->GetPrevDisjoint() != pNode)\r
55                                 {\r
56                                 Log("Next->This mismatch, next=\n");\r
57                                 pNext->LogMe();\r
58                                 Log("This=\n");\r
59                                 pNode->LogMe();\r
60                                 Quit("ClusterTree::Validate()");\r
61                                 }\r
62                         }\r
63                 ++uDisjointListCount;\r
64                 if (uDisjointListCount > m_uNodeCount)\r
65                         Quit("Loop in disjoint list");\r
66                 }\r
67 \r
68         unsigned uParentlessNodeCount = 0;\r
69         for (n = 0; n < uNodeCount; ++n)\r
70                 if (0 == m_Nodes[n].GetParent())\r
71                         ++uParentlessNodeCount;\r
72         \r
73         if (uDisjointListCount != uParentlessNodeCount)\r
74                 Quit("Disjoints = %u Parentless = %u\n", uDisjointListCount,\r
75                   uParentlessNodeCount);\r
76         }\r
77 #else   // !_DEBUG\r
78 #define Validate(uNodeCount)    // empty\r
79 #endif\r
80 \r
81 void ClusterNode::LogMe() const\r
82         {\r
83         unsigned uClusterSize = GetClusterSize();\r
84         Log("[%02u] w=%5.3f  CW=%5.3f  LBW=%5.3f  RBW=%5.3f  LWT=%5.3f  RWT=%5.3f  L=%02d  R=%02d  P=%02d  NxDj=%02d  PvDj=%02d  Sz=%02d  {",\r
85                 m_uIndex,\r
86                 m_dWeight,\r
87                 GetClusterWeight(),\r
88                 GetLeftBranchWeight(),\r
89                 GetRightBranchWeight(),\r
90                 GetLeftWeight(),\r
91                 GetRightWeight(),\r
92                 m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,\r
93                 m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,\r
94                 m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,\r
95                 m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,\r
96                 m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,\r
97                 uClusterSize);\r
98         for (unsigned i = 0; i < uClusterSize; ++i)\r
99                 Log(" %u", GetClusterLeaf(i)->GetIndex());\r
100         Log(" }\n");\r
101         }\r
102 \r
103 // How many leaves in the sub-tree under this node?\r
104 unsigned ClusterNode::GetClusterSize() const\r
105         {\r
106         unsigned uLeafCount = 0;\r
107 \r
108         if (0 == m_ptrLeft && 0 == m_ptrRight)\r
109                 return 1;\r
110 \r
111         if (0 != m_ptrLeft)\r
112                 uLeafCount += m_ptrLeft->GetClusterSize();\r
113         if (0 != m_ptrRight)\r
114                 uLeafCount += m_ptrRight->GetClusterSize();\r
115         assert(uLeafCount > 0);\r
116         return uLeafCount;\r
117         }\r
118 \r
119 double ClusterNode::GetClusterWeight() const\r
120         {\r
121         double dWeight = 0.0;\r
122         if (0 != m_ptrLeft)\r
123                 dWeight += m_ptrLeft->GetClusterWeight();\r
124         if (0 != m_ptrRight)\r
125                 dWeight += m_ptrRight->GetClusterWeight();\r
126         return dWeight + GetWeight();\r
127         }\r
128 \r
129 double ClusterNode::GetLeftBranchWeight() const\r
130         {\r
131         const ClusterNode *ptrLeft = GetLeft();\r
132         if (0 == ptrLeft)\r
133                 return 0.0;\r
134 \r
135         return GetWeight() - ptrLeft->GetWeight();\r
136         }\r
137 \r
138 double ClusterNode::GetRightBranchWeight() const\r
139         {\r
140         const ClusterNode *ptrRight = GetRight();\r
141         if (0 == ptrRight)\r
142                 return 0.0;\r
143 \r
144         return GetWeight() - ptrRight->GetWeight();\r
145         }\r
146 \r
147 double ClusterNode::GetRightWeight() const\r
148         {\r
149         const ClusterNode *ptrRight = GetRight();\r
150         if (0 == ptrRight)\r
151                 return 0.0;\r
152         return ptrRight->GetClusterWeight() + GetWeight();\r
153         }\r
154 \r
155 double ClusterNode::GetLeftWeight() const\r
156         {\r
157         const ClusterNode *ptrLeft = GetLeft();\r
158         if (0 == ptrLeft)\r
159                 return 0.0;\r
160         return ptrLeft->GetClusterWeight() + GetWeight();\r
161         }\r
162 \r
163 // Return n'th leaf in the sub-tree under this node.\r
164 const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const\r
165         {\r
166         if (0 != m_ptrLeft)\r
167                 {\r
168                 if (0 == m_ptrRight)\r
169                         return this;\r
170 \r
171                 unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();\r
172 \r
173                 if (uLeafIndex < uLeftLeafCount)\r
174                         return m_ptrLeft->GetClusterLeaf(uLeafIndex);\r
175 \r
176                 assert(uLeafIndex >= uLeftLeafCount);\r
177                 return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);\r
178                 }\r
179         if (0 == m_ptrRight)\r
180                 return this;\r
181         return m_ptrRight->GetClusterLeaf(uLeafIndex);\r
182         }\r
183 \r
184 void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)\r
185         {\r
186         ClusterNode *ptrPrev = ptrNode->GetPrevDisjoint();\r
187         ClusterNode *ptrNext = ptrNode->GetNextDisjoint();\r
188 \r
189         if (0 != ptrPrev)\r
190                 ptrPrev->SetNextDisjoint(ptrNext);\r
191         else\r
192                 m_ptrDisjoints = ptrNext;\r
193 \r
194         if (0 != ptrNext)\r
195                 ptrNext->SetPrevDisjoint(ptrPrev);\r
196 \r
197 #if     _DEBUG\r
198 // not algorithmically necessary, but improves clarity\r
199 // and supports Validate().\r
200         ptrNode->SetPrevDisjoint(0);\r
201         ptrNode->SetNextDisjoint(0);\r
202 #endif\r
203         }\r
204 \r
205 void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)\r
206         {\r
207         ptrNode->SetNextDisjoint(m_ptrDisjoints);\r
208         ptrNode->SetPrevDisjoint(0);\r
209         if (0 != m_ptrDisjoints)\r
210                 m_ptrDisjoints->SetPrevDisjoint(ptrNode);\r
211         m_ptrDisjoints = ptrNode;\r
212         }\r
213 \r
214 ClusterTree::ClusterTree()\r
215         {\r
216         m_ptrDisjoints = 0;\r
217         m_Nodes = 0;\r
218         m_uNodeCount = 0;\r
219         }\r
220 \r
221 ClusterTree::~ClusterTree()\r
222         {\r
223         delete[] m_Nodes;\r
224         }\r
225 \r
226 void ClusterTree::LogMe() const\r
227         {\r
228         Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);\r
229         for (unsigned i = 0; i < m_uNodeCount; ++i)\r
230                 {\r
231                 m_Nodes[i].LogMe();\r
232                 }\r
233         }\r
234 \r
235 ClusterNode *ClusterTree::GetRoot() const\r
236         {\r
237         return &m_Nodes[m_uNodeCount - 1];\r
238         }\r
239 \r
240 // This is the UPGMA algorithm as described in Durbin et al. p166.\r
241 void ClusterTree::Create(const DistFunc &Dist)\r
242         {\r
243         unsigned i;\r
244         m_uLeafCount = Dist.GetCount();\r
245         m_uNodeCount = 2*m_uLeafCount - 1;\r
246 \r
247         delete[] m_Nodes;\r
248         m_Nodes = new ClusterNode[m_uNodeCount];\r
249 \r
250         for (i = 0; i < m_uNodeCount; ++i)\r
251                 m_Nodes[i].SetIndex(i);\r
252 \r
253         for (i = 0; i < m_uLeafCount - 1; ++i)\r
254                 m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);\r
255 \r
256         for (i = 1; i < m_uLeafCount; ++i)\r
257                 m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);\r
258         \r
259         m_ptrDisjoints = &m_Nodes[0];\r
260 \r
261 //      Log("Initial state\n");\r
262 //      LogMe();\r
263 //      Log("\n");\r
264 \r
265         DistFunc ClusterDist;\r
266         ClusterDist.SetCount(m_uNodeCount);\r
267         double dMaxDist = 0.0;\r
268         for (i = 0; i < m_uLeafCount; ++i)\r
269                 for (unsigned j = 0; j < m_uLeafCount; ++j)\r
270                         {\r
271                         float dDist = Dist.GetDist(i, j);\r
272                         ClusterDist.SetDist(i, j, dDist);\r
273                         }\r
274 \r
275         Validate(m_uLeafCount);\r
276 \r
277 // Iteration. N-1 joins needed to create a binary tree from N leaves.\r
278         for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;\r
279           ++uJoinIndex)\r
280                 {\r
281         // Find closest pair of clusters\r
282                 unsigned uIndexClosest1;\r
283                 unsigned uIndexClosest2;\r
284                 bool bFound = false;\r
285                 double dDistClosest = 9e99;\r
286                 for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;\r
287                   ptrNode1 = ptrNode1->GetNextDisjoint())\r
288                         {\r
289                         for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;\r
290                           ptrNode2 = ptrNode2->GetNextDisjoint())\r
291                                 {\r
292                                 unsigned i1 = ptrNode1->GetIndex();\r
293                                 unsigned i2 = ptrNode2->GetIndex();\r
294                                 double dDist = ClusterDist.GetDist(i1, i2);\r
295                                 if (dDist < dDistClosest)\r
296                                         {\r
297                                         bFound = true;\r
298                                         dDistClosest = dDist;\r
299                                         uIndexClosest1 = i1;\r
300                                         uIndexClosest2 = i2;\r
301                                         }\r
302                                 }\r
303                         }\r
304                 assert(bFound);\r
305 \r
306                 ClusterNode &Join = m_Nodes[uJoinIndex];\r
307                 ClusterNode &Child1 = m_Nodes[uIndexClosest1];\r
308                 ClusterNode &Child2 = m_Nodes[uIndexClosest2];\r
309 \r
310                 Join.SetLeft(&Child1);\r
311                 Join.SetRight(&Child2);\r
312                 Join.SetWeight(dDistClosest);\r
313 \r
314                 Child1.SetParent(&Join);\r
315                 Child2.SetParent(&Join);\r
316 \r
317                 DeleteFromDisjoints(&Child1);\r
318                 DeleteFromDisjoints(&Child2);\r
319                 AddToDisjoints(&Join);\r
320 \r
321 //              Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);\r
322 //              LogMe();\r
323 \r
324         // Calculate distance of every remaining disjoint cluster to the\r
325         // new cluster created by the join\r
326                 for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;\r
327                   ptrNode = ptrNode->GetNextDisjoint())\r
328                         {\r
329                         unsigned uNodeIndex = ptrNode->GetIndex();\r
330                         float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);\r
331                         float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);\r
332                         float dDist = Min(dDist1, dDist2);\r
333                         ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);\r
334                         }\r
335                 Validate(uJoinIndex+1);\r
336                 }\r
337         GetRoot()->GetClusterWeight();\r
338 //      LogMe();\r
339         }\r