1 /* Last changed Time-stamp: <95/07/12 19:52:41 ivo> */
3 Cluster Analysis using Ward's Method
4 Ward J Amer Stat Ass, 58 (1963), p236
5 c Peter Stadler and Ivo Hofacker
13 #define PRIVATE static
15 #define INFINITY 1000000
32 PUBLIC Union *wards_cluster(float **clmat);
33 PUBLIC Union *neighbour_joining(float **clmat);
34 PUBLIC void printf_phylogeny(Union *tree, char *type);
37 /*--------------------------------------------------------------------*/
39 PUBLIC Union *wards_cluster(float **clmat)
47 float min,deno,xa,xb,x;
48 int i,j,step,s=0,t=0,n;
50 n= (int)(clmat[0][0]);
52 size = (int *) space((n+1)*sizeof(int));
53 d = (float **) space((n+1)*sizeof(float *));
55 d[i] = (float *) space((n+1)*sizeof(float));
56 indic = (int *) space((n+1)*sizeof(int));
57 help = (float *) space((n+1)*sizeof(float));
58 tree = (Union *) space((n+1)*sizeof(Union));
62 tree[0].distance = 0.0;
63 tree[0].distance2 = 0.0;
65 for (i=1;i<=n;i++) size[i]=1;
68 d[i][j] = clmat[i][j];
72 /* look for the indices [s,t] with minimum d[s][t]*/
73 for(step=1;step<n; step++){
91 /* now we have to join the clusters s and t and update the working array*/
95 tree[step].distance = min;
100 deno = (float) (size[i]+size[s]+size[t]);
101 xa = ((float) (size[i]+size[s]))/deno;
102 xb = ((float) (size[i]+size[t]))/deno;
103 x = ((float) size[i])/deno;
104 help[i] = xa*d[i][s] + xb*d[i][t] - x*d[s][t];
107 for (i=1; i<=n; i++){
118 for(i=0;i<=n;i++) free(d[i]);
125 /*--------------------------------------------------------------------*/
127 PUBLIC Union *neighbour_joining(float **clmat)
129 int n,i,j,k,l,step,ll[3];
130 float b1,b2,b3,nn,tot,tmin,d1,d2;
137 n = (int) (clmat[0][0]);
139 tree = (Union *) space((n+1)*sizeof(Union));
140 indic = (int *) space((n+1)*sizeof(int) );
141 av = (float *) space((n+1)*sizeof(float) );
142 temp = (float *) space((n+1)*sizeof(float) );
143 d = (float**) space((n+1)*sizeof(float*));
144 for(i=0;i<=n;i++) d[i] = (float*) space((n+1)*sizeof(float));
147 for (i=1; i<=n; i++){
149 d[i][j] = clmat[i][j];
156 tree[0].distance = 0.0;
157 tree[0].distance2 = 0.0;
161 for(step=1;step<=n-3;step++) {
162 /* for(j=2;j<=n;j++) for(i=1;i<j;i++) d[j][i]=d[i][j]; */
173 tot=(nn-2.0)*d[k][l]-d1-d2;
189 d1 = (d1-d[mini][minj])/(nn-2.0);
190 d2 = (d2-d[mini][minj])/(nn-2.0);
192 tree[step].set1 = mini;
193 tree[step].distance = (d[mini][minj]+d1-d2)*0.5-av[mini];
194 tree[step].set2 = minj;
195 tree[step].distance2 = d[mini][minj]-(d[mini][minj]+d1-d2)*0.5-av[minj];
197 av[mini]=d[mini][minj]*0.5;
203 temp[j]=(d[mini][j]+d[minj][j])*0.5;
207 d[mini][j] = temp[j];
208 d[j][mini] = temp[j];
225 b1=(d[ll[0]][ll[1]]+d[ll[0]][ll[2]]-d[ll[1]][ll[2]])*0.5;
226 b2=d[ll[0]][ll[1]]-b1;
227 b3=d[ll[0]][ll[2]]-b1;
231 tree[step].set1 = ll[1];
232 tree[step].distance = b2;
233 tree[step].set2 = ll[2];
234 tree[step].distance2 = b3;
236 tree[step].set1 = ll[0];
237 tree[step].distance = 0.0;
238 tree[step].set2 = ll[1];
239 tree[step].distance2 = b1;
241 for(i=0;i<=n;i++) free(d[i]); free(d);
249 /*--------------------------------------------------------------------*/
252 PUBLIC void printf_phylogeny(Union *tree, char *type)
257 printf("> %d %s ( Phylogeny using ",n, type);
260 printf("Ward's Method )\n");
261 printf("> Nodes Variance\n");
263 printf("%3d %3d %9.4f \n",
264 tree[i].set1, tree[i].set2, tree[i].distance);
267 printf("Saitou's Neighbour Joining Method )\n");
268 printf("> Nodes Branch Length in Tree\n");
270 printf("%3d %3d %9.4f %9.4f\n",
271 tree[i].set1, tree[i].set2, tree[i].distance, tree[i].distance2);
274 printf(" non-identified Method )\n");
276 printf("%3d %3d %9.4f %9.4f\n",
277 tree[i].set1, tree[i].set2, tree[i].distance, tree[i].distance2);
281 /*--------------------------------------------------------------------*/