5 /* version 3.6. (c) Copyright 1993-2004 by the University of Washington.
6 Written by Mary Kuhner, Jon Yamato, Joseph Felsenstein, Akiko Fuseki,
7 Sean Lamont, and Andrew Keeffe.
8 Permission is granted to copy and use this program provided no fee is
9 charged for it and provided that this copyright notice is not removed. */
13 /* function prototypes */
14 void getoptions(void);
17 void inputoptions(void);
19 void describe(node *, double);
21 void nodelabel(boolean);
25 /* function prototypes */
29 Char infilename[FNMLNGTH], outfilename[FNMLNGTH], outtreename[FNMLNGTH];
30 long nonodes2, outgrno, col, datasets, ith;
34 boolean jumble, lower, upper, outgropt, replicates, trout,
35 printdata, progress, treeprint, mulsets, njoin;
41 /* variables for maketree, propagated globally for C version: */
47 /* interactively set options */
48 long inseed0 = 0, loopcount;
51 fprintf(outfile, "\nNeighbor-Joining/UPGMA method version %s\n\n",VERSION);
67 printf("\nNeighbor-Joining/UPGMA method version %s\n\n",VERSION);
68 printf("Settings for this run:\n");
69 printf(" N Neighbor-joining or UPGMA tree? %s\n",
70 (njoin ? "Neighbor-joining" : "UPGMA"));
72 printf(" O Outgroup root?");
74 printf(" Yes, at species number%3ld\n", outgrno);
76 printf(" No, use as outgroup species%3ld\n", outgrno);
78 printf(" L Lower-triangular data matrix? %s\n",
79 (lower ? "Yes" : "No"));
80 printf(" R Upper-triangular data matrix? %s\n",
81 (upper ? "Yes" : "No"));
82 printf(" S Subreplicates? %s\n",
83 (replicates ? "Yes" : "No"));
84 printf(" J Randomize input order of species?");
86 printf(" Yes (random number seed =%8ld)\n", inseed0);
88 printf(" No. Use input order\n");
89 printf(" M Analyze multiple data sets?");
91 printf(" Yes, %2ld sets\n", datasets);
94 printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n",
95 (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"));
96 printf(" 1 Print out the data at start of run %s\n",
97 (printdata ? "Yes" : "No"));
98 printf(" 2 Print indications of progress of run %s\n",
99 (progress ? "Yes" : "No"));
100 printf(" 3 Print out tree %s\n",
101 (treeprint ? "Yes" : "No"));
102 printf(" 4 Write out trees onto tree file? %s\n",
103 (trout ? "Yes" : "No"));
104 printf("\n\n Y to accept these or type the letter for one to change\n");
106 phyFillScreenColor();
108 scanf("%c%*[^\n]", &ch);
115 if (strchr("NJOULRSM01234",ch) != NULL){
121 initseed(&inseed, &inseed0, seed);
129 outgropt = !outgropt;
131 initoutgroup(&outgrno, spp);
141 replicates = !replicates;
151 initdatasets(&datasets);
154 initseed(&inseed, &inseed0, seed);
158 initterminal(&ibmpc, &ansi);
162 printdata = !printdata;
166 progress = !progress;
170 treeprint = !treeprint;
178 printf("Not a possible option!\n");
179 countup(&loopcount, 100);
188 x = (vector *)Malloc(spp*sizeof(vector));
189 for (i = 0; i < spp; i++)
190 x[i] = (vector)Malloc(spp*sizeof(double));
191 reps = (intvector *)Malloc(spp*sizeof(intvector));
192 for (i = 0; i < spp; i++)
193 reps[i] = (intvector)Malloc(spp*sizeof(long));
194 nayme = (naym *)Malloc(spp*sizeof(naym));
195 enterorder = (long *)Malloc(spp*sizeof(long));
196 cluster = (node **)Malloc(spp*sizeof(node *));
204 for (i = 0; i < spp; i++)
207 for (i = 0; i < spp; i++)
218 /* initializes variables */
221 inputnumbers2(&spp, &nonodes2, 2);
222 nonodes2 += (njoin ? 0 : 1);
224 alloctree(&curtree.nodep, nonodes2+1);
225 p = curtree.nodep[nonodes2]->next->next;
226 curtree.nodep[nonodes2]->next = curtree.nodep[nonodes2];
235 /* read options information */
241 fprintf(outfile, " Neighbor-joining method\n");
243 fprintf(outfile, " UPGMA method\n");
244 fprintf(outfile, "\n Negative branch lengths allowed\n\n");
248 void describe(node *p, double height)
250 /* print out information for one branch */
256 fprintf(outfile, "%4ld ", q->index - spp);
258 fprintf(outfile, "%4ld ", q->index - spp);
260 for (i = 0; i < nmlngth; i++)
261 putc(nayme[p->index - 1][i], outfile);
265 fprintf(outfile, "%4ld ", p->index - spp);
267 fprintf(outfile, "%4ld ", p->index - spp);
271 fprintf(outfile, "%12.5f\n", q->v);
273 fprintf(outfile, "%10.5f %10.5f\n", q->v, q->v+height);
275 describe(p->next->back, height+q->v);
276 describe(p->next->next->back, height+q->v);
283 /* print out branch lengths etc. */
286 fprintf(outfile, "remember:");
288 fprintf(outfile, " (although rooted by outgroup)");
289 fprintf(outfile, " this is an unrooted tree!\n");
292 fprintf(outfile, "\nBetween And Length\n");
293 fprintf(outfile, "------- --- ------\n");
295 fprintf(outfile, "From To Length Height\n");
296 fprintf(outfile, "---- -- ------ ------\n");
298 describe(curtree.start->next->back, 0.0);
299 describe(curtree.start->next->next->back, 0.0);
301 describe(curtree.start->back, 0.0);
302 fprintf(outfile, "\n\n");
306 void nodelabel(boolean isnode)
317 /* calculate the tree */
318 long nc, nextnode, mini=0, minj=0, i, j, ia, ja, ii, jj, nude, iter;
319 double fotu2, total, tmin, dio, djo, bi, bj, bk, dmin=0, da;
324 double *R; /* added in revisions by Y. Ina */
325 R = (double *)Malloc(spp * sizeof(double));
327 for (i = 0; i <= spp - 2; i++) {
328 for (j = i + 1; j < spp; j++) {
329 da = (x[i][j] + x[j][i]) / 2.0;
334 /* First initialization */
337 av = (vector)Malloc(spp*sizeof(double));
338 oc = (intvector)Malloc(spp*sizeof(long));
339 for (i = 0; i < spp; i++) {
343 /* Enter the main cycle */
348 for (nc = 1; nc <= iter; nc++) {
349 for (j = 2; j <= spp; j++) {
350 for (i = 0; i <= j - 2; i++)
351 x[j - 1][i] = x[i][j - 1];
354 /* Compute sij and minimize */
355 if (njoin) { /* many revisions by Y. Ina from here ... */
356 for (i = 0; i < spp; i++)
358 for (ja = 2; ja <= spp; ja++) {
359 jj = enterorder[ja - 1];
360 if (cluster[jj - 1] != NULL) {
361 for (ia = 0; ia <= ja - 2; ia++) {
363 if (cluster[ii - 1] != NULL) {
364 R[ii - 1] += x[ii - 1][jj - 1];
365 R[jj - 1] += x[ii - 1][jj - 1];
371 for (ja = 2; ja <= spp; ja++) {
372 jj = enterorder[ja - 1];
373 if (cluster[jj - 1] != NULL) {
374 for (ia = 0; ia <= ja - 2; ia++) {
376 if (cluster[ii - 1] != NULL) {
378 total = fotu2 * x[ii - 1][jj - 1] - R[ii - 1] - R[jj - 1];
379 /* this statement part of revisions by Y. Ina */
381 total = x[ii - 1][jj - 1];
391 /* compute lengths and print */
395 for (i = 0; i < spp; i++) {
396 dio += x[i][mini - 1];
397 djo += x[i][minj - 1];
399 dmin = x[mini - 1][minj - 1];
400 dio = (dio - dmin) / fotu2;
401 djo = (djo - dmin) / fotu2;
402 bi = (dmin + dio - djo) * 0.5;
407 bi = x[mini - 1][minj - 1] / 2.0 - av[mini - 1];
408 bj = x[mini - 1][minj - 1] / 2.0 - av[minj - 1];
412 printf("Cycle %3ld: ", iter - nc + 1);
414 nodelabel((boolean)(av[mini - 1] > 0.0));
416 nodelabel((boolean)(oc[mini - 1] > 1.0));
417 printf(" %ld (%10.5f) joins ", mini, bi);
419 nodelabel((boolean)(av[minj - 1] > 0.0));
421 nodelabel((boolean)(oc[minj - 1] > 1.0));
422 printf(" %ld (%10.5f)\n", minj, bj);
424 phyFillScreenColor();
427 hookup(curtree.nodep[nextnode - 1]->next, cluster[mini - 1]);
428 hookup(curtree.nodep[nextnode - 1]->next->next, cluster[minj - 1]);
429 cluster[mini - 1]->v = bi;
430 cluster[minj - 1]->v = bj;
431 cluster[mini - 1]->back->v = bi;
432 cluster[minj - 1]->back->v = bj;
433 cluster[mini - 1] = curtree.nodep[nextnode - 1];
434 cluster[minj - 1] = NULL;
437 av[mini - 1] = dmin * 0.5;
438 /* re-initialization */
440 for (j = 0; j < spp; j++) {
441 if (cluster[j] != NULL) {
443 da = (x[mini - 1][j] + x[minj - 1][j]) * 0.5;
444 if (mini - j - 1 < 0)
446 if (mini - j - 1 > 0)
449 da = x[mini - 1][j] * oc[mini - 1] + x[minj - 1][j] * oc[minj - 1];
450 da /= oc[mini - 1] + oc[minj - 1];
456 for (j = 0; j < spp; j++) {
457 x[minj - 1][j] = 0.0;
458 x[j][minj - 1] = 0.0;
460 oc[mini - 1] += oc[minj - 1];
464 for (i = 1; i <= spp; i++) {
465 if (cluster[i - 1] != NULL) {
471 curtree.start = cluster[el[0] - 1];
472 curtree.start->back = NULL;
477 bi = (x[el[0] - 1][el[1] - 1] + x[el[0] - 1][el[2] - 1] - x[el[1] - 1]
479 bj = x[el[0] - 1][el[1] - 1] - bi;
480 bk = x[el[0] - 1][el[2] - 1] - bi;
485 printf("last cycle:\n");
487 nodelabel((boolean)(av[el[0] - 1] > 0.0));
488 printf(" %ld (%10.5f) joins ", el[0], bi);
489 nodelabel((boolean)(av[el[1] - 1] > 0.0));
490 printf(" %ld (%10.5f) joins ", el[1], bj);
491 nodelabel((boolean)(av[el[2] - 1] > 0.0));
492 printf(" %ld (%10.5f)\n", el[2], bk);
494 phyFillScreenColor();
497 hookup(curtree.nodep[nextnode - 1], cluster[el[0] - 1]);
498 hookup(curtree.nodep[nextnode - 1]->next, cluster[el[1] - 1]);
499 hookup(curtree.nodep[nextnode - 1]->next->next, cluster[el[2] - 1]);
500 cluster[el[0] - 1]->v = bi;
501 cluster[el[1] - 1]->v = bj;
502 cluster[el[2] - 1]->v = bk;
503 cluster[el[0] - 1]->back->v = bi;
504 cluster[el[1] - 1]->back->v = bj;
505 cluster[el[2] - 1]->back->v = bk;
506 curtree.start = cluster[el[0] - 1]->back;
515 /* construct the tree */
518 inputdata(replicates, printdata, lower, upper, x, reps);
519 if (njoin && (spp < 3)) {
520 printf("\nERROR: Neighbor-Joining runs must have at least 3 species\n\n");
526 setuptree(&curtree, nonodes2 + 1);
527 for (i = 1; i <= spp; i++)
528 enterorder[i - 1] = i;
530 randumize(seed, enterorder);
531 for (i = 0; i < spp; i++)
532 cluster[i] = curtree.nodep[i];
535 curtree.start = curtree.nodep[outgrno - 1]->back;
536 printree(curtree.start, treeprint, njoin, (boolean)(!njoin));
542 treeout(curtree.start, &col, 0.43429448222, njoin, curtree.start);
544 curtree.root = curtree.start,
545 treeoutr(curtree.start,&col,&curtree);
548 printf("\nOutput written on file \"%s\"\n\n", outfilename);
550 printf("Tree written on file \"%s\"\n\n", outtreename);
555 int main(int argc, Char *argv[])
558 argc = 1; /* macsetup("Neighbor",""); */
559 argv[0] = "Neighbor";
562 openfile(&infile,INFILE,"input file", "r",argv[0],infilename);
563 openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename);
570 openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename);
572 while (ith <= datasets) {
574 fprintf(outfile, "Data set # %ld:\n",ith);
576 printf("Data set # %ld:\n",ith);
580 if (eoln(infile) && (ith < datasets))
589 fixmacfile(outfilename);
590 fixmacfile(outtreename);
594 phyRestoreConsoleAttributes();