5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
16 /* Modified by Christian Zmasek to:
17 - name and pairwise dist. output as one line per seq.
18 - removed some unnecessary -- for my puposes -- output.
21 !WARNING: Use ONLY together with FORESTER/RIO!
22 !For all other puposes download the excellent original!
24 last modification: 05/19/01
28 void putdistance(FILE *fp):
30 removed: "if ((j + 1) % 7 == 0 && j+1 != Maxspc)
36 int main(int argc, char *argv[]):
39 "FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE);
40 openfiletowrite(&ofp, OUTFILE, "general output");
41 writeoutputfile(ofp,WRITEPARAMS);
44 "openfiletoappend(&ofp, OUTFILE, "general output");
45 writeoutputfile(ofp,WRITEREST);"
47 "openfiletoappend(&ofp, OUTFILE, "general output");
48 writeoutputfile(ofp,WRITEREST);"
50 "openfiletoappend(&ofp, OUTFILE, "general output");
51 writeoutputfile(ofp,WRITEREST);"
66 void num2quart(uli qnum, int *a, int *b, int *c, int *d)
70 uli lowval=0, highval=0;
72 aa=0; bb=1; cc=2; dd=3;
74 temp = (double)(24 * qnum);
77 /* temp = pow(temp, (double)(1/4)); */
78 dd = (uli) floor(temp) + 1;
80 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
81 highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
83 while ((lowval > qnum)) {
84 dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
87 while (highval <= qnum) {
88 dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
90 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
94 temp = (double)(6 * qnum);
95 temp = pow(temp, (double)(1/3));
96 cc = (uli) floor(temp);
98 lowval = (uli) cc*(cc-1)*(cc-2)/6;
99 highval = (uli) (cc+1)*cc*(cc-1)/6;
101 while ((lowval > qnum)) {
102 cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6;
105 while (highval <= qnum) {
106 cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6;
108 lowval = (uli) cc*(cc-1)*(cc-2)/6;
112 temp = (double)(2 * qnum);
114 bb = (uli) floor(temp);
116 lowval = (uli) bb*(bb-1)/2;
117 highval = (uli) (bb+1)*bb/2;
119 while ((lowval > qnum)) {
120 bb -= 1; lowval = (uli) bb*(bb-1)/2;
123 while (highval <= qnum) {
124 bb += 1; highval = (uli) (bb+1)*bb/2;
126 lowval = (uli) bb*(bb-1)/2;
143 uli numquarts(int maxspc)
158 (uli) b * (b-1) / 2 +
159 (uli) c * (c-1) * (c-2) / 6 +
160 (uli) d * (d-1) * (d-2) * (d-3) / 24;
167 uli quart2num (int a, int b, int c, int d)
170 if ((a>b) || (b>c) || (c>d)) {
171 fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c,
176 (uli) b * (b-1) / 2 +
177 (uli) c * (c-1) * (c-2) / 6 +
178 (uli) d * (d-1) * (d-2) * (d-3) / 24;
186 /* flag=0 old allquart binary */
187 /* flag=1 allquart binary */
188 /* flag=2 allquart ACSII */
189 /* flag=3 quartlh binary */
190 /* flag=4 quartlh ASCII */
192 void writetpqfheader(int nspec,
198 unsigned long nquart;
199 unsigned long blocklen;
201 nquart = numquarts(nspec);
202 /* compute number of bytes */
203 if (nquart % 2 == 0) { /* even number */
204 blocklen = (nquart)/2;
205 } else { /* odd number */
206 blocklen = (nquart + 1)/2;
208 /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */
209 fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION);
210 fprintf(ofp, "species: %d\n", nspec);
211 fprintf(ofp, "quartets: %lu\n", nquart);
212 fprintf(ofp, "bytes: %lu\n\n", blocklen);
215 /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */
218 if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
219 if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
220 if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
221 if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
223 for (currspec=0; currspec<nspec; currspec++) {
224 fputid(ofp, currspec);
226 } /* for each species */
229 } /* writetpqfheader */
233 void writeallquarts(int nspec,
235 unsigned char *quartetinfo)
236 { unsigned long nquart;
237 unsigned long blocklen;
240 nquart = numquarts(nspec);
242 /* compute number of bytes */
243 if (nquart % 2 == 0) { /* even number */
244 blocklen = (nquart)/2;
245 } else { /* odd number */
246 blocklen = (nquart + 1)/2;
249 FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename);
251 openfiletowrite(&ofp, filename, "all quartets");
253 writetpqfheader(nspec, ofp, 0);
255 fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp);
258 } /* writeallquart */
262 void readallquarts(int nspec,
264 unsigned char *quartetinfo)
265 { unsigned long nquart;
266 unsigned long blocklen;
268 unsigned long dummynquart;
269 unsigned long dummyblocklen;
271 char dummyversion[128];
275 nquart = numquarts(nspec);
277 /* compute number of bytes */
278 if (nquart % 2 == 0) { /* even number */
279 blocklen = (nquart)/2;
280 } else { /* odd number */
281 blocklen = (nquart + 1)/2;
284 /* &(quartetinfo[0] */
286 openfiletoread(&ifp, filename, "all quartets");
288 fscanf(ifp, "TREE-PUZZLE\n");
289 fscanf(ifp, "%s\n\n", dummyversion);
291 fscanf(ifp, "species: %d\n", &dummynspec);
292 fscanf(ifp, "quartets: %lu\n", &dummynquart);
293 fscanf(ifp, "bytes: %lu\n\n", &dummyblocklen);
295 if ((nspec != dummynspec) ||
296 (nquart != dummynquart) ||
297 (blocklen != dummyblocklen)) {
298 FPRINTF(STDOUTFILE "ERROR: Parameters in quartet file %s do not match!!!\n",
303 # endif /* PARALLEL */
307 FPRINTF(STDOUTFILE "Reading quartet file: %s\n", filename);
308 FPRINTF(STDOUTFILE " generated by: TREE-PUZZLE %s\n", dummyversion);
309 FPRINTF(STDOUTFILE " current: TREE-PUZZLE %s\n", VERSION);
311 FPRINTF(STDOUTFILE " %d species, %lu quartets, %lu bytes\n",
312 dummynspec, dummynquart, dummyblocklen);
314 for (currspec=0; currspec<nspec; currspec++) {
316 fscanf(ifp, "%s\n", dummyname);
317 FPRINTF(STDOUTFILE " %3d. %s (", currspec+1, dummyname);
318 fputid(STDOUT, currspec);
319 FPRINTF(STDOUTFILE ")\n");
321 } /* for each species */
322 FPRINTF(STDOUTFILE "\n");
324 fread(&(quartetinfo[0]), sizeof(char), blocklen, ifp);
327 } /* writeallquart */
332 /******************************************************************************/
333 /* options - file I/O - output */
334 /******************************************************************************/
336 /* compute TN parameters according to F84 Ts/Tv ratio */
339 double rho, piA, piC, piG, piT, piR, piY, ts, yr;
347 if (piC*piT*piR + piA*piG*piY == 0.0) {
348 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
349 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
353 rho = (piR*piY*(piR*piY*tstvf84 - (piA*piG + piC*piT)))/
354 (piC*piT*piR + piA*piG*piY);
356 if(piR == 0.0 || piY == 0.0 || (piR + rho) == 0.0) {
357 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
358 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
362 ts = 0.5 + 0.25*rho*(1.0/piR + 1.0/piY);
363 yr = (piY + rho)/piY * piR/(piR + rho);
364 if (ts < MINTS || ts > MAXTS) {
365 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
366 FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n");
370 if (yr < MINYR || yr > MAXYR) {
371 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
372 FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n");
381 /* compute number of quartets used in LM analysis */
386 Numquartets = (uli) clustA*clustB*clustC*clustD;
388 Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2;
390 Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2;
392 Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24;
398 /* set options interactively */
406 rhetmode = UNIFORMRATE; /* assume rate homogeneity */
411 fracinv_optim = FALSE;
413 compclock = FALSE; /* compute clocklike branch lengths */
414 locroot = -1; /* search for optimal place of root */
415 qcalg_optn = FALSE; /* don't use sampling of quartets */
416 approxp_optn = TRUE; /* approximate parameter estimates */
417 listqptrees = PSTOUT_NONE; /* list puzzling step trees */
419 /* approximate QP quartets? */
420 if (Maxspc <= 6) approxqp = FALSE;
421 else approxqp = TRUE;
423 codon_optn = 0; /* use all positions in a codon */
425 /* number of puzzling steps */
426 if (Maxspc <= 25) Numtrial = 1000;
427 else if (Maxspc <= 50) Numtrial = 10000;
428 else if (Maxspc <= 75) Numtrial = 25000;
429 else Numtrial = 50000;
431 utree_optn = TRUE; /* use first user tree for estimation */
432 outgroup = 0; /* use first taxon as outgroup */
433 sym_optn = FALSE; /* symmetrize doublet frequencies */
434 tstvf84 = 0.0; /* disable F84 model */
435 show_optn = FALSE; /* show unresolved quartets */
436 typ_optn = TREERECON_OPTN; /* tree reconstruction */
437 numclust = 1; /* one clusters in LM analysis */
438 lmqts = 0; /* all quartets in LM analysis */
440 if (Numquartets > 10000) {
441 lmqts = 10000; /* 10000 quartets in LM analysis */
446 FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n");
447 FPRINTF(STDOUTFILE " b Type of analysis? ");
448 if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n");
449 if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n");
450 if (typ_optn == TREERECON_OPTN) {
451 FPRINTF(STDOUTFILE " k Tree search procedure? ");
452 if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n");
453 if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n");
454 if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n");
455 if (puzzlemode == QUARTPUZ) {
456 FPRINTF(STDOUTFILE " v Approximate quartet likelihood? %s\n",
457 (approxqp ? "Yes" : "No"));
458 FPRINTF(STDOUTFILE " u List unresolved quartets? %s\n",
459 (show_optn ? "Yes" : "No"));
460 FPRINTF(STDOUTFILE " n Number of puzzling steps? %lu\n",
462 FPRINTF(STDOUTFILE " j List puzzling step trees? ");
463 switch (listqptrees) {
464 case PSTOUT_NONE: FPRINTF(STDOUTFILE "No\n"); break;
465 case PSTOUT_ORDER: FPRINTF(STDOUTFILE "Unique topologies\n"); break;
466 case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break;
467 case PSTOUT_LIST: FPRINTF(STDOUTFILE "Chronological list only\n"); break;
470 FPRINTF(STDOUTFILE " o Display as outgroup? ");
471 fputid(STDOUT, outgroup);
472 FPRINTF(STDOUTFILE "\n");
474 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
475 FPRINTF(STDOUTFILE " z Compute clocklike branch lengths? ");
476 if (compclock) FPRINTF(STDOUTFILE "Yes\n");
477 else FPRINTF(STDOUTFILE "No\n");
480 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
481 FPRINTF(STDOUTFILE " l Location of root? ");
482 if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n");
483 else if (locroot < Maxspc) {
484 FPRINTF(STDOUTFILE "Branch %d (", locroot + 1);
485 fputid(STDOUT, locroot);
486 FPRINTF(STDOUTFILE ")\n");
487 } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1);
490 if (typ_optn == LIKMAPING_OPTN) {
491 FPRINTF(STDOUTFILE " g Group sequences in clusters? ");
492 if (numclust == 1) FPRINTF(STDOUTFILE "No\n");
493 else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust);
494 FPRINTF(STDOUTFILE " n Number of quartets? ");
495 if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets);
496 else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts);
498 FPRINTF(STDOUTFILE " e Parameter estimates? ");
499 if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n");
500 else FPRINTF(STDOUTFILE "Exact (slow)\n");
501 if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {
502 FPRINTF(STDOUTFILE " x Parameter estimation uses? ");
503 if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
504 else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
507 FPRINTF(STDOUTFILE " x Parameter estimation uses? ");
509 FPRINTF(STDOUTFILE "1st input tree\n");
510 else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
511 else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
513 FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n");
514 FPRINTF(STDOUTFILE " d Type of sequence input data? ");
515 if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: ");
516 if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n");
517 if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n");
518 if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n");
519 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
520 FPRINTF(STDOUTFILE " h Codon positions selected? ");
521 if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n");
522 if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n");
523 if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n");
524 if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n");
525 if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n");
527 FPRINTF(STDOUTFILE " m Model of substitution? ");
528 if (data_optn == NUCLEOTIDE) { /* nucleotides */
531 FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n");
533 FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n");
534 FPRINTF(STDOUTFILE " p Constrain TN model to F84 model? ");
536 FPRINTF(STDOUTFILE "No\n");
537 else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84);
539 FPRINTF(STDOUTFILE " t Transition/transversion parameter? ");
541 FPRINTF(STDOUTFILE "Estimate from data set\n");
543 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
545 FPRINTF(STDOUTFILE " r Y/R transition parameter? ");
547 FPRINTF(STDOUTFILE "Estimate from data set\n");
549 FPRINTF(STDOUTFILE "%.2f\n", YRparam);
553 FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n");
554 FPRINTF(STDOUTFILE " t Transition/transversion parameter? ");
556 FPRINTF(STDOUTFILE "Estimate from data set\n");
558 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
561 if (data_optn == NUCLEOTIDE && SH_optn) {
562 FPRINTF(STDOUTFILE " h Doublets defined by? ");
564 FPRINTF(STDOUTFILE "1st and 2nd codon positions\n");
566 FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n");
568 if (data_optn == AMINOACID) { /* amino acids */
569 switch (auto_aamodel) {
571 FPRINTF(STDOUTFILE "Auto: ");
574 FPRINTF(STDOUTFILE "Def.: ");
577 if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n");
578 if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n");
579 if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n");
580 if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n");
581 if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n");
582 if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n");
583 if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n");
585 if (data_optn == BINARY) { /* binary states */
586 FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n");
588 if (data_optn == AMINOACID)
589 FPRINTF(STDOUTFILE " f Amino acid frequencies? ");
590 else if (data_optn == NUCLEOTIDE && SH_optn)
591 FPRINTF(STDOUTFILE " f Doublet frequencies? ");
592 else if (data_optn == NUCLEOTIDE && nuc_optn)
593 FPRINTF(STDOUTFILE " f Nucleotide frequencies? ");
594 else if (data_optn == BINARY)
595 FPRINTF(STDOUTFILE " f Binary state frequencies? ");
596 FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" :
597 "Use specified values"));
598 if (data_optn == NUCLEOTIDE && SH_optn)
599 FPRINTF(STDOUTFILE " s Symmetrize doublet frequencies? %s\n",
600 (sym_optn ? "Yes" : "No"));
602 FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n");
603 FPRINTF(STDOUTFILE " w Model of rate heterogeneity? ");
604 if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n");
605 if (rhetmode == GAMMARATE ) FPRINTF(STDOUTFILE "Gamma distributed rates\n");
606 if (rhetmode == TWORATE ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n");
607 if (rhetmode == MIXEDRATE ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats);
609 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
610 FPRINTF(STDOUTFILE " i Fraction of invariable sites? ");
611 if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set");
612 else FPRINTF(STDOUTFILE "%.2f", fracinv);
613 if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)");
614 FPRINTF(STDOUTFILE "\n");
616 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
617 FPRINTF(STDOUTFILE " a Gamma distribution parameter alpha? ");
619 FPRINTF(STDOUTFILE "Estimate from data set\n");
621 FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta);
622 else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta);
623 FPRINTF(STDOUTFILE " c Number of Gamma rate categories? %d\n", numcats);
626 FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: ");
632 while (getchar() != '\n');
634 ch = (char) tolower((int) ch);
636 /* letters in use: a b c d e f g h i j k l m n o p q r s t u v w y x z */
637 /* letters not in use: */
643 case 'z': if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) {
644 compclock = compclock + 1;
645 if (compclock == 2) compclock = 0;
647 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
651 case 'l': if (compclock && typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) {
652 FPRINTF(STDOUTFILE "\n\n\nEnter an invalid branch number to search ");
653 FPRINTF(STDOUTFILE "for the best location!\n");
654 FPRINTF(STDOUTFILE "\nPlace root at branch (1-%d): ",
656 scanf("%d", &locroot);
658 while (getchar() != '\n');
659 if (locroot < 1 || locroot > 2*Maxspc-3) locroot = 0;
660 locroot = locroot - 1;
662 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
666 case 'e': if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && fracinv_optim) {
667 FPRINTF(STDOUTFILE "\n\n\nInvariable sites estimation needs to be exact!\n");
669 approxp_optn = approxp_optn + 1;
670 if (approxp_optn == 2) approxp_optn = 0;
674 case 'w': rhetmode = rhetmode + 1;
675 if (rhetmode == 4) rhetmode = UNIFORMRATE;
676 if (rhetmode == UNIFORMRATE) { /* uniform rate */
681 fracinv_optim = FALSE;
683 if (rhetmode == GAMMARATE ) { /* Gamma distributed rates */
688 fracinv_optim = FALSE;
690 if (rhetmode == TWORATE ) { /* two rates (1 invariable + 1 variable) */
691 approxp_optn = FALSE;
696 fracinv_optim = TRUE;
698 if (rhetmode == MIXEDRATE ) { /* mixed (1 invariable + Gamma rates) */
699 approxp_optn = FALSE;
704 fracinv_optim = TRUE;
708 case 'i': if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
709 FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for ");
710 FPRINTF(STDOUTFILE "estimation from data set!\n");
711 FPRINTF(STDOUTFILE "\nFraction of invariable sites among all sites (%.2f-%.2f): ",
713 scanf("%lf", &fracinv);
715 while (getchar() != '\n');
716 if (fracinv < MINFI || fracinv > MAXFI) {
717 fracinv_optim = TRUE;
720 fracinv_optim = FALSE;
723 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
727 case 'a': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
728 FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for estimation from data set!\n");
729 FPRINTF(STDOUTFILE "\nGamma distribution parameter alpha (%.2f-%.2f): ",
730 (1.0-MAXGE)/MAXGE, (1.0-MINGE)/MINGE);
733 while (getchar() != '\n');
734 if (Geta < (1.0-MAXGE)/MAXGE || Geta > (1.0-MINGE)/MINGE) {
739 Geta = 1.0/(1.0 + Geta);
742 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
745 case 'c': if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
746 FPRINTF(STDOUTFILE "\n\n\nNumber of Gamma rate categories (%d-%d): ",
748 scanf("%d", &numcats);
750 while (getchar() != '\n');
751 if (numcats < MINCAT || numcats > MAXCAT) {
752 FPRINTF(STDOUTFILE "\n\n\nThis number of categories is not available!\n");
756 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
760 case 'h': if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
761 codon_optn = codon_optn + 1;
762 if (codon_optn == 5) codon_optn = 0;
764 /* reestimate nucleotide frequencies only
765 if user did not specify other values */
766 if (Frequ_optn) estimatebasefreqs();
768 } else if (data_optn == NUCLEOTIDE && SH_optn) {
769 if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) {
771 FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n");
773 if (Maxseqc % 3 != 0 && Maxseqc % 2 == 0) {
775 FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n");
777 if (Maxseqc % 2 == 0 && Maxseqc % 3 == 0) {
783 /* reestimate nucleotide frequencies only
784 if user did not specify other values */
785 if (Frequ_optn) estimatebasefreqs();
788 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
792 case 'x': if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
797 qcalg_optn = qcalg_optn + 1;
798 if (qcalg_optn == 2) {
804 qcalg_optn = qcalg_optn + 1;
805 if (qcalg_optn == 2) qcalg_optn = 0;
809 case 'k': if (typ_optn == TREERECON_OPTN) {
810 puzzlemode = (puzzlemode + 1) % 3;
811 /* puzzlemode = puzzlemode + 1;
812 if (puzzlemode == 3) puzzlemode = 0;
815 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
819 case 'b': typ_optn = (typ_optn + 1) % 2;
820 /* typ_optn = typ_optn + 1;
821 if (typ_optn == 2) typ_optn = TREERECON_OPTN;
825 case 'g': if (typ_optn == LIKMAPING_OPTN) {
826 clustA = clustB = clustC = clustD = 0;
830 FPRINTF(STDOUTFILE "\n\n\nNumber of clusters (2-4): ");
831 scanf("%d", &numclust);
833 while (getchar() != '\n');
834 if (numclust < 2 || numclust > 4) {
836 FPRINTF(STDOUTFILE "\n\n\nOnly 2, 3, or 4 ");
837 FPRINTF(STDOUTFILE "clusters possible\n");
839 FPRINTF(STDOUTFILE "\nDistribute all sequences over the ");
841 FPRINTF(STDOUTFILE "two clusters a and b (At least two\n");
842 FPRINTF(STDOUTFILE "sequences per cluster are necessary), ");
845 FPRINTF(STDOUTFILE "three clusters a, b, and c\n");
846 FPRINTF(STDOUTFILE "(At least one sequence in cluster a and b, and at least two\n");
847 FPRINTF(STDOUTFILE "sequences in c are necessary), ");
850 FPRINTF(STDOUTFILE "four clusters a, b, c, and d\n");
851 FPRINTF(STDOUTFILE "(At least one sequence per cluster is necessary),\n");
853 FPRINTF(STDOUTFILE "type x to exclude a sequence:\n\n");
855 for (i = 0; i < Maxspc; i++) {
859 FPRINTF(STDOUTFILE ": ");
864 while (getchar() != '\n');
866 ch = (char) tolower((int) ch);
867 if (ch == 'a' || ch == 'b' || ch == 'x')
869 if (numclust == 3 || numclust == 4)
870 if (ch == 'c') valid = TRUE;
872 if (ch == 'd') valid = TRUE;
875 clusterA[clustA] = i;
879 clusterB[clustB] = i;
883 clusterC[clustC] = i;
887 clusterD[clustD] = i;
897 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
902 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
907 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n");
912 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster d\n");
919 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
924 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
930 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n");
932 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster c\n");
940 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
942 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster a\n");
948 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
950 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster b\n");
954 FPRINTF(STDOUTFILE "\nNumber of sequences in each cluster:\n\n");
955 FPRINTF(STDOUTFILE "Cluster a: %d\n", clustA);
956 FPRINTF(STDOUTFILE "Cluster b: %d\n", clustB);
958 FPRINTF(STDOUTFILE "Cluster c: %d\n", clustC);
960 FPRINTF(STDOUTFILE "Cluster d: %d\n", clustD);
961 FPRINTF(STDOUTFILE "\nExcluded sequences: ");
962 if (numclust == 2) FPRINTF(STDOUTFILE "%d\n",
963 Maxspc-clustA-clustB);
964 if (numclust == 3) FPRINTF(STDOUTFILE "%d\n",
965 Maxspc-clustA-clustB-clustC);
966 if (numclust == 4) FPRINTF(STDOUTFILE "%d\n",
967 Maxspc-clustA-clustB-clustC-clustD);
972 /* number of resulting quartets */
976 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
980 case 'd': if (auto_datatype == AUTO_GUESS) {
981 auto_datatype = AUTO_OFF;
982 guessdata_optn = data_optn;
985 data_optn = data_optn + 1;
986 if (data_optn == 3) {
987 auto_datatype = AUTO_GUESS;
988 data_optn = guessdata_optn;
991 /* translate characters into format used by ML engine */
996 case 'u': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
997 show_optn = 1 - show_optn;
999 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1002 case 'j': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1003 listqptrees = (listqptrees + 1) % 4;
1005 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1008 case 'v': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1009 approxqp = 1 - approxqp;
1011 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1014 case 'f': if (Frequ_optn) {
1018 if (data_optn == AMINOACID)
1019 FPRINTF(STDOUTFILE "\n\n\nAmino acid");
1020 else if (data_optn == NUCLEOTIDE && SH_optn)
1021 FPRINTF(STDOUTFILE "\n\n\nDoublet");
1022 else if (data_optn == NUCLEOTIDE && nuc_optn)
1023 FPRINTF(STDOUTFILE "\n\n\nNucleotide");
1024 else if (data_optn == BINARY)
1025 FPRINTF(STDOUTFILE "\n\n\nBinary state");
1026 FPRINTF(STDOUTFILE " frequencies (in %%):\n\n");
1027 for (i = 0; i < gettpmradix() - 1; i++) {
1028 FPRINTF(STDOUTFILE "pi(%s) = ", int2code(i));
1029 scanf("%lf", &(Freqtpm[i]));
1031 while (getchar() != '\n');
1032 Freqtpm[i] = Freqtpm[i]/100.0;
1033 if (Freqtpm[i] < 0.0) {
1034 FPRINTF(STDOUTFILE "\n\n\nNegative frequency not possible\n");
1035 estimatebasefreqs();
1038 sumfreq = sumfreq + Freqtpm[i];
1039 if (sumfreq > 1.0) {
1040 FPRINTF(STDOUTFILE "\n\n\nThe sum of ");
1041 FPRINTF(STDOUTFILE "all frequencies exceeds");
1042 FPRINTF(STDOUTFILE " 100%%\n");
1043 estimatebasefreqs();
1046 if (i == gettpmradix() - 2)
1047 Freqtpm[i+1] = 1.0 - sumfreq;
1049 } else estimatebasefreqs();
1052 case 's': if (data_optn == NUCLEOTIDE && SH_optn) {
1053 sym_optn = 1 - sym_optn;
1055 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1059 case 'n': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1061 FPRINTF(STDOUTFILE "\n\n\nNumber of puzzling steps: ");
1062 scanf("%lu", &Numtrial);
1064 while (getchar() != '\n');
1066 FPRINTF(STDOUTFILE "\n\n\nThe number of puzzling");
1067 FPRINTF(STDOUTFILE " steps can't be smaller than one\n");
1071 else if (typ_optn == LIKMAPING_OPTN)
1073 FPRINTF(STDOUTFILE "\n\nEnter zero to use all possible");
1074 FPRINTF(STDOUTFILE " quartets in the analysis!\n");
1075 FPRINTF(STDOUTFILE "\nNumber of random quartets: ");
1076 scanf("%lu", &lmqts);
1078 while (getchar() != '\n');
1080 /* compute number of quartets used */
1085 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1089 case 'o': if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
1090 FPRINTF(STDOUTFILE "\n\n\nSequence to be displayed as outgroup (1-%d): ",
1092 scanf("%d", &outgroup);
1094 while (getchar() != '\n');
1095 if (outgroup < 1 || outgroup > Maxspc) {
1096 FPRINTF(STDOUTFILE "\n\n\nSequences are numbered ");
1097 FPRINTF(STDOUTFILE "from 1 to %d\n",
1101 outgroup = outgroup - 1;
1103 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1107 case 'm': if (data_optn == NUCLEOTIDE) { /* nucleotide data */
1108 if(HKY_optn && nuc_optn) {
1120 if(TN_optn && nuc_optn) {
1121 if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) {
1122 /* number of chars needs to be a multiple 2 or 3 */
1124 if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0)
1136 /* translate characters into format */
1137 /* used by ML engine */
1139 estimatebasefreqs();
1141 FPRINTF(STDOUTFILE "\n\n\nSH model not ");
1142 FPRINTF(STDOUTFILE "available for the data set!\n");
1165 /* translate characters into format */
1166 /* used by ML engine */
1168 estimatebasefreqs();
1173 if (data_optn == AMINOACID) { /* amino acid data */
1175 /* AUTO -> Dayhoff */
1180 blosum62_optn = FALSE;
1183 auto_aamodel = AUTO_OFF;
1187 /* Dayhoff -> JTT */
1192 blosum62_optn = FALSE;
1195 auto_aamodel = AUTO_OFF;
1204 blosum62_optn = FALSE;
1207 auto_aamodel = AUTO_OFF;
1212 /* mtREV -> cpREV */
1217 blosum62_optn = FALSE;
1220 auto_aamodel = AUTO_OFF;
1225 /* mtREV -> BLOSUM 62 */
1230 blosum62_optn = TRUE;
1233 auto_aamodel = AUTO_OFF;
1236 #endif /* ! CPREV */
1240 /* cpREV -> BLOSUM 62 */
1245 blosum62_optn = TRUE;
1248 auto_aamodel = AUTO_OFF;
1252 if (blosum62_optn) {
1253 /* BLOSUM 62 -> VT model */
1258 blosum62_optn = FALSE;
1261 auto_aamodel = AUTO_OFF;
1265 /* VT model -> WAG model */
1270 blosum62_optn = FALSE;
1273 auto_aamodel = AUTO_OFF;
1277 /* WAG model -> AUTO */
1278 Dayhf_optn = guessDayhf_optn;
1279 Jtt_optn = guessJtt_optn;
1280 mtrev_optn = guessmtrev_optn;
1281 cprev_optn = guesscprev_optn;
1282 blosum62_optn = guessblosum62_optn;
1283 vtmv_optn = guessvtmv_optn;
1284 wag_optn = guesswag_optn;
1285 auto_aamodel = guessauto_aamodel;
1290 if (data_optn == BINARY) {
1291 FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n");
1295 case 't': if (data_optn != NUCLEOTIDE) {
1296 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1299 FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for ");
1300 FPRINTF(STDOUTFILE "estimation from data set!\n");
1301 FPRINTF(STDOUTFILE "\nTransition/transversion parameter (%.2f-%.2f): ",
1303 scanf("%lf", &TSparam);
1305 while (getchar() != '\n');
1306 if (TSparam < MINTS || TSparam > MAXTS) {
1315 case 'q': FPRINTF(STDOUTFILE "\n\n\n");
1319 # endif /* PARALLEL */
1324 case 'r': if (!(TN_optn && nuc_optn)){
1325 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1328 FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value ");
1329 FPRINTF(STDOUTFILE "for estimation from data set!\n");
1330 FPRINTF(STDOUTFILE "\nY/R transition parameter (%.2f-%.2f): ", MINYR, MAXYR);
1331 scanf("%lf", &YRparam);
1333 while (getchar() != '\n');
1334 if (YRparam < MINYR || YRparam > MAXYR) {
1337 } else if (YRparam == 1.0) {
1340 if (optim_optn) TSparam = 2.0;
1347 case 'p': if (!(TN_optn && nuc_optn)){
1348 FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1350 FPRINTF(STDOUTFILE "\n\n\nThe F84 model (Felsenstein 1984) is a restricted");
1351 FPRINTF(STDOUTFILE " TN model, and the one\nF84 parameter uniquely");
1352 FPRINTF(STDOUTFILE " determines the two corresponding TN parameters!\n\n");
1353 FPRINTF(STDOUTFILE "F84 expected transition/transversion ratio: ");
1354 scanf("%lf", &tstvf84);
1356 while (getchar() != '\n');
1357 if (tstvf84 <= 0.0) tstvf84 = 0.0;
1358 else makeF84model();
1364 default: FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1367 } while (ch != 'y');
1369 FPRINTF(STDOUTFILE "\n\n\n");
1372 /* open file for reading */
1373 void openfiletoread(FILE **fp, char name[], char descr[])
1378 if ((*fp = fopen(name, "r")) == NULL) {
1379 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1381 while ((*fp = fopen(str, "r")) == NULL)
1386 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1389 FPRINTF(STDOUTFILE "File '%s' not found, ", str);
1390 FPRINTF(STDOUTFILE "please enter alternative name: ");
1395 FPRINTF(STDOUTFILE "\n");
1397 } /* openfiletoread */
1400 /* open file for writing */
1401 void openfiletowrite(FILE **fp, char name[], char descr[])
1406 if ((*fp = fopen(name, "w")) == NULL) {
1407 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1409 while ((*fp = fopen(str, "w")) == NULL)
1414 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1417 FPRINTF(STDOUTFILE "File '%s' not created, ", str);
1418 FPRINTF(STDOUTFILE "please enter other name: ");
1423 FPRINTF(STDOUTFILE "\n");
1425 } /* openfiletowrite */
1428 /* open file for appending */
1429 void openfiletoappend(FILE **fp, char name[], char descr[])
1434 if ((*fp = fopen(name, "a")) == NULL) {
1435 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1437 while ((*fp = fopen(str, "a")) == NULL)
1442 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1445 FPRINTF(STDOUTFILE "File '%s' not created, ", str);
1446 FPRINTF(STDOUTFILE "please enter other name: ");
1451 FPRINTF(STDOUTFILE "\n");
1453 } /* openfiletowrite */
1457 void closefile(FILE *fp)
1462 /* symmetrize doublet frequencies */
1468 if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) {
1469 /* ML frequencies */
1470 mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */
1473 mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */
1476 mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */
1479 mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */
1482 mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */
1485 mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */
1489 /* base composition of each taxon */
1490 for (i = 0; i < Maxspc; i++) {
1491 imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */
1492 Basecomp[i][1] = imean;
1493 Basecomp[i][4] = imean;
1494 imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */
1495 Basecomp[i][2] = imean;
1496 Basecomp[i][8] = imean;
1497 imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */
1498 Basecomp[i][3] = imean;
1499 Basecomp[i][12] = imean;
1500 imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */
1501 Basecomp[i][6] = imean;
1502 Basecomp[i][9] = imean;
1503 imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */
1504 Basecomp[i][7] = imean;
1505 Basecomp[i][13] = imean;
1506 imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */
1507 Basecomp[i][11] = imean;
1508 Basecomp[i][14] = imean;
1513 /* show Ts/Tv ratio and Ts Y/R ratio */
1514 void computeexpectations()
1516 double AlphaYBeta, AlphaRBeta, piR, piY, num, denom, pyr, pur;
1518 if (nuc_optn == TRUE) { /* 4x4 nucs */
1519 piR = Freqtpm[0] + Freqtpm[2];
1520 piY = Freqtpm[1] + Freqtpm[3];
1521 AlphaRBeta = 4.0*TSparam / (1 + YRparam);
1522 AlphaYBeta = AlphaRBeta * YRparam;
1523 tstvratio = (AlphaRBeta*Freqtpm[0]*Freqtpm[2] +
1524 AlphaYBeta*Freqtpm[1]*Freqtpm[3])/(piR * piY);
1525 yrtsratio = (AlphaYBeta*Freqtpm[1]*Freqtpm[3]) /
1526 (AlphaRBeta*Freqtpm[0]*Freqtpm[2]);
1527 } else { /* 16x16 nucs */
1528 pyr = Freqtpm[1]*Freqtpm[3] + Freqtpm[5]*Freqtpm[7] +
1529 Freqtpm[9]*Freqtpm[11] + Freqtpm[4]*Freqtpm[12] +
1530 Freqtpm[5]*Freqtpm[13] + Freqtpm[6]*Freqtpm[14] +
1531 Freqtpm[7]*Freqtpm[15] + Freqtpm[13]*Freqtpm[15];
1532 pur = Freqtpm[0]*Freqtpm[2] + Freqtpm[4]*Freqtpm[6] +
1533 Freqtpm[0]*Freqtpm[8] + Freqtpm[1]*Freqtpm[9] +
1534 Freqtpm[2]*Freqtpm[10] + Freqtpm[8]*Freqtpm[10] +
1535 Freqtpm[3]*Freqtpm[11] + Freqtpm[12]*Freqtpm[14];
1537 denom = Freqtpm[0]*Freqtpm[1] + Freqtpm[1]*Freqtpm[2] +
1538 Freqtpm[0]*Freqtpm[3] + Freqtpm[2]*Freqtpm[3] +
1539 Freqtpm[0]*Freqtpm[4] + Freqtpm[1]*Freqtpm[5] +
1540 Freqtpm[4]*Freqtpm[5] + Freqtpm[2]*Freqtpm[6] +
1541 Freqtpm[5]*Freqtpm[6] + Freqtpm[3]*Freqtpm[7] +
1542 Freqtpm[4]*Freqtpm[7] + Freqtpm[6]*Freqtpm[7] +
1543 Freqtpm[4]*Freqtpm[8] + Freqtpm[5]*Freqtpm[9] +
1544 Freqtpm[8]*Freqtpm[9] + Freqtpm[6]*Freqtpm[10] +
1545 Freqtpm[9]*Freqtpm[10] + Freqtpm[7]*Freqtpm[11] +
1546 Freqtpm[8]*Freqtpm[11] + Freqtpm[10]*Freqtpm[11] +
1547 Freqtpm[0]*Freqtpm[12] + Freqtpm[8]*Freqtpm[12] +
1548 Freqtpm[1]*Freqtpm[13] + Freqtpm[9]*Freqtpm[13] +
1549 Freqtpm[12]*Freqtpm[13] + Freqtpm[2]*Freqtpm[14] +
1550 Freqtpm[10]*Freqtpm[14] + Freqtpm[13]*Freqtpm[14] +
1551 Freqtpm[3]*Freqtpm[15] + Freqtpm[11]*Freqtpm[15] +
1552 Freqtpm[12]*Freqtpm[15] + Freqtpm[14]*Freqtpm[15];
1553 tstvratio = 2.0*TSparam * num/denom;
1554 yrtsratio = pyr/pur;
1558 /* write ML distance matrix to file */
1559 void putdistance(FILE *fp) /* mod CZ 05/19/01 */
1563 fprintf(fp, " %d\n", Maxspc);
1564 for (i = 0; i < Maxspc; i++) {
1566 for (j = 0; j < Maxspc; j++) {
1567 fprintf(fp, " %.5f", Distanmat[i][j]/100.0);
1574 /* find identical sequences */
1575 void findidenticals(FILE *fp)
1580 useqs = new_cvector(Maxspc);
1582 for (i = 0; i < Maxspc; i++)
1586 for (i = 0; i < Maxspc && noids; i++)
1587 for (j = i + 1; j < Maxspc && noids; j++)
1588 if (Distanmat[i][j] == 0.0) noids = FALSE;
1591 fprintf(fp, " All sequences are unique.\n");
1593 for (i = 0; i < Maxspc; i++) {
1595 for (j = i + 1; j < Maxspc && noids; j++)
1596 if (Distanmat[i][j] == 0.0) noids = FALSE;
1598 if (!noids && useqs[i] == 0) {
1601 for (j = i + 1; j < Maxspc; j++)
1602 if (Distanmat[i][j] == 0.0) {
1611 free_cvector(useqs);
1614 /* compute average distance */
1615 double averagedist()
1621 for (i = 0; i < Maxspc; i++)
1622 for (j = i + 1; j < Maxspc; j++)
1623 sum = sum + Distanmat[i][j];
1625 sum = sum / (double) Maxspc / ((double) Maxspc - 1.0) * 2.0;
1630 /* first lines of EPSF likelihood mapping file */
1631 void initps(FILE *ofp)
1633 fprintf(ofp, "%%!PS-Adobe-3.0 EPSF-3.0\n");
1634 fprintf(ofp, "%%%%BoundingBox: 60 210 550 650\n");
1635 fprintf(ofp, "%%%%Pages: 1\n");
1637 fprintf(ofp, "%%%%Creator: %s (version %s)\n", PACKAGE, VERSION);
1639 fprintf(ofp, "%%%%Creator: %s (version %s%s)\n", PACKAGE, VERSION, ALPHA);
1641 fprintf(ofp, "%%%%Title: Likelihood Mapping Analysis\n");
1642 fprintf(ofp, "%%%%CreationDate: %s", asctime(localtime(&Starttime)) );
1643 fprintf(ofp, "%%%%DocumentFonts: Helvetica\n");
1644 fprintf(ofp, "%%%%DocumentNeededFonts: Helvetica\n");
1645 fprintf(ofp, "%%%%EndComments\n");
1646 fprintf(ofp, "%% use inch as unit\n");
1647 fprintf(ofp, "/inch {72 mul} def\n");
1648 fprintf(ofp, "%% triangle side length (3 inch)\n");
1649 fprintf(ofp, "/tl {3 inch mul} def\n");
1650 fprintf(ofp, "%% plot one dot (x-y coordinates on stack)\n");
1651 fprintf(ofp, "/dot {\n");
1652 fprintf(ofp, "newpath\n");
1653 fprintf(ofp, "0.002 tl 0 360 arc %% radius is 0.002 of the triangle length\n");
1654 fprintf(ofp, "closepath\n");
1655 fprintf(ofp, "fill\n");
1656 fprintf(ofp, "} def\n");
1657 fprintf(ofp, "%% preamble\n");
1658 fprintf(ofp, "/Helvetica findfont\n");
1659 fprintf(ofp, "12 scalefont\n");
1660 fprintf(ofp, "setfont\n");
1661 fprintf(ofp, "%% 0/0 for triangle of triangles\n");
1662 fprintf(ofp, "0.9 inch 3 inch translate\n");
1663 fprintf(ofp, "%% first triangle (the one with dots)\n");
1664 fprintf(ofp, "0.6 tl 1.2 tl 0.8660254038 mul translate\n");
1665 fprintf(ofp, "newpath\n");
1666 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1667 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1668 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1669 fprintf(ofp, "closepath\n");
1670 fprintf(ofp, "stroke\n");
1673 /* plot one point of likelihood mapping analysis */
1674 void plotlmpoint(FILE *ofp, double w1, double w2)
1676 fprintf(ofp,"%.10f tl %.10f tl dot\n",
1677 0.5*w1 + w2, w1*0.8660254038);
1680 /* last lines of EPSF likelihood mapping file */
1681 void finishps(FILE *ofp)
1683 fprintf(ofp, "stroke\n");
1684 fprintf(ofp, "%% second triangle (the one with 3 basins)\n");
1685 fprintf(ofp, "/secondtriangle {\n");
1686 fprintf(ofp, "newpath\n");
1687 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1688 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1689 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1690 fprintf(ofp, "closepath\n");
1691 fprintf(ofp, "stroke\n");
1692 fprintf(ofp, "newpath\n");
1693 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1694 fprintf(ofp, " 0.50 tl 0.0000000000 tl lineto\n");
1695 fprintf(ofp, "stroke\n");
1696 fprintf(ofp, "newpath\n");
1697 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1698 fprintf(ofp, " 0.25 tl 0.4330127019 tl lineto\n");
1699 fprintf(ofp, "stroke\n");
1700 fprintf(ofp, "newpath\n");
1701 fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1702 fprintf(ofp, " 0.75 tl 0.4330127019 tl lineto\n");
1703 fprintf(ofp, "stroke\n");
1704 fprintf(ofp, "0.44 tl 0.5 tl moveto %% up\n");
1705 fprintf(ofp, "(%.1f%%) show\n", (double) ar1*100.0/Numquartets);
1706 fprintf(ofp, "0.25 tl 0.15 tl moveto %% down left\n");
1707 fprintf(ofp, "(%.1f%%) show\n", (double) ar3*100.0/Numquartets);
1708 fprintf(ofp, "0.63 tl 0.15 tl moveto %% down right\n");
1709 fprintf(ofp, "(%.1f%%) show\n", (double) ar2*100.0/Numquartets);
1710 fprintf(ofp, "} def\n");
1711 fprintf(ofp, "%% third triangle (the one with 7 basins)\n");
1712 fprintf(ofp, "/thirdtriangle {\n");
1713 fprintf(ofp, "newpath\n");
1714 fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1715 fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1716 fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1717 fprintf(ofp, "closepath\n");
1718 fprintf(ofp, "stroke\n");
1719 fprintf(ofp, "newpath\n");
1720 fprintf(ofp, " 0.25 tl 0.1443375673 tl moveto\n");
1721 fprintf(ofp, " 0.75 tl 0.1443375673 tl lineto\n");
1722 fprintf(ofp, " 0.50 tl 0.5773502692 tl lineto\n");
1723 fprintf(ofp, "closepath\n");
1724 fprintf(ofp, "stroke\n");
1725 fprintf(ofp, "newpath\n");
1726 fprintf(ofp, " 0.125 tl 0.2165063509 tl moveto\n");
1727 fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
1728 fprintf(ofp, "stroke\n");
1729 fprintf(ofp, "newpath\n");
1730 fprintf(ofp, " 0.375 tl 0.6495190528 tl moveto\n");
1731 fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
1732 fprintf(ofp, "stroke\n");
1733 fprintf(ofp, "newpath\n");
1734 fprintf(ofp, " 0.625 tl 0.6495190528 tl moveto\n");
1735 fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
1736 fprintf(ofp, "stroke\n");
1737 fprintf(ofp, "newpath\n");
1738 fprintf(ofp, " 0.875 tl 0.2165063509 tl moveto\n");
1739 fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
1740 fprintf(ofp, "stroke\n");
1741 fprintf(ofp, "newpath\n");
1742 fprintf(ofp, " 0.750 tl 0.00 tl moveto\n");
1743 fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
1744 fprintf(ofp, "stroke\n");
1745 fprintf(ofp, "newpath\n");
1746 fprintf(ofp, " 0.250 tl 0.00 tl moveto\n");
1747 fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
1748 fprintf(ofp, "stroke\n");
1749 fprintf(ofp, "0.42 tl 0.66 tl moveto %% up\n");
1750 fprintf(ofp, "(%.1f%%) show\n", (double) reg1*100.0/Numquartets);
1751 fprintf(ofp, "0.07 tl 0.05 tl moveto %% down left\n");
1752 fprintf(ofp, "(%.1f%%) show\n", (double) reg3*100.0/Numquartets);
1753 fprintf(ofp, "0.77 tl 0.05 tl moveto %% down right\n");
1754 fprintf(ofp, "(%.1f%%) show\n", (double) reg2*100.0/Numquartets);
1755 fprintf(ofp, "0.43 tl 0.05 tl moveto %% down side\n");
1756 fprintf(ofp, "(%.1f%%) show\n", (double) reg5*100.0/Numquartets);
1757 fprintf(ofp, "0.43 tl 0.28 tl moveto %% center\n");
1758 fprintf(ofp, "(%.1f%%) show\n", (double) reg7*100.0/Numquartets);
1759 fprintf(ofp, "gsave\n");
1760 fprintf(ofp, "-60 rotate\n");
1761 fprintf(ofp, "-0.07 tl 0.77 tl moveto %% right side\n");
1762 fprintf(ofp, "(%.1f%%) show\n", (double) reg4*100.0/Numquartets);
1763 fprintf(ofp, "grestore\n");
1764 fprintf(ofp, "gsave\n");
1765 fprintf(ofp, "60 rotate\n");
1766 fprintf(ofp, "0.4 tl -0.09 tl moveto %% left side\n");
1767 fprintf(ofp, "(%.1f%%) show\n", (double) reg6*100.0/Numquartets);
1768 fprintf(ofp, "grestore\n");
1769 fprintf(ofp, "} def\n");
1770 fprintf(ofp, "%% print the other two triangles\n");
1771 fprintf(ofp, "-0.6 tl -1.2 tl 0.8660254038 mul translate\n");
1772 fprintf(ofp, "secondtriangle\n");
1773 fprintf(ofp, "1.2 tl 0 translate\n");
1774 fprintf(ofp, "thirdtriangle\n");
1775 if (numclust == 4) { /* four cluster analysis */
1776 fprintf(ofp, "%% label corners\n");
1777 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1778 fprintf(ofp, "((a,b)-(c,d)) show %% CHANGE HERE IF NECESSARY\n");
1779 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1780 fprintf(ofp, "((a,d)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1781 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1782 fprintf(ofp, "((a,c)-(b,d)) show %% CHANGE HERE IF NECESSARY\n");
1784 if (numclust == 3) { /* three cluster analysis */
1785 fprintf(ofp, "%% label corners\n");
1786 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1787 fprintf(ofp, "((a,b)-(c,c)) show %% CHANGE HERE IF NECESSARY\n");
1788 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1789 fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1790 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1791 fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1793 if (numclust == 2) { /* two cluster analysis */
1794 fprintf(ofp, "%% label corners\n");
1795 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1796 fprintf(ofp, "((a,a)-(b,b)) show %% CHANGE HERE IF NECESSARY\n");
1797 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1798 fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n");
1799 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1800 fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n");
1802 fprintf(ofp, "showpage\n");
1803 fprintf(ofp, "%%%%EOF\n");
1806 /* computes LM point from the three log-likelihood values,
1807 plots the point, and does some statistics */
1808 void makelmpoint(FILE *fp, double b1, double b2, double b3)
1810 double w1, w2, w3, temp;
1811 unsigned char qpbranching;
1812 double temp1, temp2, temp3, onethird;
1813 unsigned char discreteweight[3], treebits[3];
1816 treebits[0] = (unsigned char) 1;
1817 treebits[1] = (unsigned char) 2;
1818 treebits[2] = (unsigned char) 4;
1820 /* sort in descending order */
1824 sort3doubles(qweight, qworder);
1826 /* compute Bayesian weights */
1827 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
1828 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
1829 qweight[qworder[0]] = 1.0;
1830 temp = qweight[0] + qweight[1] + qweight[2];
1831 qweight[0] = qweight[0]/temp;
1832 qweight[1] = qweight[1]/temp;
1833 qweight[2] = qweight[2]/temp;
1835 /* plot one point in likelihood mapping triangle */
1839 plotlmpoint(fp, w1, w2);
1841 /* check areas 1,2,3 */
1842 if (treebits[qworder[0]] == 1) ar1++;
1843 else if (treebits[qworder[0]] == 2) ar2++;
1846 /* check out regions 1,2,3,4,5,6,7 */
1848 /* 100 distribution */
1849 temp1 = 1.0 - qweight[qworder[0]];
1850 sqdiff[0] = temp1*temp1 +
1851 qweight[qworder[1]]*qweight[qworder[1]] +
1852 qweight[qworder[2]]*qweight[qworder[2]];
1853 discreteweight[0] = treebits[qworder[0]];
1855 /* 110 distribution */
1856 temp1 = 0.5 - qweight[qworder[0]];
1857 temp2 = 0.5 - qweight[qworder[1]];
1858 sqdiff[1] = temp1*temp1 + temp2*temp2 +
1859 qweight[qworder[2]]*qweight[qworder[2]];
1860 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
1862 /* 111 distribution */
1863 temp1 = onethird - qweight[qworder[0]];
1864 temp2 = onethird - qweight[qworder[1]];
1865 temp3 = onethird - qweight[qworder[2]];
1866 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1867 discreteweight[2] = (unsigned char) 7;
1869 /* sort in descending order */
1870 sort3doubles(sqdiff, sqorder);
1872 qpbranching = (unsigned char) discreteweight[sqorder[2]];
1874 if (qpbranching == 1) {
1876 if (w2 < w3) reg1l++;
1879 if (qpbranching == 2) {
1881 if (w1 < w3) reg2d++;
1884 if (qpbranching == 4) {
1886 if (w1 < w2) reg3d++;
1889 if (qpbranching == 3) {
1891 if (w1 < w2) reg4d++;
1894 if (qpbranching == 6) {
1896 if (w2 < w3) reg5l++;
1899 if (qpbranching == 5) {
1901 if (w1 < w3) reg6d++;
1904 if (qpbranching == 7) reg7++;
1907 /* print tree statistics */
1908 void printtreestats(FILE *ofp)
1911 double bestlkl, difflkl, difflklps, temp, sum;
1913 /* find best tree */
1916 for (i = 1; i < numutrees; i++)
1917 if (ulkl[i] > bestlkl) {
1922 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n");
1923 fprintf(ofp, "Tree log L difference S.E. Significantly worse\n");
1924 fprintf(ofp, "--------------------------------------------------------\n");
1925 for (i = 0; i < numutrees; i++) {
1926 difflkl = ulkl[besttree]-ulkl[i];
1927 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl);
1928 if (i == besttree) {
1929 fprintf(ofp, " <----------------- best tree");
1931 /* compute variance of Log L differences over sites */
1932 difflklps = difflkl/(double)Maxsite;
1934 for (j = 0; j < Numptrn; j++) {
1935 temp = allsites[besttree][j] - allsites[i][j] - difflklps;
1936 sum += temp*temp*Weight[j];
1938 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1939 fprintf(ofp, "%11.2f ", sum);
1940 if (difflkl > 1.96*sum)
1941 fprintf(ofp, "yes");
1947 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1951 /* find best tree */
1954 for (i = 1; i < numutrees; i++)
1955 if (ulklc[i] > bestlkl) {
1960 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n");
1961 fprintf(ofp, "Tree log L difference S.E. Significantly worse\n");
1962 fprintf(ofp, "--------------------------------------------------------\n");
1963 for (i = 0; i < numutrees; i++) {
1964 difflkl = ulklc[besttree]-ulklc[i];
1965 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl);
1966 if (i == besttree) {
1967 fprintf(ofp, " <----------------- best tree");
1969 /* compute variance of Log L differences over sites */
1970 difflklps = difflkl/(double)Maxsite;
1972 for (j = 0; j < Numptrn; j++) {
1973 temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps;
1974 sum += temp*temp*Weight[j];
1976 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1977 fprintf(ofp, "%11.2f ", sum);
1978 if (difflkl > 1.96*sum)
1979 fprintf(ofp, "yes");
1985 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1990 void timestamp(FILE* ofp)
1994 timespan = difftime(Stoptime, Starttime);
1995 cpuspan = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC);
1996 fprintf(ofp, "\n\nTIME STAMP\n\n");
1997 fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) );
1998 fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1999 timespan, timespan/60., timespan/3600.);
2000 fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
2001 fulltime, fulltime/60., fulltime/3600.);
2003 fprintf(ofp, "CPU time (incl. input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n",
2004 fullcpu, fullcpu/60., fullcpu/3600.);
2005 #endif /* TIMEDEBUG */
2009 /* extern int bestrfound; */
2011 /* write output file */
2012 void writeoutputfile(FILE *ofp, int part)
2018 if ((part == WRITEPARAMS) || (part == WRITEALL)) {
2020 fprintf(ofp, "TREE-PUZZLE %s\n\n", VERSION);
2022 fprintf(ofp, "TREE-PUZZLE %s%s\n\n", VERSION, ALPHA);
2025 fprintf(ofp, "Input file name: %s\n",INFILE);
2026 if (puzzlemode == USERTREE) fprintf(ofp, "User tree file name: %s\n",INTREE);
2029 fprintf(ofp, "Type of analysis: ");
2030 if (typ_optn == TREERECON_OPTN) fprintf(ofp, "tree reconstruction\n");
2031 if (typ_optn == LIKMAPING_OPTN) fprintf(ofp, "likelihood mapping\n");
2032 fprintf(ofp, "Parameter estimation: ");
2033 if (approxp_optn) fprintf(ofp, "approximate (faster)\n");
2034 else fprintf(ofp, "accurate (slow)\n");
2035 if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {
2036 fprintf(ofp, "Parameter estimation uses: ");
2038 fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
2040 fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
2042 fprintf(ofp, "Parameter estimation uses: ");
2044 fprintf(ofp, "1st user tree (for substitution process and rate variation)\n");
2045 else if (qcalg_optn)
2046 fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
2048 fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
2050 fprintf(ofp, "\nStandard errors (S.E.) are obtained by the curvature method.\n");
2051 fprintf(ofp, "The upper and lower bounds of an approximate 95%% confidence interval\n");
2052 fprintf(ofp, "for parameter or branch length x are x-1.96*S.E. and x+1.96*S.E.\n");
2053 fprintf(ofp, "\n\n");
2054 fprintf(ofp, "SEQUENCE ALIGNMENT\n\n");
2055 fprintf(ofp, "Input data: %d sequences with %d ", Maxspc, Maxsite);
2056 if (data_optn == AMINOACID)
2057 fprintf(ofp, "amino acid");
2058 else if (data_optn == NUCLEOTIDE && SH_optn)
2059 fprintf(ofp, "doublet (%d nucleotide)", Maxsite*2);
2060 else if (data_optn == NUCLEOTIDE && nuc_optn)
2061 fprintf(ofp, "nucleotide");
2062 else if (data_optn == BINARY)
2063 fprintf(ofp, "binary state");
2064 fprintf(ofp, " sites");
2065 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
2066 if (codon_optn == 1) fprintf(ofp, " (1st codon positions)");
2067 if (codon_optn == 2) fprintf(ofp, " (2nd codon positions)");
2068 if (codon_optn == 3) fprintf(ofp, " (3rd codon positions)");
2069 if (codon_optn == 4) fprintf(ofp, " (1st and 2nd codon positions)");
2071 if (data_optn == NUCLEOTIDE && SH_optn) {
2073 fprintf(ofp, " (1st and 2nd codon positions)");
2075 fprintf(ofp, " (1st+2nd, 3rd+4th, etc. site)");
2078 fprintf(ofp, "Number of constant sites: %d (= %.1f%% of all sites)\n",
2079 Numconst, 100.0*fracconst);
2080 fprintf(ofp, "Number of site patterns: %d\n",
2082 fprintf(ofp, "Number of constant site patterns: %d (= %.1f%% of all site patterns)\n\n\n",
2083 Numconstpat, 100.0*fracconstpat);
2084 fprintf(ofp, "SUBSTITUTION PROCESS\n\n");
2085 fprintf(ofp, "Model of substitution: ");
2086 if (data_optn == NUCLEOTIDE) { /* nucleotides */
2088 if(HKY_optn) fprintf(ofp, "HKY (Hasegawa et al. 1985)\n");
2089 else fprintf(ofp, "TN (Tamura-Nei 1993)\n");
2090 fprintf(ofp, "Transition/transversion parameter");
2092 fprintf(ofp, " (estimated from data set)");
2093 fprintf(ofp, ": %.2f", TSparam);
2095 fprintf(ofp, " (S.E. %.2f)", tserr);
2098 if (optim_optn && TSparam > MAXTS - 1.0)
2099 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2100 if (optim_optn && TSparam < MINTS + 0.1)
2101 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
2104 fprintf(ofp, "Y/R transition parameter");
2106 fprintf(ofp, " (estimated from data set)");
2107 fprintf(ofp, ": %.2f", YRparam);
2109 fprintf(ofp, " (S.E. %.2f)", yrerr);
2112 if (optim_optn && YRparam > MAXYR - 0.5)
2113 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2114 if (optim_optn && YRparam < MINYR + 0.1)
2115 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
2120 fprintf(ofp, "SH (Schoeniger-von Haeseler 1994)\n");
2121 fprintf(ofp, "Transition/transversion parameter");
2122 if (optim_optn) fprintf(ofp, " (estimated from data set)");
2123 fprintf(ofp, ": %.2f\n", TSparam);
2125 fprintf(ofp, " (S.E. %.2f)", tserr);
2128 if (optim_optn && TSparam > MAXTS - 1.0)
2129 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2130 if (optim_optn && TSparam < MINTS + 0.1)
2131 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
2135 if (data_optn == AMINOACID) { /* amino acids */
2136 if (Dayhf_optn) fprintf(ofp, "Dayhoff (Dayhoff et al. 1978)\n");
2137 if (Jtt_optn) fprintf(ofp, "JTT (Jones et al. 1992)\n");
2138 if (mtrev_optn) fprintf(ofp, "mtREV24 (Adachi-Hasegawa 1996)\n");
2139 if (cprev_optn) fprintf(ofp, "cpREV45 (Adachi et al. 2000)\n");
2140 if (blosum62_optn) fprintf(ofp, "BLOSUM 62 (Henikoff-Henikoff 1992)\n");
2141 if (vtmv_optn) fprintf(ofp, "VT (Mueller-Vingron 2000)\n");
2142 if (wag_optn) fprintf(ofp, "WAG (Whelan-Goldman 2000)\n");
2144 if (data_optn == BINARY) { /* binary states */
2145 fprintf(ofp, "Two-state model (Felsenstein 1981)\n");
2147 if (data_optn == AMINOACID)
2148 fprintf(ofp, "Amino acid ");
2149 else if (data_optn == NUCLEOTIDE && SH_optn)
2150 fprintf(ofp, "Doublet ");
2151 else if (data_optn == NUCLEOTIDE && nuc_optn)
2152 fprintf(ofp, "Nucleotide ");
2153 else if (data_optn == BINARY)
2154 fprintf(ofp, "Binary state ");
2155 fprintf(ofp, "frequencies (");
2156 if (Frequ_optn) fprintf(ofp, "estimated from data set");
2157 else fprintf(ofp, "user specified");
2158 if (data_optn == NUCLEOTIDE && SH_optn && sym_optn)
2159 fprintf(ofp, " and symmetrized");
2160 fprintf(ofp, "):\n\n");
2161 for (i = 0; i < gettpmradix(); i++)
2162 fprintf(ofp, " pi(%s) = %5.1f%%\n",
2163 int2code(i), Freqtpm[i]*100);
2164 if (data_optn == NUCLEOTIDE) {
2165 fprintf(ofp, "\nExpected transition/transversion ratio: %.2f",
2167 if (tstvf84 == 0.0) fprintf(ofp, "\n");
2168 else fprintf(ofp, " (= F84 parameter)\n");
2169 fprintf(ofp, "Expected pyrimidine transition/purine transition");
2170 fprintf(ofp, " ratio: %.2f\n", yrtsratio);
2171 if (tstvf84 != 0.0 && TN_optn)
2173 "This TN model is equivalent to a F84 model (Felsenstein 1984).\n");
2175 fprintf(ofp, "\n\nRATE HETEROGENEITY\n\n");
2176 fprintf(ofp, "Model of rate heterogeneity: ");
2177 if (rhetmode == UNIFORMRATE) fprintf(ofp, "uniform rate\n");
2178 if (rhetmode == GAMMARATE ) fprintf(ofp, "Gamma distributed rates\n");
2179 if (rhetmode == TWORATE ) fprintf(ofp, "two rates (1 invariable + 1 variable)\n");
2180 if (rhetmode == MIXEDRATE ) fprintf(ofp, "mixed (1 invariable + %d Gamma rates)\n", numcats);
2181 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
2182 fprintf(ofp, "Fraction of invariable sites");
2183 if (fracinv_optim) fprintf(ofp, " (estimated from data set)");
2184 fprintf(ofp, ": %.2f", fracinv);
2185 if (fracinv_optim) fprintf(ofp, " (S.E. %.2f)", fierr);
2188 if (fracinv_optim && fracinv > MAXFI - 0.05)
2189 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2191 fprintf(ofp, "Number of invariable sites: %.0f\n", floor(fracinv*Maxsite));
2193 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
2194 fprintf(ofp, "Gamma distribution parameter alpha");
2195 if (grate_optim) fprintf(ofp, " (estimated from data set)");
2196 fprintf(ofp, ": %.2f", (1.0-Geta)/Geta);
2197 if (grate_optim) fprintf(ofp, " (S.E. %.2f)",
2198 geerr/(Geta*Geta)); /* first order approximation */
2201 if (grate_optim && Geta > MAXGE - 0.02)
2202 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2203 if (grate_optim && Geta < MINGE + 0.01)
2204 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");
2206 fprintf(ofp, "Number of Gamma rate categories: %d\n", numcats);
2208 if (rhetmode == MIXEDRATE) {
2209 fprintf(ofp, "Total rate heterogeneity (invariable sites + Gamma model): ");
2210 fprintf(ofp, "%.2f", fracinv + Geta - fracinv*Geta);
2211 if (grate_optim && fracinv_optim)
2212 fprintf(ofp, " (S.E. %.2f)", geerr + fierr); /* first order approximation */
2213 else if (grate_optim && !fracinv_optim)
2214 fprintf(ofp, " (S.E. %.2f)", geerr);
2215 else if (!grate_optim && fracinv_optim)
2216 fprintf(ofp, " (S.E. %.2f)", fierr);
2219 if (rhetmode != UNIFORMRATE) {
2220 fprintf(ofp, "\nRates and their respective probabilities used in the likelihood function:\n");
2221 fprintf(ofp, "\n Category Relative rate Probability\n");
2222 if (rhetmode == TWORATE || rhetmode == MIXEDRATE)
2223 fprintf(ofp, " 0 0.0000 %.4f\n", fracinv);
2224 for (i = 0; i < numcats; i++)
2225 fprintf(ofp, " %d %.4f %.4f\n",
2226 i+1, Rates[i], (1.0-fracinv)/(double) numcats);
2228 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
2229 fprintf(ofp, "\nCategories 1-%d approximate a continous ", numcats);
2230 fprintf(ofp, "Gamma-distribution with expectation 1\n");
2231 fprintf(ofp, "and variance ");
2232 if (Geta == 1.0) fprintf(ofp, "infinity");
2233 else fprintf(ofp, "%.2f", Geta/(1.0-Geta));
2234 fprintf(ofp, ".\n");
2237 if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE))
2238 if (rhetmode != UNIFORMRATE) {
2239 fprintf(ofp, "\nCombination of categories that contributes");
2240 fprintf(ofp, " the most to the likelihood\n");
2241 fprintf(ofp, "(computation done without clock assumption assuming ");
2242 if (puzzlemode == QUARTPUZ) fprintf(ofp, "quartet-puzzling tree");
2243 if (puzzlemode == USERTREE) {
2244 if (utree_optn) fprintf(ofp, "1st user tree");
2245 else fprintf(ofp, "NJ tree");
2247 fprintf(ofp, "):\n\n");
2248 if (bestratefound==0) findbestratecombination();
2249 printbestratecombination(ofp);
2252 fprintf(ofp, "\n\nSEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER)\n\n");
2254 fprintf(ofp, " 5%% chi-square test p-value\n");
2255 for (i = 0; i < Maxspc; i++) {
2258 pval = homogentest(i);
2259 if ( pval < 0.05 ) fprintf(ofp, " failed ");
2260 else fprintf(ofp, " passed ");
2261 if (chi2fail) fail = TRUE;
2262 fprintf(ofp, " %6.2f%% ", pval*100.0);
2266 fprintf(ofp, "The chi-square tests compares the ");
2267 if (data_optn == AMINOACID)
2268 fprintf(ofp, "amino acid");
2269 else if (data_optn == NUCLEOTIDE && SH_optn)
2270 fprintf(ofp, "doublet");
2271 else if (data_optn == NUCLEOTIDE && nuc_optn)
2272 fprintf(ofp, "nucleotide");
2273 else if (data_optn == BINARY)
2274 fprintf(ofp, "binary state");
2275 fprintf(ofp," composition of each sequence\n");
2276 fprintf(ofp, "to the frequency distribution assumed in the maximum likelihood model.\n");
2278 fprintf(ofp, "\nWARNING: Result of chi-square test may not be valid");
2279 fprintf(ofp, " because of small\nmaximum likelihood frequencies and");
2280 fprintf(ofp, " short sequence length!\n");
2282 fprintf(ofp, "\n\nIDENTICAL SEQUENCES\n\n");
2283 fprintf(ofp, "The sequences in each of the following groups are all identical. To speed\n");
2284 fprintf(ofp, "up computation please remove all but one of each group from the data set.\n\n");
2285 findidenticals(ofp);
2286 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD DISTANCES\n\n");
2287 fprintf(ofp, "Maximum likelihood distances are computed using the ");
2288 fprintf(ofp, "selected model of\nsubstitution and rate heterogeneity.\n\n");
2290 fprintf(ofp, "\nAverage distance (over all possible pairs of sequences): %.5f\n",
2291 averagedist() / 100.0);
2294 } /* if WRITEPARAMS) || WRITEALL */
2296 if ((part == WRITEREST) || (part == WRITEALL)) {
2298 if (puzzlemode == QUARTPUZ &&typ_optn == TREERECON_OPTN) {
2299 fprintf(ofp, "\n\nBAD QUARTET STATISTICS (SEQUENCES IN INPUT ORDER)\n\n");
2300 for (i = 0; i < Maxspc; i++) {
2304 fprintf(ofp, " [%lu] %6.2f%%\n", badtaxon[i], (double) (100 * badtaxon[i]) / (double) badqs);
2306 fprintf(ofp, " [%lu]\n", badtaxon[i]);
2308 fprintf(ofp, "\nThe number in square brackets indicates how often each sequence is\n");
2309 fprintf(ofp, "involved in one of the %lu completely unresolved quartets of the\n", badqs);
2310 fprintf(ofp, "quartet puzzling tree search.\n");
2312 fprintf(ofp, "Additionally the according percentages are given.\n");
2315 if (typ_optn == TREERECON_OPTN) {
2317 fprintf(ofp, "\n\nTREE SEARCH\n\n");
2318 if (puzzlemode == QUARTPUZ) {
2319 fprintf(ofp, "Quartet puzzling is used to choose from the possible tree topologies\n");
2320 fprintf(ofp, "and to simultaneously infer support values for internal branches.\n\n");
2321 fprintf(ofp, "Number of puzzling steps: %lu\n", Numtrial);
2322 fprintf(ofp, "Analysed quartets: %lu\n", Numquartets);
2323 fprintf(ofp, "Unresolved quartets: %lu (= %.1f%%)\n",
2324 badqs, (double) badqs / (double) Numquartets * 100);
2325 fprintf(ofp, "\nQuartet trees are based on %s maximum likelihood values\n",
2326 (approxqp ? "approximate" : "exact"));
2327 fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n");
2329 if (puzzlemode == USERTREE) {
2330 fprintf(ofp, "%d tree topologies were specified by the user.\n", numutrees);
2332 if (puzzlemode == PAIRDIST) {
2333 fprintf(ofp, "No tree search performed (maximum likelihood distances only).\n");
2336 if (puzzlemode == QUARTPUZ) {
2337 fprintf(ofp, "QUARTET PUZZLING TREE\n\n");
2338 fprintf(ofp, "Support for the internal branches of the unrooted quartet puzzling\n");
2339 fprintf(ofp, "tree topology is shown in percent.\n");
2340 if (consincluded == Maxspc - 3)
2341 fprintf(ofp,"\nThis quartet puzzling tree is completely resolved.\n");
2343 fprintf(ofp,"\nThis quartet puzzling tree is not completely resolved!\n");
2344 fprintf(ofp, "\n\n");
2345 plotconsensustree(ofp);
2346 fprintf(ofp, "\n\nQuartet puzzling tree (in CLUSTAL W notation):\n\n");
2347 writeconsensustree(ofp);
2348 fprintf(ofp, "\n\nBIPARTITIONS\n\n");
2349 fprintf(ofp, "The following bipartitions occured at least once");
2350 fprintf(ofp, " in all intermediate\ntrees that have been generated ");
2351 fprintf(ofp, "in the %lu puzzling steps:\n\n", Numtrial);
2352 fprintf(ofp, "Bipartitions included in the quartet puzzling tree:\n");
2354 "(bipartition with sequences in input order : number of times seen)\n\n");
2355 for (li = 0; li < consincluded; li++) {
2357 printsplit(ofp, splitfreqs[2*li+1]);
2358 fprintf(ofp, " : %lu\n", splitfreqs[2*li]);
2360 if (consincluded == 0) fprintf(ofp, " None (no bipartition included)\n");
2361 fprintf(ofp, "\nBipartitions not included in the quartet puzzling tree:\n");
2363 "(bipartition with sequences in input order : number of times seen)\n\n");
2365 if (consincluded == numbiparts) {
2366 fprintf(ofp, " None (all bipartitions are included)\n");
2368 /* print first 20 bipartions not included */
2369 for (li = consincluded; (li < numbiparts) && (li < consincluded + 20UL); li++) {
2371 printsplit(ofp, splitfreqs[2*li+1]);
2372 fprintf(ofp, " : %lu\n", splitfreqs[2*li]);
2374 if ((li == consincluded + 20UL) && (li != numbiparts))
2375 fprintf(ofp, "\n(%lu other less frequent bipartitions not shown)\n",
2376 numbiparts - consincluded - 20UL);
2378 fprintfsortedpstrees(ofp, psteptreelist, psteptreenum, psteptreesum, 0, 5.0);
2381 if (puzzlemode == QUARTPUZ) {
2382 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET");
2383 fprintf(ofp, " PUZZLING TREE (NO CLOCK)\n\nBranch lengths are computed using");
2384 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
2385 clockmode = 0; /* nonclocklike branch lengths */
2389 fprintf(ofp, "\n\nQuartet puzzling tree with maximum likelihood branch lengths");
2390 fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
2393 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF QUARTET");
2394 fprintf(ofp, " PUZZLING TREE (WITH CLOCK)\n\nBranch lengths are computed using");
2395 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
2396 fprintf(ofp, "\nRoot located at branch: %d ", locroot+1);
2397 if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n");
2398 if (rootsearch == 1) {
2399 fprintf(ofp, "(automatic search)");
2400 if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
2401 fprintf(ofp, "\n\n");
2402 fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
2403 fprintf(ofp, "(rename \"outtree\" to \"intree\") and select location of root manually!");
2404 fprintf(ofp, "\n\n\n");
2406 if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n");
2407 clockmode = 1; /* clocklike branch lengths */
2410 fprintf(ofp, "\nTree drawn as unrooted tree for better ");
2411 fprintf(ofp, "comparison with non-clock tree!\n");
2415 fprintf(ofp, "\n\nRooted quartet puzzling tree with clocklike");
2416 fprintf(ofp, " maximum likelihood branch lengths\n");
2417 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
2418 fputrooted(ofp, locroot);
2422 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST\n\n");
2423 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
2424 Ctree->lklhd, Numspc + Numibrnch);
2425 fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n",
2426 Ctree->lklhdc, Numhts + 1);
2427 delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc));
2428 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);
2429 df = Numspc + Numibrnch - Numhts - 1;
2430 fprintf(ofp, "Degress of freedom of chi-square distribution: %d\n", df);
2432 pval = IncompleteGammaQ(df*0.5, delta*0.5);
2434 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
2436 fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
2437 fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
2438 fprintf(ofp, "is not significantly increased.\n");
2440 fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
2441 fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
2442 fprintf(ofp, "significantly increased.\n");
2444 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
2450 if (typ_optn == LIKMAPING_OPTN) {
2452 fprintf(ofp, "\n\nLIKELIHOOD MAPPING ANALYSIS\n\n");
2453 fprintf(ofp, "Number of quartets: %lu", Numquartets);
2454 if (lmqts == 0) fprintf(ofp, " (all possible)\n");
2455 else fprintf(ofp, " (random choice)\n");
2456 fprintf(ofp, "\nQuartet trees are based on approximate maximum likelihood values\n");
2457 fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n");
2458 if (numclust == 1) {
2459 fprintf(ofp, "Sequences are not grouped in clusters.\n");
2461 fprintf(ofp, "Sequences are grouped in %d clusters.\n", numclust);
2462 fprintf(ofp, "\nCluster a: %d sequences\n\n", clustA);
2463 for (i = 0; i < clustA; i++) {
2465 fputid(ofp, clusterA[i]);
2468 fprintf(ofp, "\nCluster b: %d sequences\n\n", clustB);
2469 for (i = 0; i < clustB; i++) {
2471 fputid(ofp, clusterB[i]);
2475 fprintf(ofp, "\nCluster c: %d sequences\n\n", clustC);
2476 for (i = 0; i < clustC; i++) {
2478 fputid(ofp, clusterC[i]);
2482 if (numclust == 4) {
2483 fprintf(ofp, "\nCluster d: %d sequences\n\n", clustD);
2484 for (i = 0; i < clustD; i++) {
2486 fputid(ofp, clusterD[i]);
2490 fprintf(ofp, "\nQuartets of sequences used in the likelihood");
2491 fprintf(ofp, " mapping analysis are generated\n");
2493 fprintf(ofp, "by drawing two sequences from cluster a and two from cluster b.");
2495 fprintf(ofp, "by drawing one sequence from clusters a and b and two from cluster c.");
2497 fprintf(ofp, "by drawing one sequence from each of the clusters a, b, c, and d.");
2500 fprintf(ofp, "\n\nLIKELIHOOD MAPPING STATISTICS\n\n");
2501 fprintf(ofp, "Occupancies of the three areas 1, 2, 3:\n\n");
2503 fprintf(ofp, " (a,b)-(c,d)\n");
2505 fprintf(ofp, " (a,b)-(c,c)\n");
2507 fprintf(ofp, " (a,a)-(b,b)\n");
2508 fprintf(ofp, " /\\\n");
2509 fprintf(ofp, " / \\\n");
2510 fprintf(ofp, " / \\\n");
2511 fprintf(ofp, " / 1 \\\n");
2512 fprintf(ofp, " / \\ / \\\n");
2513 fprintf(ofp, " / \\ / \\\n");
2514 fprintf(ofp, " / \\/ \\\n");
2515 fprintf(ofp, " / 3 : 2 \\\n");
2516 fprintf(ofp, " / : \\\n");
2517 fprintf(ofp, " /__________________\\\n");
2519 fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n");
2521 fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n");
2523 fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n");
2525 fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%)\n",
2526 ar1, (double) ar1*100.0/Numquartets);
2527 fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%)\n",
2528 ar2, (double) ar2*100.0/Numquartets);
2529 fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%)\n\n",
2530 ar3, (double) ar3*100.0/Numquartets);
2531 fprintf(ofp, "Occupancies of the seven areas 1, 2, 3, 4, 5, 6, 7:\n\n");
2533 fprintf(ofp, " (a,b)-(c,d)\n");
2535 fprintf(ofp, " (a,b)-(c,c)\n");
2537 fprintf(ofp, " (a,a)-(b,b)\n");
2538 fprintf(ofp, " /\\\n");
2539 fprintf(ofp, " / \\\n");
2540 fprintf(ofp, " / 1 \\\n");
2541 fprintf(ofp, " / \\ / \\\n");
2542 fprintf(ofp, " / /\\ \\\n");
2543 fprintf(ofp, " / 6 / \\ 4 \\\n");
2544 fprintf(ofp, " / / 7 \\ \\\n");
2545 fprintf(ofp, " / \\ /______\\ / \\\n");
2546 fprintf(ofp, " / 3 : 5 : 2 \\\n");
2547 fprintf(ofp, " /__________________\\\n");
2549 fprintf(ofp, " (a,d)-(b,c) (a,c)-(b,d)\n");
2551 fprintf(ofp, " (a,c)-(b,c) (a,c)-(b,c)\n");
2553 fprintf(ofp, " (a,b)-(a,b) (a,b)-(a,b)\n");
2555 fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%) left: %lu right: %lu\n",
2556 reg1, (double) reg1*100.0/Numquartets, reg1l, reg1r);
2557 fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%) bottom: %lu top: %lu\n",
2558 reg2, (double) reg2*100.0/Numquartets, reg2d, reg2u);
2559 fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%) bottom: %lu top: %lu\n",
2560 reg3, (double) reg3*100.0/Numquartets, reg3d, reg3u);
2561 fprintf(ofp, "Number of quartets in region 4: %lu (= %.1f%%) bottom: %lu top: %lu\n",
2562 reg4, (double) reg4*100.0/Numquartets, reg4d, reg4u);
2563 fprintf(ofp, "Number of quartets in region 5: %lu (= %.1f%%) left: %lu right: %lu\n",
2564 reg5, (double) reg5*100.0/Numquartets, reg5l, reg5r);
2565 fprintf(ofp, "Number of quartets in region 6: %lu (= %.1f%%) bottom: %lu top: %lu\n",
2566 reg6, (double) reg6*100.0/Numquartets, reg6d, reg6u);
2567 fprintf(ofp, "Number of quartets in region 7: %lu (= %.1f%%)\n",
2568 reg7, (double) reg7*100.0/Numquartets);
2571 } /* if WRITEREST) || WRITEALL */
2576 void writetimesstat(FILE *ofp)
2579 double cpusum = 0.0;
2580 double wallmax = 0.0;
2581 cputimes[0] = ((double)(cputimestop - cputimestart) / CLOCKS_PER_SEC);
2582 walltimes[0] = difftime(walltimestop, walltimestart);
2583 fullcpu = tarr.fullcpu;
2584 fulltime = tarr.fulltime;
2585 fullcputimes[0] = tarr.fullcpu;
2586 fullwalltimes[0] = tarr.fulltime;
2587 altcputimes[0] = tarr.cpu;
2588 altwalltimes[0] = tarr.time;
2589 fprintf(ofp, "\n\n\nPARALLEL LOAD STATISTICS\n\n");
2591 fprintf(ofp, "The analysis was performed with %d parallel processes (1 master and \n", PP_NumProcs);
2592 fprintf(ofp, "%d worker processes).\n\n", PP_NumProcs-1);
2593 fprintf(ofp, "The following table the distribution of computation to the processes.\n");
2594 fprintf(ofp, "The first column gives the process number, where 0 is the master process.\n");
2595 fprintf(ofp, "The second and third column show the number of quartets computed (3 topologies \n");
2596 fprintf(ofp, "each) and the the number of scheduling blocks the came in. The last two columns \n");
2597 fprintf(ofp, "state the number of puzzling steps done by a process and number of scheduling \n");
2598 fprintf(ofp, "blocks.\n\n");
2599 fprintf(ofp, "process #quartets #chunks #puzzlings #chunks \n");
2600 fprintf(ofp, "-----------------------------------------------\n");
2601 for (n=0; n<PP_NumProcs; n++) {
2602 fprintf(ofp, "%6d %9d %7d %10d %7d \n", n,
2603 quartsent[n], quartsentn[n],
2604 splitsent[n], splitsentn[n]);
2606 fprintf(ofp, "-----------------------------------------------\n");
2607 fprintf(ofp, " Sums: %9d %7d %10d %7d \n",
2608 PP_quartrecved, PP_quartrecvedn,
2609 PP_splitrecved, PP_splitrecvedn);
2612 fprintf(ofp, "\n\nBelow the distribution of computing times (CPU and wallclock) per host is shown.\n");
2613 fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
2614 fprintf(ofp, "sum of CPU times and the maximum wallclock time is shown.\n\n");
2615 fprintf(ofp, "process CPU-time[s] [min] [hours] | wallclock[s] [min] [hours] \n");
2616 fprintf(ofp, "----------------------------------------------------------------------------\n");
2617 for (n=0; n<PP_NumProcs; n++) {
2620 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f\n", n,
2621 cputimes[n], cputimes[n] /60, cputimes[n] /3600,
2622 walltimes[n], walltimes[n]/60, walltimes[n]/3600);
2623 # endif /* TIMEDEBUG */
2625 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f\n", n,
2626 fullcputimes[n], fullcputimes[n] /60, fullcputimes[n] /3600,
2627 fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
2630 fprintf(ofp, "%6d %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f alt\n", n,
2631 altcputimes[n], altcputimes[n] /60, altcputimes[n] /3600,
2632 altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
2633 # endif /* TIMEDEBUG */
2635 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
2636 cpusum += fullcputimes[n];
2638 fprintf(ofp, "----------------------------------------------------------------------------\n");
2639 fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f | %11.1f %9.1f %9.1f \n",
2640 cpusum, cpusum/60, cpusum/3600, wallmax, wallmax/60, wallmax/3600);
2641 #else /* TIMEDEBUG */
2642 fprintf(ofp, "\n\nBelow the distribution of computing times (wallclock) per host is shown.\n");
2643 fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
2644 fprintf(ofp, "the maximum wallclock times is shown.\n\n");
2645 fprintf(ofp, "process wallclock[s] [min] [hours] \n");
2646 fprintf(ofp, "----------------------------------------------------------------------------\n");
2647 for (n=0; n<PP_NumProcs; n++) {
2650 fprintf(ofp, "%6d %11.1f %9.1f %9.1f\n", n,
2651 walltimes[n], walltimes[n]/60, walltimes[n]/3600);
2652 # endif /* TIMEDEBUG */
2654 fprintf(ofp, "%6d %11.1f %9.1f %9.1f\n", n,
2655 fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
2658 fprintf(ofp, "%6d %11.1f %9.1f %9.1f alt\n", n,
2659 altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
2660 # endif /* TIMEDEBUG */
2662 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
2663 cpusum += fullcputimes[n];
2665 fprintf(ofp, "----------------------------------------------------------------------------\n");
2666 fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f \n",
2667 wallmax, wallmax/60, wallmax/3600);
2668 #endif /* TIMEDEBUG */
2673 } /* writetimesstat */
2677 /* write current user tree to file */
2678 void writecutree(FILE *ofp, int num)
2684 if (typ_optn == TREERECON_OPTN) {
2686 if (puzzlemode == USERTREE) {
2687 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
2688 fprintf(ofp, " DEFINED TREE # %d (NO CLOCK)\n\nBranch lengths are computed using", num);
2689 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
2690 clockmode = 0; /* nonclocklike branch lengths */
2694 fprintf(ofp, "\n\nUnrooted user defined tree with maximum likelihood branch lengths");
2695 fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
2698 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
2699 fprintf(ofp, " DEFINED TREE # %d (WITH CLOCK)\n\nBranch lengths are computed using", num);
2700 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
2701 fprintf(ofp, "\nRoot located at branch: %d ", locroot+1);
2702 if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n");
2703 if (rootsearch == 1) {
2704 fprintf(ofp, "(automatic search)");
2705 if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
2706 fprintf(ofp, "\n\n");
2707 fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
2708 fprintf(ofp, "and select location of root manually!");
2709 fprintf(ofp, "\n\n\n");
2712 if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n");
2713 clockmode = 1; /* clocklike branch lengths */
2719 fprintf(ofp, "\n\nRooted user defined tree with clocklike ");
2720 fprintf(ofp, "maximum likelihood branch lengths\n");
2721 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
2722 fputrooted(ofp, locroot);
2726 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST FOR USER TREE # %d\n\n", num);
2727 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
2728 Ctree->lklhd, Numspc + Numibrnch);
2729 fprintf(ofp, "log L with clock: %.2f (independent branch parameters: %d)\n\n",
2730 Ctree->lklhdc, Numhts + 1);
2731 delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc));
2732 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);
2733 df = Numspc + Numibrnch - Numhts - 1;
2734 fprintf(ofp, "Degrees of freedom of chi-square distribution: %d\n", df);
2736 pval = IncompleteGammaQ (df*0.5, delta*0.5);
2738 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
2740 fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
2741 fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
2742 fprintf(ofp, "is not significantly increased.\n");
2744 fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
2745 fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
2746 fprintf(ofp, "significantly increased.\n");
2748 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
2755 /******************************************************************************/
2756 /* timer routines */
2757 /******************************************************************************/
2766 /* check remaining time and print message if necessary */
2767 void checktimer(uli numqts)
2769 double tc2, mintogo, minutes, hours;
2772 if ( (time2 - time1) > 900) { /* generate message every 15 minutes */
2773 /* every 900 seconds */
2774 /* percentage of completed quartets */
2777 FPRINTF(STDOUTFILE "\n");
2779 tc2 = 100.*numqts/Numquartets;
2780 mintogo = (100.0-tc2) *
2781 (double) (time2-time0)/60.0/tc2;
2782 hours = floor(mintogo/60.0);
2783 minutes = mintogo - 60.0*hours;
2784 FPRINTF(STDOUTFILE "%.2f%%", tc2);
2785 FPRINTF(STDOUTFILE " completed (remaining");
2786 FPRINTF(STDOUTFILE " time: %.0f", hours);
2787 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2788 FPRINTF(STDOUTFILE " minutes)\n");
2795 /* check remaining time and print message if necessary */
2796 void checktimer2(uli numqts, uli all, int flag)
2798 double tc2, mintogo, minutes, hours;
2808 if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */
2809 /* every 900 seconds */
2810 /* percentage of completed quartets */
2813 FPRINTF(STDOUTFILE "\n");
2815 tc2 = 100.*numqts/Numquartets;
2816 mintogo = (100.0-tc2) *
2817 (double) (tt2-time0)/60.0/tc2;
2818 hours = floor(mintogo/60.0);
2819 minutes = mintogo - 60.0*hours;
2820 FPRINTF(STDOUTFILE "%.2f%%", tc2);
2821 FPRINTF(STDOUTFILE " completed (remaining");
2822 FPRINTF(STDOUTFILE " time: %.0f", hours);
2823 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2824 FPRINTF(STDOUTFILE " minutes)\n");
2831 void resetqblocktime(timearray_t *ta)
2833 ta->quartcpu += ta->quartblockcpu;
2834 ta->quartblockcpu = 0.0;
2835 ta->quarttime += ta->quartblocktime;
2836 ta->quartblocktime = 0.0;
2837 } /* resetqblocktime */
2840 void resetpblocktime(timearray_t *ta)
2842 ta->puzzcpu += ta->puzzblockcpu;
2843 ta->puzzblockcpu = 0.0;
2844 ta->puzztime += ta->puzzblocktime;
2845 ta->puzzblocktime = 0.0;
2846 } /* resetpblocktime */
2850 void printtimearr(timearray_t *ta)
2856 printf("(%2d) MMCPU: %11ld / %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu);
2857 printf("(%2d) CTick: %11.6f [tks] / %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime);
2859 printf("(%2d) MMTIM: %11ld / %11ld \n", PP_Myid, ta->maxtime, ta->mintime);
2861 printf("(%2d) Mxblk: %11.6e / %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock);
2862 printf("(%2d) Mnblk: %11.6e / %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock);
2864 printf("(%2d) Gnrl: %11.6e / %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime);
2865 printf("(%2d) Optn: %11.6e / %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime);
2866 printf("(%2d) Estm: %11.6e / %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime);
2867 printf("(%2d) Qurt: %11.6e / %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime);
2868 printf("(%2d) QBlk: %11.6e / %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime);
2869 printf("(%2d) QMax: %11.6e / %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime);
2870 printf("(%2d) QMin: %11.6e / %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime);
2872 printf("(%2d) Puzz: %11.6e / %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime);
2873 printf("(%2d) PBlk: %11.6e / %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime);
2874 printf("(%2d) PMax: %11.6e / %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime);
2875 printf("(%2d) PMin: %11.6e / %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime);
2877 printf("(%2d) Tree: %11.6e / %11.6e \n", PP_Myid, ta->treecpu, ta->treetime);
2878 printf("(%2d) TBlk: %11.6e / %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime);
2879 printf("(%2d) TMax: %11.6e / %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime);
2880 printf("(%2d) TMin: %11.6e / %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime);
2882 printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid,
2883 (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu),
2884 (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime));
2885 printf("(%2d) CPU: %11.6e / Time: %11.6e \n", PP_Myid, ta->cpu, ta->time);
2886 printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime);
2888 } /* printtimearr */
2889 #endif /* TIMEDEBUG */
2893 void inittimearr(timearray_t *ta)
2897 jtype[OVERALL] = "OVERALL";
2898 jtype[GENERAL] = "GENERAL";
2899 jtype[OPTIONS] = "OPTIONS";
2900 jtype[PARAMEST] = "PARAMeter ESTimation";
2901 jtype[QUARTETS] = "QUARTETS";
2902 jtype[PUZZLING] = "PUZZLING steps";
2903 jtype[TREEEVAL] = "TREE EVALuation";
2904 ta->currentjob = GENERAL;
2910 ta->mincputick = (double)(c2 - c1);
2911 ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC;
2913 ta->tempcpu = clock();
2914 ta->tempcpustart = ta->tempcpu;
2915 ta->tempfullcpu = ta->tempcpu;
2916 time(&(ta->temptime));
2917 ta->temptimestart = ta->temptime;
2918 ta->tempfulltime = ta->temptime;
2920 c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);;
2924 c2 = (clock_t)((2 * c1) + 1);
2926 if (c1 == c2) ta->maxcpu=c0;
2927 if (c1 > c2) ta->maxcpu=c1;
2929 c0=0; c1=0; c2=(clock_t)((2 * c1) - 1);
2933 c2 = (clock_t)((2 * c1) - 1);
2935 if (c1 == c2) ta->mincpu=c0;
2936 if (c1 < c2) ta->mincpu=c1;
2943 ta->maxcpublock = 0;
2944 ta->mincpublock = DBL_MAX;
2945 ta->maxtimeblock = 0;
2946 ta->mintimeblock = DBL_MAX;
2954 ta->generalcpu = 0.0;
2955 ta->optionscpu = 0.0;
2956 ta->paramestcpu = 0.0;
2958 ta->quartblockcpu = 0.0;
2959 ta->quartmaxcpu = 0.0;
2960 ta->quartmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2962 ta->puzzblockcpu = 0.0;
2963 ta->puzzmaxcpu = 0.0;
2964 ta->puzzmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2966 ta->treeblockcpu = 0.0;
2967 ta->treemaxcpu = 0.0;
2968 ta->treemincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2970 ta->generaltime = 0.0;
2971 ta->optionstime = 0.0;
2972 ta->paramesttime = 0.0;
2973 ta->quarttime = 0.0;
2974 ta->quartblocktime = 0.0;
2975 ta->quartmaxtime = 0.0;
2976 ta->quartmintime = DBL_MAX;
2978 ta->puzzblocktime = 0.0;
2979 ta->puzzmaxtime = 0.0;
2980 ta->puzzmintime = DBL_MAX;
2982 ta->treeblocktime = 0.0;
2983 ta->treemaxtime = 0.0;
2984 ta->treemintime = DBL_MAX;
2990 void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta)
2995 if (t2 != t1) t = difftime(t2, t1);
2999 c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC +
3000 ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC;
3002 c = ((double)(c2 - c1))/CLOCKS_PER_SEC;
3004 if (jobtype != OVERALL) {
3006 if (ta->mincpublock > c) ta->mincpublock = c;
3007 if (ta->maxcpublock < c) ta->maxcpublock = c;
3008 if (ta->mintimeblock > t) ta->mintimeblock = t;
3009 if (ta->maxtimeblock < t) ta->maxtimeblock = t;
3012 case GENERAL: ta->generalcpu += c;
3013 ta->generaltime += t;
3015 case OPTIONS: ta->optionscpu += c;
3016 ta->optionstime += t;
3018 case PARAMEST: ta->paramestcpu += c;
3019 ta->paramesttime += t;
3021 case QUARTETS: ta->quartblockcpu += c;
3022 ta->quartblocktime += t;
3023 if (ta->quartmincpu > c) ta->quartmincpu = c;
3024 if (ta->quartmaxcpu < c) ta->quartmaxcpu = c;
3025 if (ta->quartmintime > t) ta->quartmintime = t;
3026 if (ta->quartmaxtime < t) ta->quartmaxtime = t;
3028 case PUZZLING: ta->puzzblockcpu += c;
3029 ta->puzzblocktime += t;
3030 if (ta->puzzmincpu > c) ta->puzzmincpu = c;
3031 if (ta->puzzmaxcpu < c) ta->puzzmaxcpu = c;
3032 if (ta->puzzmintime > t) ta->puzzmintime = t;
3033 if (ta->puzzmaxtime < t) ta->puzzmaxtime = t;
3035 case TREEEVAL: ta->treeblockcpu += c;
3036 ta->treeblocktime += t;
3037 if (ta->treemincpu > c) ta->treemincpu = c;
3038 if (ta->treemaxcpu < c) ta->treemaxcpu = c;
3039 if (ta->treemintime > t) ta->treemintime = t;
3040 if (ta->treemaxtime < t) ta->treemaxtime = t;
3055 # endif /* !PARALLEL */
3056 printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]);
3057 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]);
3058 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]);
3060 # endif /* TIMEDEBUG */
3067 void addtimes(int jobtype, timearray_t *ta)
3075 if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) { /* CPU counter overflow for overall time */
3076 addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta);
3077 ta->tempfullcpu = tempc;
3078 ta->tempfulltime = tempt;
3079 if (jobtype == OVERALL) {
3080 addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
3081 ta->tempcpustart = ta->tempcpu;
3082 ta->tempcpu = tempc;
3083 ta->temptimestart = ta->temptime;
3084 ta->temptime = tempt;
3088 if((jobtype != ta->currentjob) && (jobtype != OVERALL)) { /* change of job type */
3089 addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta);
3090 ta->tempcpustart = ta->tempcpu;
3091 ta->tempcpu = tempc;
3092 ta->temptimestart = ta->temptime;
3093 ta->temptime = tempt;
3094 ta->currentjob = jobtype;
3097 if (tempc < ta->tempcpustart) { /* CPU counter overflow */
3098 addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
3099 ta->tempcpustart = ta->tempcpu;
3100 ta->tempcpu = tempc;
3101 ta->temptimestart = ta->temptime;
3102 ta->temptime = tempt;
3109 /******************************************************************************/
3111 /* estimate parameters of substitution process and rate heterogeneity - no tree
3112 n-taxon tree is not needed because of quartet method or NJ tree topology */
3113 void estimateparametersnotree()
3115 int it, nump, change;
3116 double TSold, YRold, FIold, GEold;
3121 /* count number of parameters */
3122 if (data_optn == NUCLEOTIDE && optim_optn) nump++;
3123 if (fracinv_optim || grate_optim) nump++;
3125 do { /* repeat until nothing changes any more */
3129 /* optimize substitution parameters */
3130 if (data_optn == NUCLEOTIDE && optim_optn) {
3140 FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
3143 if (qcalg_optn) { /* quartet sampling */
3144 optimseqevolparamsq();
3145 } else { /* NJ tree */
3149 readusertree(tmpfp);
3151 optimseqevolparamst();
3154 computedistan(); /* update ML distances */
3156 /* same tolerance as 1D minimization */
3157 if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
3158 (fabs(YRparam - YRold) > 3.3*PEPS1)
3163 /* optimize rate heterogeneity variables */
3164 if (fracinv_optim || grate_optim) {
3174 FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
3176 /* compute NJ tree */
3179 /* use NJ tree topology to estimate parameters */
3181 readusertree(tmpfp);
3185 computedistan(); /* update ML distances */
3188 /* same tolerance as 1D minimization */
3189 if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
3190 (fabs(Geta - GEold) > 3.3*PEPS2)
3195 if (nump == 1) return;
3197 } while (it != MAXITS && change);
3203 /* estimate parameters of substitution process and rate heterogeneity - tree
3204 same as above but here the n-taxon tree is already in memory */
3205 void estimateparameterstree()
3207 int it, nump, change;
3208 double TSold, YRold, FIold, GEold;
3213 /* count number of parameters */
3214 if (data_optn == NUCLEOTIDE && optim_optn) nump++;
3215 if (fracinv_optim || grate_optim) nump++;
3217 do { /* repeat until nothing changes any more */
3221 /* optimize substitution process parameters */
3222 if (data_optn == NUCLEOTIDE && optim_optn) {
3232 FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
3234 optimseqevolparamst();
3235 computedistan(); /* update ML distances */
3238 /* same tolerance as 1D minimization */
3239 if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
3240 (fabs(YRparam - YRold) > 3.3*PEPS1)
3245 /* optimize rate heterogeneity variables */
3246 if (fracinv_optim || grate_optim) {
3256 FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
3259 computedistan(); /* update ML distances */
3262 /* same tolerance as 1D minimization */
3263 if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
3264 (fabs(Geta - GEold) > 3.3*PEPS2)
3269 if (nump == 1) return;
3271 } while (it != MAXITS && change);
3277 /******************************************************************************/
3278 /* exported from main */
3279 /******************************************************************************/
3281 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx)
3283 if (approx == APPROX) {
3285 *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */
3286 *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */
3287 *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */
3289 } else /* approx == EXACT */ {
3291 *d1 = quartet_lklhd(a,b, c,d); /* (a,b)-(c,d) */
3292 *d2 = quartet_lklhd(a,c, b,d); /* (a,c)-(b,d) */
3293 *d3 = quartet_lklhd(a,d, b,c); /* (a,d)-(b,c) */
3298 /***************************************************************/
3306 double tc2, mintogo, minutes, hours;
3309 /* allocate memory for taxon list of bad quartets */
3310 badtaxon = new_ulivector(Maxspc);
3311 for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
3313 /* allocate variable used for randomizing input order */
3314 trueID = new_ivector(Maxspc);
3316 /* allocate memory for quartets */
3317 quartetinfo = mallocquartets(Maxspc);
3319 /* prepare for consensus tree analysis */
3322 if (!(readquart_optn) || (readquart_optn && savequart_optn)) {
3323 /* compute quartets */
3324 FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n");
3326 computeallquartets();
3330 writeallquarts(Maxspc, ALLQUART, quartetinfo);
3331 if (readquart_optn) {
3332 int xx1, xx2, xx3, xx4, count;
3333 readallquarts (Maxspc, ALLQUART, quartetinfo);
3334 if (show_optn) { /* list all unresolved quartets */
3335 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
3336 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
3339 /* initialize bad quartet memory */
3340 for (count = 0; count < Maxspc; count++) badtaxon[count] = 0;
3343 for (xx4 = 3; xx4 < Maxspc; xx4++)
3344 for (xx3 = 2; xx3 < xx4; xx3++)
3345 for (xx2 = 1; xx2 < xx3; xx2++)
3346 for (xx1 = 0; xx1 < xx2; xx1++) {
3347 if (readquartet(xx1, xx2, xx3, xx4) == 7) {
3354 fputid10(unresfp, xx1);
3355 fprintf(unresfp, " ");
3356 fputid10(unresfp, xx2);
3357 fprintf(unresfp, " ");
3358 fputid10(unresfp, xx3);
3359 fprintf(unresfp, " ");
3360 fputid (unresfp, xx4);
3361 fprintf(unresfp, "\n");
3364 } /* end for xx4; for xx3; for xx2; for xx1 */
3365 if (show_optn) /* list all unresolved quartets */
3367 } /* readquart_optn */
3370 PP_SendAllQuarts(numquarts(Maxspc), quartetinfo);
3371 # endif /* PARALLEL */
3373 FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n");
3376 /* start timer - percentage of completed trees */
3381 /* open file for chronological list of puzzling step trees */
3382 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
3383 openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)");
3387 PP_SendDoPermutBlock(Numtrial);
3390 addtimes(GENERAL, &tarr);
3391 for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) {
3393 /* randomize input order */
3394 chooser(Maxspc, Maxspc, trueID);
3396 /* initialize tree */
3399 /* adding all other leafs */
3400 for (i = 3; i < Maxspc; i++) {
3402 /* clear all edgeinfos */
3405 /* clear counter of quartets */
3409 * core of quartet puzzling algorithm
3412 for (a = 0; a < nextleaf - 2; a++)
3413 for (b = a + 1; b < nextleaf - 1; b++)
3414 for (c = b + 1; c < nextleaf; c++) {
3416 /* check which two _leaves_ out of a, b, c
3417 are closer related to each other than
3418 to leaf i according to a least squares
3419 fit of the continous Baysian weights to the
3420 seven trivial "attractive regions". We assign
3421 a score of 1 to all edges between these two leaves
3422 chooseA and chooseB */
3424 checkquartet(a, b, c, i);
3425 incrementedgeinfo(chooseA, chooseB);
3429 /* generate message every 15 minutes */
3433 if ( (time2 - time1) > 900) {
3434 /* every 900 seconds */
3435 /* percentage of completed trees */
3437 FPRINTF(STDOUTFILE "\n");
3440 tc2 = 100.0*Currtrial/Numtrial +
3441 100.0*nq/Numquartets/Numtrial;
3442 mintogo = (100.0-tc2) *
3443 (double) (time2-time0)/60.0/tc2;
3444 hours = floor(mintogo/60.0);
3445 minutes = mintogo - 60.0*hours;
3446 FPRINTF(STDOUTFILE "%2.2f%%", tc2);
3447 FPRINTF(STDOUTFILE " completed (remaining");
3448 FPRINTF(STDOUTFILE " time: %.0f", hours);
3449 FPRINTF(STDOUTFILE " hours %.0f", minutes);
3450 FPRINTF(STDOUTFILE " minutes)\n");
3456 /* find out which edge has the lowest edgeinfo */
3459 /* add the next leaf on minedge */
3460 addnextleaf(minedge);
3463 /* compute bipartitions of current tree */
3465 makenewsplitentries();
3468 int *ctree, startnode;
3470 treelistitemtype *treeitem;
3471 ctree = initctree();
3473 startnode = sortctree(ctree);
3474 trstr=sprintfctree(ctree, psteptreestrlen);
3477 treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum);
3479 if((listqptrees == PSTOUT_LIST)
3480 || (listqptrees == PSTOUT_LISTORDER)) {
3481 /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */
3482 fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n",
3483 Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum);
3487 printf("%s\n", trstr);
3488 printfsortedpstrees(psteptreelist);
3495 /* free tree before building the next tree */
3498 addtimes(PUZZLING, &tarr);
3500 # endif /* PARALLEL */
3502 /* close file for list of puzzling step trees */
3503 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
3506 if (mflag == 1) FPRINTF(STDOUTFILE "\n");
3508 /* garbage collection */
3510 free_ivector(trueID);
3513 free_cmatrix(biparts);
3514 # endif /* PARALLEL */
3518 /* compute majority rule consensus tree */
3521 /* write consensus tree to tmp file */
3523 writeconsensustree(tmpfp);
3526 /***************************************************************/
3530 int i, a, a1, a2, b, b1, b2, c, c1, c2, d;
3532 double logs[3], d1, d2, d3, temp;
3533 ivector qts, mlorder, gettwo;
3534 /* reset variables */
3535 ar1 = ar2 = ar3 = 0;
3536 reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0;
3537 reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u =
3538 reg4d = reg5l = reg5r = reg6u = reg6d = 0;
3540 /* place for random quartet */
3541 qts = new_ivector(4);
3543 /* initialize output file */
3544 openfiletowrite(&trifp, TRIANGLE, "Postscript output");
3546 FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n");
3554 addtimes(GENERAL, &tarr);
3555 if (lmqts == 0) { /* all possible quartets */
3557 if (numclust == 4) { /* four-cluster analysis */
3559 for (a = 0; a < clustA; a++)
3560 for (b = 0; b < clustB; b++)
3561 for (c = 0; c < clustC; c++)
3562 for (d = 0; d < clustD; d++) {
3569 /* maximum likelihood values */
3570 /* approximate ML is sufficient */
3571 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX);
3573 /* draw point for LM analysis */
3574 makelmpoint(trifp, d1, d2, d3);
3575 addtimes(QUARTETS, &tarr);
3580 if (numclust == 3) { /* three-cluster analysis */
3582 gettwo = new_ivector(2);
3584 for (a = 0; a < clustA; a++)
3585 for (b = 0; b < clustB; b++)
3586 for (c1 = 0; c1 < clustC-1; c1++)
3587 for (c2 = c1+1; c2 < clustC; c2++) {
3594 /* maximum likelihood values */
3595 /* approximate ML is sufficient */
3596 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX);
3598 /* randomize order of d2 and d3 */
3599 if (randominteger(2) == 1) {
3605 /* draw point for LM analysis */
3606 makelmpoint(trifp, d1, d2, d3);
3607 addtimes(QUARTETS, &tarr);
3610 free_ivector(gettwo);
3613 if (numclust == 2) { /* two-cluster analysis */
3615 gettwo = new_ivector(2);
3617 for (a1 = 0; a1 < clustA-1; a1++)
3618 for (a2 = a1+1; a2 < clustA; a2++)
3619 for (b1 = 0; b1 < clustB-1; b1++)
3620 for (b2 = b1+1; b2 < clustB; b2++) {
3627 /* maximum likelihood values */
3628 /* approximate ML is sufficient */
3629 compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX);
3631 /* randomize order of d2 and d3 */
3632 if (randominteger(2) == 1) {
3638 /* draw point for LM analysis */
3639 makelmpoint(trifp, d1, d2, d3);
3640 addtimes(QUARTETS, &tarr);
3644 free_ivector(gettwo);
3647 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
3649 mlorder = new_ivector(3);
3652 for (i = 3; i < Maxspc; i++)
3653 for (a = 0; a < i - 2; a++)
3654 for (b = a + 1; b < i - 1; b++)
3655 for (c = b + 1; c < i; c++)
3656 for (d = 3; d < Maxspc; d++)
3657 for (c = 2; c < d; c++)
3658 for (b = 1; b < c; b++)
3659 for (a = 0; a < b; a++)
3662 for (i = 3; i < Maxspc; i++)
3663 for (c = 2; c < i; c++)
3664 for (b = 1; b < c; b++)
3665 for (a = 0; a < b; a++) {
3672 /* maximum likelihood values */
3673 /* approximate ML is sufficient */
3674 compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX);
3676 /* randomize order */
3677 chooser(3,3,mlorder);
3678 d1 = logs[mlorder[0]];
3679 d2 = logs[mlorder[1]];
3680 d3 = logs[mlorder[2]];
3682 /* draw point for LM analysis */
3683 makelmpoint(trifp, d1, d2, d3);
3684 addtimes(QUARTETS, &tarr);
3687 free_ivector(mlorder);
3690 } else { /* randomly selected quartets */
3692 if (numclust == 4) { /* four-cluster analysis */
3694 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3701 /* choose random quartet */
3702 qts[0] = clusterA[ randominteger(clustA) ];
3703 qts[1] = clusterB[ randominteger(clustB) ];
3704 qts[2] = clusterC[ randominteger(clustC) ];
3705 qts[3] = clusterD[ randominteger(clustD) ];
3707 /* maximum likelihood values */
3708 /* approximate ML is sufficient */
3709 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3711 /* draw point for LM analysis */
3712 makelmpoint(trifp, d1, d2, d3);
3713 addtimes(QUARTETS, &tarr);
3718 if (numclust == 3) { /* three-cluster analysis */
3720 gettwo = new_ivector(2);
3722 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3729 /* choose random quartet */
3730 qts[0] = clusterA[ randominteger(clustA) ];
3731 qts[1] = clusterB[ randominteger(clustB) ];
3732 chooser(clustC, 2, gettwo);
3733 qts[2] = clusterC[gettwo[0]];
3734 qts[3] = clusterC[gettwo[1]];
3736 /* maximum likelihood values */
3737 /* approximate ML is sufficient */
3738 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3740 /* order of d2 and d3 is already randomized! */
3742 /* draw point for LM analysis */
3743 makelmpoint(trifp, d1, d2, d3);
3744 addtimes(QUARTETS, &tarr);
3748 free_ivector(gettwo);
3751 if (numclust == 2) { /* two-cluster analysis */
3753 gettwo = new_ivector(2);
3755 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3762 /* choose random quartet */
3763 chooser(clustA, 2, gettwo);
3764 qts[0] = clusterA[gettwo[0]];
3765 qts[1] = clusterA[gettwo[1]];
3766 chooser(clustB, 2, gettwo);
3767 qts[2] = clusterB[gettwo[0]];
3768 qts[3] = clusterB[gettwo[1]];
3770 /* maximum likelihood values */
3771 /* approximate ML is sufficient */
3772 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3774 /* order of d2 and d3 is already randomized! */
3776 /* draw point for LM analysis */
3777 makelmpoint(trifp, d1, d2, d3);
3778 addtimes(QUARTETS, &tarr);
3781 free_ivector(gettwo);
3784 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
3786 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3793 /* choose random quartet */
3794 chooser(Maxspc, 4, qts);
3796 /* maximum likelihood values */
3797 /* approximate ML is sufficient */
3798 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3800 /* order of d1, d2, and d3 is already randomized! */
3802 /* draw point for LM analysis */
3803 makelmpoint(trifp, d1, d2, d3);
3804 addtimes(QUARTETS, &tarr);
3816 /***************************************************************/
3818 void setdefaults() {
3820 strcpy(INFILE, INFILEDEFAULT);
3821 strcpy(OUTFILE, OUTFILEDEFAULT);
3822 strcpy(TREEFILE, TREEFILEDEFAULT);
3823 strcpy(INTREE, INTREEDEFAULT);
3824 strcpy(DISTANCES, DISTANCESDEFAULT);
3825 strcpy(TRIANGLE, TRIANGLEDEFAULT);
3826 strcpy(UNRESOLVED, UNRESOLVEDDEFAULT);
3827 strcpy(ALLQUART, ALLQUARTDEFAULT);
3828 strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT);
3829 strcpy(OUTPTLIST, OUTPTLISTDEFAULT);
3830 strcpy(OUTPTORDER, OUTPTORDERDEFAULT);
3832 usebestq_optn = FALSE;
3833 savequartlh_optn = FALSE;
3834 savequart_optn = FALSE;
3835 readquart_optn = FALSE;
3837 randseed = -1; /* to set random random seed */
3841 /***************************************************************/
3846 fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION);
3848 fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION);
3852 /***************************************************************/
3854 void printusage(char *fname)
3856 fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
3864 /***************************************************************/
3867 void printusagehhh(char *fname)
3869 fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
3870 fprintf(stderr, " -h - print usage\n");
3871 fprintf(stderr, " -wqf - write quartet file to Infilename.allquart\n");
3872 fprintf(stderr, " -rqf - read quartet file from Infilename.allquart\n");
3873 fprintf(stderr, " -wqlb - write quart lhs to Infilename.allquartlh (binary)\n");
3874 fprintf(stderr, " -wqla - write quart lhs to Infilename.allquartlh (ASCII)\n");
3875 fprintf(stderr, " -bestq - use best quart, no basian weights\n");
3876 fprintf(stderr, " -randseed<#> - use <#> as random number seed, for debug purposes only\n");
3885 /***************************************************************/
3888 void scancmdline(int *argc, char **argv[])
3890 static short infileset = 0;
3891 static short intreefileset = 0;
3894 int count, dummyint;
3896 for (n = 1; n < *argc; n++) {
3898 printf("argv[%d] = %s\n", n, (*argv)[n]);
3905 count = sscanf((*argv)[n], "-wqlb%n", &dummyint);
3906 if (dummyint == 5) {
3907 savequartlh_optn = TRUE;
3908 saveqlhbin_optn = TRUE;
3913 count = sscanf((*argv)[n], "-wqla%n", &dummyint);
3914 if (dummyint == 5) {
3915 savequartlh_optn = TRUE;
3916 saveqlhbin_optn = FALSE;
3921 count = sscanf((*argv)[n], "-wqf%n", &dummyint);
3922 if (dummyint == 4) {
3923 savequart_optn = TRUE;
3928 count = sscanf((*argv)[n],"-rqf%n", &dummyint);
3929 if (dummyint == 4) {
3930 readquart_optn = TRUE;
3935 count = sscanf((*argv)[n],"-bestq%n", &dummyint);
3936 if (dummyint == 6) {
3937 usebestq_optn = TRUE;
3942 count = sscanf((*argv)[n],"-hhh%n", &dummyint);
3944 printusagehhh((*argv)[0]);
3950 count = sscanf((*argv)[n],"-V%n", &dummyint);
3952 printversion((*argv)[0]);
3957 count = sscanf((*argv)[n],"-version%n", &dummyint);
3959 printversion((*argv)[0]);
3964 count = sscanf((*argv)[n],"--version%n", &dummyint);
3966 printversion((*argv)[0]);
3971 count = sscanf((*argv)[n],"-h%n", &dummyint);
3973 printusage((*argv)[0]);
3977 count = sscanf((*argv)[n],"-randseed%d", &dummyint);
3979 randseed = dummyint;
3984 count = sscanf((*argv)[n],"-h%n", &dummyint);
3985 if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]);
3987 count = sscanf((*argv)[n],"-writequarts%n", &dummyint);
3988 if (count == 1) writequartstofile = 1;;
3990 count = sscanf((*argv)[n],"-ws%d", &dummyint);
3991 if (count == 1) windowsize = dummyint;
3994 if ((*argv)[n][0] != '-') {
3995 if (infileset == 0) {
3996 strcpy(INFILE, (*argv)[n]);
3998 sprintf(OUTFILE ,"%s.%s", INFILE, OUTFILEEXT);
3999 sprintf(TREEFILE ,"%s.%s", INFILE, TREEFILEEXT);
4000 sprintf(DISTANCES ,"%s.%s", INFILE, DISTANCESEXT);
4001 sprintf(TRIANGLE ,"%s.%s", INFILE, TRIANGLEEXT);
4002 sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT);
4003 sprintf(ALLQUART ,"%s.%s", INFILE, ALLQUARTEXT);
4004 sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT);
4005 sprintf(OUTPTLIST ,"%s.%s", INFILE, OUTPTLISTEXT);
4006 sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT);
4007 FPRINTF(STDOUTFILE "Input file: %s\n", INFILE);
4010 if (intreefileset == 0) {
4011 strcpy(INTREE, (*argv)[n]);
4013 sprintf(OUTFILE ,"%s.%s", INTREE, OUTFILEEXT);
4014 sprintf(TREEFILE ,"%s.%s", INTREE, TREEFILEEXT);
4015 sprintf(DISTANCES ,"%s.%s", INTREE, DISTANCESEXT);
4016 FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE);
4021 if (flagused == FALSE) {
4022 fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]);
4030 /***************************************************************/
4032 void inputandinit(int *argc, char **argv[]) {
4036 /* vectors used in QP and LM analysis */
4037 qweight = new_dvector(3);
4038 sqdiff = new_dvector(3);
4039 qworder = new_ivector(3);
4040 sqorder = new_ivector(3);
4042 /* Initialization and parsing of Commandline */
4044 scancmdline(argc, argv);
4046 /* initialize random numbers generator */
4048 fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed);
4049 randseed = initrandom(randseed);
4051 psteptreelist = NULL;
4056 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION);
4058 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA);
4063 openfiletoread(&seqfp, INFILE, "sequence data");
4064 getsizesites(seqfp);
4065 FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc);
4068 data_optn = guessdatatype();
4070 /* translate characters into format used by ML engine */
4076 /* estimate base frequencies from data set */
4079 estimatebasefreqs();
4081 /* guess model of substitution */
4084 /* initialize guess variables */
4085 auto_datatype = AUTO_GUESS;
4086 if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS;
4087 else auto_aamodel = AUTO_DEFAULT;
4088 /* save guessed amino acid options */
4089 guessDayhf_optn = Dayhf_optn;
4090 guessJtt_optn = Jtt_optn;
4091 guessmtrev_optn = mtrev_optn;
4092 guesscprev_optn = cprev_optn;
4093 guessblosum62_optn = blosum62_optn;
4094 guessvtmv_optn = vtmv_optn;
4095 guesswag_optn = wag_optn;
4096 guessauto_aamodel = auto_aamodel;
4099 /* check for user specified tree */
4100 if ((utfp = fopen(INTREE, "r")) != NULL) {
4102 puzzlemode = USERTREE;
4104 puzzlemode = QUARTPUZ;
4107 /* reserve memory for cluster LM analysis */
4108 clusterA = new_ivector(Maxspc);
4109 clusterB = new_ivector(Maxspc);
4110 clusterC = new_ivector(Maxspc);
4111 clusterD = new_ivector(Maxspc);
4113 /* set options interactively */
4116 /* open usertree file right after start */
4117 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
4118 openfiletoread(&utfp, INTREE, "user trees");
4121 /* start main timer */
4124 addtimes(OPTIONS, &tarr);
4126 /* symmetrize doublet frequencies if specified */
4132 /* determine how many usertrees */
4133 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
4137 if ((char) ci == ';') numutrees++;
4138 } while (ci != EOF);
4140 if (numutrees < 1) {
4141 FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n");
4146 /* check fraction of invariable sites */
4147 if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim)
4148 /* fraction of invariable site was specified manually */
4149 if (fracinv > MAXFI)
4152 addtimes(GENERAL, &tarr);
4153 /* estimate parameters */
4154 if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) {
4155 /* no tree present */
4156 estimateparametersnotree();
4159 /* use 1st user tree */
4162 estimateparameterstree();
4164 /* don't use first user tree */
4165 estimateparametersnotree();
4168 addtimes(PARAMEST, &tarr);
4170 /* compute expected Ts/Tv ratio */
4171 if (data_optn == NUCLEOTIDE) computeexpectations();
4173 } /* inputandinit */
4177 /***************************************************************/
4179 void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot)
4183 case QUARTPUZ: /* read QP tree */
4184 readusertree(intreefp);
4185 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n");
4188 findbestratecombination();
4190 case USERTREE: /* read user tree */
4191 readusertree(intreefp);
4192 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree # %d\n", utreenum+1);
4196 ulkl[utreenum] = Ctree->lklhd;
4197 allsitelkl(Ctree->condlkl, allsites[utreenum]);
4199 if (utreenum==0) findbestratecombination();
4204 if (compclock) { /* clocklike branch length */
4207 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n");
4211 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1);
4216 /* find best place for root */
4219 if (utreenum==0) locroot = *oldlocroot;
4220 else *oldlocroot = locroot;
4223 locroot = findrootedge();
4226 /* if user-specified edge for root does not exist use displayed outgroup */
4227 if (!checkedge(locroot)) {
4231 /* compute likelihood */
4232 clock_lklhd(locroot);
4234 ulklc[utreenum] = Ctree->lklhdc;
4235 allsitelkl(Ctree->condlkl, allsitesc[utreenum]);
4241 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd);
4243 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc);
4245 /* write ML branch length tree to outree file */
4246 clockmode = 0; /* nonclocklike branch lengths */
4247 fputphylogeny(outtreefp);
4249 /* clocklike branch lengths */
4252 fputrooted(outtreefp, locroot);
4254 } /* evaluatetree */
4256 /***************************************************************/
4259 if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
4261 free(splitpatterns);
4263 free_ivector(consconfid);
4264 free_ivector(conssizes);
4265 free_cmatrix(consbiparts);
4266 free_ulivector(badtaxon);
4268 free_cmatrix(Identif);
4269 free_dvector(Freqtpm);
4270 free_imatrix(Basecomp);
4271 free_ivector(clusterA);
4272 free_ivector(clusterB);
4273 free_ivector(clusterC);
4274 free_ivector(clusterD);
4275 free_dvector(qweight);
4276 free_dvector(sqdiff);
4277 free_ivector(qworder);
4278 free_ivector(sqorder);
4279 freetreelist(&psteptreelist, &psteptreenum, &psteptreesum);
4282 /***************************************************************/
4285 /******************************************************************************/
4287 /******************************************************************************/
4289 int main(int argc, char *argv[])
4291 int i, oldlocroot=0;
4293 /* start main timer */
4294 time(&walltimestart);
4295 cputimestart = clock();
4299 PP_Init(&argc, &argv);
4301 slave_main(argc, argv);
4303 # endif /* PARALLEL */
4305 inputandinit(&argc, &argv);
4308 /* FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE); */
4309 /* openfiletowrite(&ofp, OUTFILE, "general output"); */
4310 /* writeoutputfile(ofp,WRITEPARAMS); */
4314 /* write distance matrix */
4315 FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES);
4316 openfiletowrite(&dfp, DISTANCES, "pairwise distances");
4321 PP_SendSizes(Maxspc, Maxsite, numcats, Numptrn, tpmradix, outgroup, fracconst, randseed);
4322 PP_SendData(Seqpat, /* cmatrix */
4323 Alias, Weight, constpat, /* ivector */
4324 Rates, Eval, Freqtpm, /* dvector */
4325 Evec, Ievc, iexp, Distanmat, /* dmatrix */
4326 ltprobr); /* dcube */
4327 # endif /* PARALLEL */
4328 psteptreestrlen = (Maxspc * (int)(1 + log10(Maxspc))) +
4332 case TREERECON_OPTN: /* tree reconstruction */
4334 if (puzzlemode == QUARTPUZ) { /* quartet puzzling */
4336 } /* quartet puzzling */
4339 case LIKMAPING_OPTN: /* likelihood mapping */
4343 } /* switch typ_optn */
4346 free_cmatrix(Seqchar);
4347 free_cmatrix(seqchars);
4349 /* reserve memory for tree statistics */
4350 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) {
4351 ulkl = new_dvector(numutrees);
4352 allsites = new_dmatrix(numutrees,Numptrn);
4354 ulklc = new_dvector(numutrees);
4355 allsitesc = new_dmatrix(numutrees,Numptrn);
4359 /* write puzzling step tree list */
4360 if ((listqptrees == PSTOUT_ORDER) || (listqptrees == PSTOUT_LISTORDER)) {
4361 openfiletowrite(&qptorder, OUTPTORDER, "puzzling step trees (unique)");
4363 fprintfsortedpstrees(qptorder, psteptreelist, psteptreenum, psteptreesum, 1, 0.0);
4364 closefile(qptorder);
4367 /* compute ML branch lengths for QP tree and for 1st user tree */
4369 case TREERECON_OPTN:
4371 /* open outtree file */
4372 openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4374 addtimes(GENERAL, &tarr);
4376 switch (puzzlemode) {
4377 case QUARTPUZ: /* read QP tree */
4379 openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4380 evaluatetree(tmpfp, tfp, puzzlemode, 0, 1, &oldlocroot);
4381 addtimes(TREEEVAL, &tarr);
4386 /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4387 /*writeoutputfile(ofp,WRITEREST);*/
4389 case USERTREE: /* read user tree */
4390 openfiletoappend(&ofp, OUTFILE, "general output");
4392 openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4393 for (i = 0; i < numutrees; i++) {
4394 evaluatetree(utfp, tfp, puzzlemode, i, numutrees, &oldlocroot);
4395 if (i==0) writeoutputfile(ofp,WRITEREST);
4396 writecutree(ofp, i+1);
4397 addtimes(TREEEVAL, &tarr);
4404 /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4405 /*writeoutputfile(ofp,WRITEREST);*/
4407 } /* switch puzzlemode */
4411 /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4412 /*writeoutputfile(ofp,WRITEREST);*/
4414 } /* switch typ_optn */
4416 /* print tree statistics */
4417 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1)
4418 printtreestats(ofp);
4420 /* free memory for tree statistics */
4421 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) {
4423 free_dmatrix(allsites);
4425 free_dvector(ulklc);
4426 free_dmatrix(allsitesc);
4432 # endif /* PARALLEL */
4434 /* write CPU/Wallclock times and parallel statistics */
4435 time(&walltimestop);
4436 cputimestop = clock();
4437 addtimes(OVERALL, &tarr);
4439 printtimearr(&tarr);
4440 # endif /* TIMEDEBUG */
4441 fullcpu = tarr.fullcpu;
4442 fulltime = tarr.fulltime;
4445 writetimesstat(ofp);
4446 # endif /* PARALLEL */
4456 /* printbestratecombination(stderr); */
4459 FPRINTF(STDOUTFILE "\nAll results written to disk:\n");
4460 FPRINTF(STDOUTFILE " Puzzle report file: %s\n", OUTFILE);
4461 FPRINTF(STDOUTFILE " Likelihood distances: %s\n", DISTANCES);
4463 if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST)
4464 FPRINTF(STDOUTFILE " Phylip tree file: %s\n", TREEFILE);
4465 if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) {
4466 if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER))
4467 FPRINTF(STDOUTFILE " Unique puzzling step trees: %s\n", OUTPTORDER);
4468 if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER))
4469 FPRINTF(STDOUTFILE " Puzzling step tree list: %s\n", OUTPTLIST);
4471 if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ)
4472 FPRINTF(STDOUTFILE " Unresolved quartets: %s\n", UNRESOLVED);
4473 if (typ_optn == LIKMAPING_OPTN)
4474 FPRINTF(STDOUTFILE " Likelihood mapping diagram: %s\n", TRIANGLE);
4475 FPRINTF(STDOUTFILE "\n");
4477 /* runtime message */
4479 "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n",
4480 difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60.,
4481 difftime(Stoptime, Starttime)/3600.);
4483 " including input %.0f seconds (= %.1f minutes = %.1f hours)\n",
4484 fulltime, fulltime/60., fulltime/3600.);
4487 "and %.0f seconds CPU time (= %.1f minutes = %.1f hours)\n\n",
4488 fullcpu, fullcpu/60., fullcpu/3600.);
4489 #endif /* TIMEDEBUG */
4497 # endif /* PARALLEL */
4503 /* compare function for uli - sort largest numbers first */
4504 int ulicmp(const void *ap, const void *bp)
4511 if (a > b) return -1;
4512 else if (a < b) return 1;
4516 /* compare function for int - sort smallest numbers first */
4517 int intcmp(const void *ap, const void *bp)
4524 if (a < b) return -1;
4525 else if (a > b) return 1;