5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
21 void num2quart(uli qnum, int *a, int *b, int *c, int *d)
25 uli lowval=0, highval=0;
27 aa=0; bb=1; cc=2; dd=3;
29 temp = (double)(24 * qnum);
32 /* temp = pow(temp, (double)(1/4)); */
33 dd = (uli) floor(temp) + 1;
35 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
36 highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
38 while ((lowval > qnum)) {
39 dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
42 while (highval <= qnum) {
43 dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
45 lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
49 temp = (double)(6 * qnum);
50 temp = pow(temp, (double)(1/3));
51 cc = (uli) floor(temp);
53 lowval = (uli) cc*(cc-1)*(cc-2)/6;
54 highval = (uli) (cc+1)*cc*(cc-1)/6;
56 while ((lowval > qnum)) {
57 cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6;
60 while (highval <= qnum) {
61 cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6;
63 lowval = (uli) cc*(cc-1)*(cc-2)/6;
67 temp = (double)(2 * qnum);
69 bb = (uli) floor(temp);
71 lowval = (uli) bb*(bb-1)/2;
72 highval = (uli) (bb+1)*bb/2;
74 while ((lowval > qnum)) {
75 bb -= 1; lowval = (uli) bb*(bb-1)/2;
78 while (highval <= qnum) {
79 bb += 1; highval = (uli) (bb+1)*bb/2;
81 lowval = (uli) bb*(bb-1)/2;
98 uli numquarts(int maxspc)
113 (uli) b * (b-1) / 2 +
114 (uli) c * (c-1) * (c-2) / 6 +
115 (uli) d * (d-1) * (d-2) * (d-3) / 24;
122 uli quart2num (int a, int b, int c, int d)
125 if ((a>b) || (b>c) || (c>d)) {
126 fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c,
131 (uli) b * (b-1) / 2 +
132 (uli) c * (c-1) * (c-2) / 6 +
133 (uli) d * (d-1) * (d-2) * (d-3) / 24;
141 /* flag=0 old allquart binary */
142 /* flag=1 allquart binary */
143 /* flag=2 allquart ACSII */
144 /* flag=3 quartlh binary */
145 /* flag=4 quartlh ASCII */
147 void writetpqfheader(int nspec,
153 unsigned long nquart;
154 unsigned long blocklen;
156 nquart = numquarts(nspec);
157 /* compute number of bytes */
158 if (nquart % 2 == 0) { /* even number */
159 blocklen = (nquart)/2;
160 } else { /* odd number */
161 blocklen = (nquart + 1)/2;
163 /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */
164 fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION);
165 fprintf(ofp, "species: %d\n", nspec);
166 fprintf(ofp, "quartets: %lu\n", nquart);
167 fprintf(ofp, "bytes: %lu\n\n", blocklen);
170 /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */
173 if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
174 if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
175 if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
176 if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
178 for (currspec=0; currspec<nspec; currspec++) {
179 fputid(ofp, currspec);
181 } /* for each species */
184 } /* writetpqfheader */
188 void writeallquarts(int nspec,
190 unsigned char *quartetinfo)
191 { unsigned long nquart;
192 unsigned long blocklen;
195 nquart = numquarts(nspec);
197 /* compute number of bytes */
198 if (nquart % 2 == 0) { /* even number */
199 blocklen = (nquart)/2;
200 } else { /* odd number */
201 blocklen = (nquart + 1)/2;
204 FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename);
206 openfiletowrite(&ofp, filename, "all quartets");
208 writetpqfheader(nspec, ofp, 0);
210 fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp);
213 } /* writeallquart */
217 void readallquarts(int nspec,
219 unsigned char *quartetinfo)
220 { unsigned long nquart;
221 unsigned long blocklen;
223 unsigned long dummynquart;
224 unsigned long dummyblocklen;
226 char dummyversion[128];
230 nquart = numquarts(nspec);
232 /* compute number of bytes */
233 if (nquart % 2 == 0) { /* even number */
234 blocklen = (nquart)/2;
235 } else { /* odd number */
236 blocklen = (nquart + 1)/2;
239 /* &(quartetinfo[0] */
241 openfiletoread(&ifp, filename, "all quartets");
243 fscanf(ifp, "TREE-PUZZLE\n");
244 fscanf(ifp, "%s\n\n", dummyversion);
246 fscanf(ifp, "species: %d\n", &dummynspec);
247 fscanf(ifp, "quartets: %lu\n", &dummynquart);
248 fscanf(ifp, "bytes: %lu\n\n", &dummyblocklen);
250 if ((nspec != dummynspec) ||
251 (nquart != dummynquart) ||
252 (blocklen != dummyblocklen)) {
253 FPRINTF(STDOUTFILE "ERROR: Parameters in quartet file %s do not match!!!\n",
258 # endif /* PARALLEL */
262 FPRINTF(STDOUTFILE "Reading quartet file: %s\n", filename);
263 FPRINTF(STDOUTFILE " generated by: TREE-PUZZLE %s\n", dummyversion);
264 FPRINTF(STDOUTFILE " current: TREE-PUZZLE %s\n", VERSION);
266 FPRINTF(STDOUTFILE " %d species, %lu quartets, %lu bytes\n",
267 dummynspec, dummynquart, dummyblocklen);
269 for (currspec=0; currspec<nspec; currspec++) {
271 fscanf(ifp, "%s\n", dummyname);
272 FPRINTF(STDOUTFILE " %3d. %s (", currspec+1, dummyname);
273 fputid(STDOUT, currspec);
274 FPRINTF(STDOUTFILE ")\n");
276 } /* for each species */
277 FPRINTF(STDOUTFILE "\n");
279 fread(&(quartetinfo[0]), sizeof(char), blocklen, ifp);
282 } /* writeallquart */
287 /******************************************************************************/
288 /* options - file I/O - output */
289 /******************************************************************************/
291 /* compute TN parameters according to F84 Ts/Tv ratio */
294 double rho, piA, piC, piG, piT, piR, piY, ts, yr;
302 if (piC*piT*piR + piA*piG*piY == 0.0) {
303 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
304 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
308 rho = (piR*piY*(piR*piY*tstvf84 - (piA*piG + piC*piT)))/
309 (piC*piT*piR + piA*piG*piY);
311 if(piR == 0.0 || piY == 0.0 || (piR + rho) == 0.0) {
312 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
313 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
317 ts = 0.5 + 0.25*rho*(1.0/piR + 1.0/piY);
318 yr = (piY + rho)/piY * piR/(piR + rho);
319 if (ts < MINTS || ts > MAXTS) {
320 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
321 FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n");
325 if (yr < MINYR || yr > MAXYR) {
326 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
327 FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n");
336 /* compute number of quartets used in LM analysis */
341 Numquartets = (uli) clustA*clustB*clustC*clustD;
343 Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2;
345 Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2;
347 Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24;
353 /* set options interactively */
360 puzzlemode = PAIRDIST; /*Only do pairwise dist. CZ, 05/16/01*/
363 rhetmode = UNIFORMRATE; /* assume rate homogeneity */
368 fracinv_optim = FALSE;
370 compclock = FALSE; /* compute clocklike branch lengths */
371 locroot = -1; /* search for optimal place of root */
372 qcalg_optn = FALSE; /* don't use sampling of quartets */
373 approxp_optn = TRUE; /* approximate parameter estimates */
374 listqptrees = PSTOUT_NONE; /* list puzzling step trees */
376 /* approximate QP quartets? */
377 if (Maxspc <= 6) approxqp = FALSE;
378 else approxqp = TRUE;
380 codon_optn = 0; /* use all positions in a codon */
382 /* number of puzzling steps */
383 if (Maxspc <= 25) Numtrial = 1000;
384 else if (Maxspc <= 50) Numtrial = 10000;
385 else if (Maxspc <= 75) Numtrial = 25000;
386 else Numtrial = 50000;
388 utree_optn = TRUE; /* use first user tree for estimation */
389 outgroup = 0; /* use first taxon as outgroup */
390 sym_optn = FALSE; /* symmetrize doublet frequencies */
391 tstvf84 = 0.0; /* disable F84 model */
392 show_optn = FALSE; /* show unresolved quartets */
393 typ_optn = TREERECON_OPTN; /* tree reconstruction */
394 numclust = 1; /* one clusters in LM analysis */
395 lmqts = 0; /* all quartets in LM analysis */
397 if (Numquartets > 10000) {
398 lmqts = 10000; /* 10000 quartets in LM analysis */
403 FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n");
404 FPRINTF(STDOUTFILE " b Type of analysis? ");
405 if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n");
406 if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n");
407 if (typ_optn == TREERECON_OPTN) {
408 FPRINTF(STDOUTFILE " k Tree search procedure? ");
409 if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n");
410 if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n");
411 if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n");
412 if (puzzlemode == QUARTPUZ) {
413 FPRINTF(STDOUTFILE " v Approximate quartet likelihood? %s\n",
414 (approxqp ? "Yes" : "No"));
415 FPRINTF(STDOUTFILE " u List unresolved quartets? %s\n",
416 (show_optn ? "Yes" : "No"));
417 FPRINTF(STDOUTFILE " n Number of puzzling steps? %lu\n",
419 FPRINTF(STDOUTFILE " j List puzzling step trees? ");
420 switch (listqptrees) {
421 case PSTOUT_NONE: FPRINTF(STDOUTFILE "No\n"); break;
422 case PSTOUT_ORDER: FPRINTF(STDOUTFILE "Unique topologies\n"); break;
423 case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break;
424 case PSTOUT_LIST: FPRINTF(STDOUTFILE "Chronological list only\n"); break;
427 FPRINTF(STDOUTFILE " o Display as outgroup? ");
428 fputid(STDOUT, outgroup);
429 FPRINTF(STDOUTFILE "\n");
431 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
432 FPRINTF(STDOUTFILE " z Compute clocklike branch lengths? ");
433 if (compclock) FPRINTF(STDOUTFILE "Yes\n");
434 else FPRINTF(STDOUTFILE "No\n");
437 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
438 FPRINTF(STDOUTFILE " l Location of root? ");
439 if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n");
440 else if (locroot < Maxspc) {
441 FPRINTF(STDOUTFILE "Branch %d (", locroot + 1);
442 fputid(STDOUT, locroot);
443 FPRINTF(STDOUTFILE ")\n");
444 } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1);
447 if (typ_optn == LIKMAPING_OPTN) {
448 FPRINTF(STDOUTFILE " g Group sequences in clusters? ");
449 if (numclust == 1) FPRINTF(STDOUTFILE "No\n");
450 else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust);
451 FPRINTF(STDOUTFILE " n Number of quartets? ");
452 if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets);
453 else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts);
455 FPRINTF(STDOUTFILE " e Parameter estimates? ");
456 if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n");
457 else FPRINTF(STDOUTFILE "Exact (slow)\n");
458 if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {
459 FPRINTF(STDOUTFILE " x Parameter estimation uses? ");
460 if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
461 else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
464 FPRINTF(STDOUTFILE " x Parameter estimation uses? ");
466 FPRINTF(STDOUTFILE "1st input tree\n");
467 else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
468 else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
470 FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n");
471 FPRINTF(STDOUTFILE " d Type of sequence input data? ");
472 if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: ");
473 if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n");
474 if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n");
475 if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n");
476 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0 && !SH_optn) {
477 FPRINTF(STDOUTFILE " h Codon positions selected? ");
478 if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n");
479 if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n");
480 if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n");
481 if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n");
482 if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n");
484 FPRINTF(STDOUTFILE " m Model of substitution? ");
485 if (data_optn == NUCLEOTIDE) { /* nucleotides */
488 FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n");
490 FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n");
491 FPRINTF(STDOUTFILE " p Constrain TN model to F84 model? ");
493 FPRINTF(STDOUTFILE "No\n");
494 else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84);
496 FPRINTF(STDOUTFILE " t Transition/transversion parameter? ");
498 FPRINTF(STDOUTFILE "Estimate from data set\n");
500 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
502 FPRINTF(STDOUTFILE " r Y/R transition parameter? ");
504 FPRINTF(STDOUTFILE "Estimate from data set\n");
506 FPRINTF(STDOUTFILE "%.2f\n", YRparam);
510 FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n");
511 FPRINTF(STDOUTFILE " t Transition/transversion parameter? ");
513 FPRINTF(STDOUTFILE "Estimate from data set\n");
515 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
518 if (data_optn == NUCLEOTIDE && SH_optn) {
519 FPRINTF(STDOUTFILE " h Doublets defined by? ");
521 FPRINTF(STDOUTFILE "1st and 2nd codon positions\n");
523 FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n");
525 if (data_optn == AMINOACID) { /* amino acids */
526 switch (auto_aamodel) {
528 FPRINTF(STDOUTFILE "Auto: ");
531 FPRINTF(STDOUTFILE "Def.: ");
534 if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n");
535 if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n");
536 if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n");
537 if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n");
538 if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n");
539 if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n");
540 if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n");
542 if (data_optn == BINARY) { /* binary states */
543 FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n");
545 if (data_optn == AMINOACID)
546 FPRINTF(STDOUTFILE " f Amino acid frequencies? ");
547 else if (data_optn == NUCLEOTIDE && SH_optn)
548 FPRINTF(STDOUTFILE " f Doublet frequencies? ");
549 else if (data_optn == NUCLEOTIDE && nuc_optn)
550 FPRINTF(STDOUTFILE " f Nucleotide frequencies? ");
551 else if (data_optn == BINARY)
552 FPRINTF(STDOUTFILE " f Binary state frequencies? ");
553 FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" :
554 "Use specified values"));
555 if (data_optn == NUCLEOTIDE && SH_optn)
556 FPRINTF(STDOUTFILE " s Symmetrize doublet frequencies? %s\n",
557 (sym_optn ? "Yes" : "No"));
559 FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n");
560 FPRINTF(STDOUTFILE " w Model of rate heterogeneity? ");
561 if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n");
562 if (rhetmode == GAMMARATE ) FPRINTF(STDOUTFILE "Gamma distributed rates\n");
563 if (rhetmode == TWORATE ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n");
564 if (rhetmode == MIXEDRATE ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats);
566 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
567 FPRINTF(STDOUTFILE " i Fraction of invariable sites? ");
568 if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set");
569 else FPRINTF(STDOUTFILE "%.2f", fracinv);
570 if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)");
571 FPRINTF(STDOUTFILE "\n");
573 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
574 FPRINTF(STDOUTFILE " a Gamma distribution parameter alpha? ");
576 FPRINTF(STDOUTFILE "Estimate from data set\n");
578 FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta);
579 else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta);
580 FPRINTF(STDOUTFILE " c Number of Gamma rate categories? %d\n", numcats);
583 FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: ");
589 while (getchar() != '\n');
591 ch = (char) tolower((int) ch);
593 /* letters in use: d m */
594 /* letters not in use: */
602 case 'd': if (auto_datatype == AUTO_GUESS) {
603 auto_datatype = AUTO_OFF;
604 guessdata_optn = data_optn;
607 data_optn = data_optn + 1;
608 if (data_optn == 3) {
609 auto_datatype = AUTO_GUESS;
610 data_optn = guessdata_optn;
613 /* translate characters into format used by ML engine */
620 case 'm': if (data_optn == NUCLEOTIDE) { /* nucleotide data */
621 if(HKY_optn && nuc_optn) {
633 if(TN_optn && nuc_optn) {
634 if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) {
635 /* number of chars needs to be a multiple 2 or 3 */
637 if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0)
649 /* translate characters into format */
650 /* used by ML engine */
654 FPRINTF(STDOUTFILE "\n\n\nSH model not ");
655 FPRINTF(STDOUTFILE "available for the data set!\n");
678 /* translate characters into format */
679 /* used by ML engine */
686 if (data_optn == AMINOACID) { /* amino acid data */
688 /* AUTO -> Dayhoff */
693 blosum62_optn = FALSE;
696 auto_aamodel = AUTO_OFF;
705 blosum62_optn = FALSE;
708 auto_aamodel = AUTO_OFF;
717 blosum62_optn = FALSE;
720 auto_aamodel = AUTO_OFF;
730 blosum62_optn = FALSE;
733 auto_aamodel = AUTO_OFF;
738 /* mtREV -> BLOSUM 62 */
743 blosum62_optn = TRUE;
746 auto_aamodel = AUTO_OFF;
753 /* cpREV -> BLOSUM 62 */
758 blosum62_optn = TRUE;
761 auto_aamodel = AUTO_OFF;
766 /* BLOSUM 62 -> VT model */
771 blosum62_optn = FALSE;
774 auto_aamodel = AUTO_OFF;
778 /* VT model -> WAG model */
783 blosum62_optn = FALSE;
786 auto_aamodel = AUTO_OFF;
790 /* WAG model -> AUTO */
791 Dayhf_optn = guessDayhf_optn;
792 Jtt_optn = guessJtt_optn;
793 mtrev_optn = guessmtrev_optn;
794 cprev_optn = guesscprev_optn;
795 blosum62_optn = guessblosum62_optn;
796 vtmv_optn = guessvtmv_optn;
797 wag_optn = guesswag_optn;
798 auto_aamodel = guessauto_aamodel;
803 if (data_optn == BINARY) {
804 FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n");
812 default: FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
817 FPRINTF(STDOUTFILE "\n\n\n");
820 /* open file for reading */
821 void openfiletoread(FILE **fp, char name[], char descr[])
826 if ((*fp = fopen(name, "r")) == NULL) {
827 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
829 while ((*fp = fopen(str, "r")) == NULL)
834 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
837 FPRINTF(STDOUTFILE "File '%s' not found, ", str);
838 FPRINTF(STDOUTFILE "please enter alternative name: ");
843 FPRINTF(STDOUTFILE "\n");
845 } /* openfiletoread */
848 /* open file for writing */
849 void openfiletowrite(FILE **fp, char name[], char descr[])
854 if ((*fp = fopen(name, "w")) == NULL) {
855 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
857 while ((*fp = fopen(str, "w")) == NULL)
862 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
865 FPRINTF(STDOUTFILE "File '%s' not created, ", str);
866 FPRINTF(STDOUTFILE "please enter other name: ");
871 FPRINTF(STDOUTFILE "\n");
873 } /* openfiletowrite */
876 /* open file for appending */
877 void openfiletoappend(FILE **fp, char name[], char descr[])
882 if ((*fp = fopen(name, "a")) == NULL) {
883 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
885 while ((*fp = fopen(str, "a")) == NULL)
890 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
893 FPRINTF(STDOUTFILE "File '%s' not created, ", str);
894 FPRINTF(STDOUTFILE "please enter other name: ");
899 FPRINTF(STDOUTFILE "\n");
901 } /* openfiletowrite */
905 void closefile(FILE *fp)
910 /* symmetrize doublet frequencies */
916 if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) {
918 mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */
921 mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */
924 mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */
927 mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */
930 mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */
933 mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */
937 /* base composition of each taxon */
938 for (i = 0; i < Maxspc; i++) {
939 imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */
940 Basecomp[i][1] = imean;
941 Basecomp[i][4] = imean;
942 imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */
943 Basecomp[i][2] = imean;
944 Basecomp[i][8] = imean;
945 imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */
946 Basecomp[i][3] = imean;
947 Basecomp[i][12] = imean;
948 imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */
949 Basecomp[i][6] = imean;
950 Basecomp[i][9] = imean;
951 imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */
952 Basecomp[i][7] = imean;
953 Basecomp[i][13] = imean;
954 imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */
955 Basecomp[i][11] = imean;
956 Basecomp[i][14] = imean;
961 /* show Ts/Tv ratio and Ts Y/R ratio */
962 void computeexpectations()
967 /* write ML distance matrix to file. Modified CZ 05/29/01 */
968 void putdistance(FILE *fp)
973 for (i = 0; i < Maxspc - 1; i++) {
974 /*fprintf(fp, "%.5f ", Distanmat[i]/100.0);*/
975 for ( j = 0; j < 26; j++ ) {
976 fputc( Identif[i][j], fp ); /*CZ*/
978 fprintf(fp, "%.5f\n", Distanmat[i]/100.0);
987 /* first lines of EPSF likelihood mapping file */
988 void initps(FILE *ofp)
993 /* plot one point of likelihood mapping analysis */
994 void plotlmpoint(FILE *ofp, double w1, double w2)
999 /* last lines of EPSF likelihood mapping file */
1000 void finishps(FILE *ofp)
1005 /* computes LM point from the three log-likelihood values,
1006 plots the point, and does some statistics */
1007 void makelmpoint(FILE *fp, double b1, double b2, double b3)
1009 double w1, w2, w3, temp;
1010 unsigned char qpbranching;
1011 double temp1, temp2, temp3, onethird;
1012 unsigned char discreteweight[3], treebits[3];
1015 treebits[0] = (unsigned char) 1;
1016 treebits[1] = (unsigned char) 2;
1017 treebits[2] = (unsigned char) 4;
1019 /* sort in descending order */
1023 sort3doubles(qweight, qworder);
1025 /* compute Bayesian weights */
1026 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
1027 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
1028 qweight[qworder[0]] = 1.0;
1029 temp = qweight[0] + qweight[1] + qweight[2];
1030 qweight[0] = qweight[0]/temp;
1031 qweight[1] = qweight[1]/temp;
1032 qweight[2] = qweight[2]/temp;
1034 /* plot one point in likelihood mapping triangle */
1038 plotlmpoint(fp, w1, w2);
1040 /* check areas 1,2,3 */
1041 if (treebits[qworder[0]] == 1) ar1++;
1042 else if (treebits[qworder[0]] == 2) ar2++;
1045 /* check out regions 1,2,3,4,5,6,7 */
1047 /* 100 distribution */
1048 temp1 = 1.0 - qweight[qworder[0]];
1049 sqdiff[0] = temp1*temp1 +
1050 qweight[qworder[1]]*qweight[qworder[1]] +
1051 qweight[qworder[2]]*qweight[qworder[2]];
1052 discreteweight[0] = treebits[qworder[0]];
1054 /* 110 distribution */
1055 temp1 = 0.5 - qweight[qworder[0]];
1056 temp2 = 0.5 - qweight[qworder[1]];
1057 sqdiff[1] = temp1*temp1 + temp2*temp2 +
1058 qweight[qworder[2]]*qweight[qworder[2]];
1059 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
1061 /* 111 distribution */
1062 temp1 = onethird - qweight[qworder[0]];
1063 temp2 = onethird - qweight[qworder[1]];
1064 temp3 = onethird - qweight[qworder[2]];
1065 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1066 discreteweight[2] = (unsigned char) 7;
1068 /* sort in descending order */
1069 sort3doubles(sqdiff, sqorder);
1071 qpbranching = (unsigned char) discreteweight[sqorder[2]];
1073 if (qpbranching == 1) {
1075 if (w2 < w3) reg1l++;
1078 if (qpbranching == 2) {
1080 if (w1 < w3) reg2d++;
1083 if (qpbranching == 4) {
1085 if (w1 < w2) reg3d++;
1088 if (qpbranching == 3) {
1090 if (w1 < w2) reg4d++;
1093 if (qpbranching == 6) {
1095 if (w2 < w3) reg5l++;
1098 if (qpbranching == 5) {
1100 if (w1 < w3) reg6d++;
1103 if (qpbranching == 7) reg7++;
1106 /* print tree statistics */
1107 void printtreestats(FILE *ofp)
1110 double bestlkl, difflkl, difflklps, temp, sum;
1112 /* find best tree */
1115 for (i = 1; i < numutrees; i++)
1116 if (ulkl[i] > bestlkl) {
1121 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n");
1122 fprintf(ofp, "Tree log L difference S.E. Significantly worse\n");
1123 fprintf(ofp, "--------------------------------------------------------\n");
1124 for (i = 0; i < numutrees; i++) {
1125 difflkl = ulkl[besttree]-ulkl[i];
1126 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl);
1127 if (i == besttree) {
1128 fprintf(ofp, " <----------------- best tree");
1130 /* compute variance of Log L differences over sites */
1131 difflklps = difflkl/(double)Maxsite;
1133 for (j = 0; j < Numptrn; j++) {
1134 temp = allsites[besttree][j] - allsites[i][j] - difflklps;
1135 sum += temp*temp*Weight[j];
1137 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1138 fprintf(ofp, "%11.2f ", sum);
1139 if (difflkl > 1.96*sum)
1140 fprintf(ofp, "yes");
1146 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1150 /* find best tree */
1153 for (i = 1; i < numutrees; i++)
1154 if (ulklc[i] > bestlkl) {
1159 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n");
1160 fprintf(ofp, "Tree log L difference S.E. Significantly worse\n");
1161 fprintf(ofp, "--------------------------------------------------------\n");
1162 for (i = 0; i < numutrees; i++) {
1163 difflkl = ulklc[besttree]-ulklc[i];
1164 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl);
1165 if (i == besttree) {
1166 fprintf(ofp, " <----------------- best tree");
1168 /* compute variance of Log L differences over sites */
1169 difflklps = difflkl/(double)Maxsite;
1171 for (j = 0; j < Numptrn; j++) {
1172 temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps;
1173 sum += temp*temp*Weight[j];
1175 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1176 fprintf(ofp, "%11.2f ", sum);
1177 if (difflkl > 1.96*sum)
1178 fprintf(ofp, "yes");
1184 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1189 void timestamp(FILE* ofp)
1193 timespan = difftime(Stoptime, Starttime);
1194 cpuspan = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC);
1195 fprintf(ofp, "\n\nTIME STAMP\n\n");
1196 fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) );
1197 fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1198 timespan, timespan/60., timespan/3600.);
1199 fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1200 fulltime, fulltime/60., fulltime/3600.);
1202 fprintf(ofp, "CPU time (incl. input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n",
1203 fullcpu, fullcpu/60., fullcpu/3600.);
1204 #endif /* TIMEDEBUG */
1208 /* extern int bestrfound; */
1210 /* write output file */
1211 void writeoutputfile(FILE *ofp, int part)
1217 /******************************************************************************/
1218 /* timer routines */
1219 /******************************************************************************/
1228 /* check remaining time and print message if necessary */
1229 void checktimer(uli numqts)
1231 double tc2, mintogo, minutes, hours;
1234 if ( (time2 - time1) > 900) { /* generate message every 15 minutes */
1235 /* every 900 seconds */
1236 /* percentage of completed quartets */
1239 FPRINTF(STDOUTFILE "\n");
1241 tc2 = 100.*numqts/Numquartets;
1242 mintogo = (100.0-tc2) *
1243 (double) (time2-time0)/60.0/tc2;
1244 hours = floor(mintogo/60.0);
1245 minutes = mintogo - 60.0*hours;
1246 FPRINTF(STDOUTFILE "%.2f%%", tc2);
1247 FPRINTF(STDOUTFILE " completed (remaining");
1248 FPRINTF(STDOUTFILE " time: %.0f", hours);
1249 FPRINTF(STDOUTFILE " hours %.0f", minutes);
1250 FPRINTF(STDOUTFILE " minutes)\n");
1257 /* check remaining time and print message if necessary */
1258 void checktimer2(uli numqts, uli all, int flag)
1260 double tc2, mintogo, minutes, hours;
1270 if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */
1271 /* every 900 seconds */
1272 /* percentage of completed quartets */
1275 FPRINTF(STDOUTFILE "\n");
1277 tc2 = 100.*numqts/Numquartets;
1278 mintogo = (100.0-tc2) *
1279 (double) (tt2-time0)/60.0/tc2;
1280 hours = floor(mintogo/60.0);
1281 minutes = mintogo - 60.0*hours;
1282 FPRINTF(STDOUTFILE "%.2f%%", tc2);
1283 FPRINTF(STDOUTFILE " completed (remaining");
1284 FPRINTF(STDOUTFILE " time: %.0f", hours);
1285 FPRINTF(STDOUTFILE " hours %.0f", minutes);
1286 FPRINTF(STDOUTFILE " minutes)\n");
1293 void resetqblocktime(timearray_t *ta)
1295 ta->quartcpu += ta->quartblockcpu;
1296 ta->quartblockcpu = 0.0;
1297 ta->quarttime += ta->quartblocktime;
1298 ta->quartblocktime = 0.0;
1299 } /* resetqblocktime */
1302 void resetpblocktime(timearray_t *ta)
1304 ta->puzzcpu += ta->puzzblockcpu;
1305 ta->puzzblockcpu = 0.0;
1306 ta->puzztime += ta->puzzblocktime;
1307 ta->puzzblocktime = 0.0;
1308 } /* resetpblocktime */
1312 void printtimearr(timearray_t *ta)
1318 printf("(%2d) MMCPU: %11ld / %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu);
1319 printf("(%2d) CTick: %11.6f [tks] / %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime);
1321 printf("(%2d) MMTIM: %11ld / %11ld \n", PP_Myid, ta->maxtime, ta->mintime);
1323 printf("(%2d) Mxblk: %11.6e / %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock);
1324 printf("(%2d) Mnblk: %11.6e / %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock);
1326 printf("(%2d) Gnrl: %11.6e / %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime);
1327 printf("(%2d) Optn: %11.6e / %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime);
1328 printf("(%2d) Estm: %11.6e / %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime);
1329 printf("(%2d) Qurt: %11.6e / %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime);
1330 printf("(%2d) QBlk: %11.6e / %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime);
1331 printf("(%2d) QMax: %11.6e / %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime);
1332 printf("(%2d) QMin: %11.6e / %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime);
1334 printf("(%2d) Puzz: %11.6e / %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime);
1335 printf("(%2d) PBlk: %11.6e / %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime);
1336 printf("(%2d) PMax: %11.6e / %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime);
1337 printf("(%2d) PMin: %11.6e / %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime);
1339 printf("(%2d) Tree: %11.6e / %11.6e \n", PP_Myid, ta->treecpu, ta->treetime);
1340 printf("(%2d) TBlk: %11.6e / %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime);
1341 printf("(%2d) TMax: %11.6e / %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime);
1342 printf("(%2d) TMin: %11.6e / %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime);
1344 printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid,
1345 (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu),
1346 (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime));
1347 printf("(%2d) CPU: %11.6e / Time: %11.6e \n", PP_Myid, ta->cpu, ta->time);
1348 printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime);
1350 } /* printtimearr */
1351 #endif /* TIMEDEBUG */
1355 void inittimearr(timearray_t *ta)
1359 jtype[OVERALL] = "OVERALL";
1360 jtype[GENERAL] = "GENERAL";
1361 jtype[OPTIONS] = "OPTIONS";
1362 jtype[PARAMEST] = "PARAMeter ESTimation";
1363 jtype[QUARTETS] = "QUARTETS";
1364 jtype[PUZZLING] = "PUZZLING steps";
1365 jtype[TREEEVAL] = "TREE EVALuation";
1366 ta->currentjob = GENERAL;
1372 ta->mincputick = (double)(c2 - c1);
1373 ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC;
1375 ta->tempcpu = clock();
1376 ta->tempcpustart = ta->tempcpu;
1377 ta->tempfullcpu = ta->tempcpu;
1378 time(&(ta->temptime));
1379 ta->temptimestart = ta->temptime;
1380 ta->tempfulltime = ta->temptime;
1382 c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);;
1386 c2 = (clock_t)((2 * c1) + 1);
1388 if (c1 == c2) ta->maxcpu=c0;
1389 if (c1 > c2) ta->maxcpu=c1;
1391 c0=0; c1=0; c2=(clock_t)((2 * c1) - 1);
1395 c2 = (clock_t)((2 * c1) - 1);
1397 if (c1 == c2) ta->mincpu=c0;
1398 if (c1 < c2) ta->mincpu=c1;
1405 ta->maxcpublock = 0;
1406 ta->mincpublock = DBL_MAX;
1407 ta->maxtimeblock = 0;
1408 ta->mintimeblock = DBL_MAX;
1416 ta->generalcpu = 0.0;
1417 ta->optionscpu = 0.0;
1418 ta->paramestcpu = 0.0;
1420 ta->quartblockcpu = 0.0;
1421 ta->quartmaxcpu = 0.0;
1422 ta->quartmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1424 ta->puzzblockcpu = 0.0;
1425 ta->puzzmaxcpu = 0.0;
1426 ta->puzzmincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1428 ta->treeblockcpu = 0.0;
1429 ta->treemaxcpu = 0.0;
1430 ta->treemincpu = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1432 ta->generaltime = 0.0;
1433 ta->optionstime = 0.0;
1434 ta->paramesttime = 0.0;
1435 ta->quarttime = 0.0;
1436 ta->quartblocktime = 0.0;
1437 ta->quartmaxtime = 0.0;
1438 ta->quartmintime = DBL_MAX;
1440 ta->puzzblocktime = 0.0;
1441 ta->puzzmaxtime = 0.0;
1442 ta->puzzmintime = DBL_MAX;
1444 ta->treeblocktime = 0.0;
1445 ta->treemaxtime = 0.0;
1446 ta->treemintime = DBL_MAX;
1452 void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta)
1457 if (t2 != t1) t = difftime(t2, t1);
1461 c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC +
1462 ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC;
1464 c = ((double)(c2 - c1))/CLOCKS_PER_SEC;
1466 if (jobtype != OVERALL) {
1468 if (ta->mincpublock > c) ta->mincpublock = c;
1469 if (ta->maxcpublock < c) ta->maxcpublock = c;
1470 if (ta->mintimeblock > t) ta->mintimeblock = t;
1471 if (ta->maxtimeblock < t) ta->maxtimeblock = t;
1474 case GENERAL: ta->generalcpu += c;
1475 ta->generaltime += t;
1477 case OPTIONS: ta->optionscpu += c;
1478 ta->optionstime += t;
1480 case PARAMEST: ta->paramestcpu += c;
1481 ta->paramesttime += t;
1483 case QUARTETS: ta->quartblockcpu += c;
1484 ta->quartblocktime += t;
1485 if (ta->quartmincpu > c) ta->quartmincpu = c;
1486 if (ta->quartmaxcpu < c) ta->quartmaxcpu = c;
1487 if (ta->quartmintime > t) ta->quartmintime = t;
1488 if (ta->quartmaxtime < t) ta->quartmaxtime = t;
1490 case PUZZLING: ta->puzzblockcpu += c;
1491 ta->puzzblocktime += t;
1492 if (ta->puzzmincpu > c) ta->puzzmincpu = c;
1493 if (ta->puzzmaxcpu < c) ta->puzzmaxcpu = c;
1494 if (ta->puzzmintime > t) ta->puzzmintime = t;
1495 if (ta->puzzmaxtime < t) ta->puzzmaxtime = t;
1497 case TREEEVAL: ta->treeblockcpu += c;
1498 ta->treeblocktime += t;
1499 if (ta->treemincpu > c) ta->treemincpu = c;
1500 if (ta->treemaxcpu < c) ta->treemaxcpu = c;
1501 if (ta->treemintime > t) ta->treemintime = t;
1502 if (ta->treemaxtime < t) ta->treemaxtime = t;
1517 # endif /* !PARALLEL */
1518 printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]);
1519 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]);
1520 printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]);
1522 # endif /* TIMEDEBUG */
1529 void addtimes(int jobtype, timearray_t *ta)
1537 if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) { /* CPU counter overflow for overall time */
1538 addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta);
1539 ta->tempfullcpu = tempc;
1540 ta->tempfulltime = tempt;
1541 if (jobtype == OVERALL) {
1542 addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
1543 ta->tempcpustart = ta->tempcpu;
1544 ta->tempcpu = tempc;
1545 ta->temptimestart = ta->temptime;
1546 ta->temptime = tempt;
1550 if((jobtype != ta->currentjob) && (jobtype != OVERALL)) { /* change of job type */
1551 addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta);
1552 ta->tempcpustart = ta->tempcpu;
1553 ta->tempcpu = tempc;
1554 ta->temptimestart = ta->temptime;
1555 ta->temptime = tempt;
1556 ta->currentjob = jobtype;
1559 if (tempc < ta->tempcpustart) { /* CPU counter overflow */
1560 addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
1561 ta->tempcpustart = ta->tempcpu;
1562 ta->tempcpu = tempc;
1563 ta->temptimestart = ta->temptime;
1564 ta->temptime = tempt;
1571 /******************************************************************************/
1573 /* estimate parameters of substitution process and rate heterogeneity - no tree
1574 n-taxon tree is not needed because of quartet method or NJ tree topology */
1575 void estimateparametersnotree()
1577 int it, nump, change;
1578 double TSold, YRold, FIold, GEold;
1583 /* count number of parameters */
1584 if (data_optn == NUCLEOTIDE && optim_optn) nump++;
1585 if (fracinv_optim || grate_optim) nump++;
1587 do { /* repeat until nothing changes any more */
1591 /* optimize substitution parameters */
1592 if (data_optn == NUCLEOTIDE && optim_optn) {
1602 FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
1605 if (qcalg_optn) { /* quartet sampling */
1606 optimseqevolparamsq();
1607 } else { /* NJ tree */
1611 readusertree(tmpfp);
1613 optimseqevolparamst();
1616 computedistan(); /* update ML distances */
1618 /* same tolerance as 1D minimization */
1619 if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
1620 (fabs(YRparam - YRold) > 3.3*PEPS1)
1625 /* optimize rate heterogeneity variables */
1626 if (fracinv_optim || grate_optim) {
1636 FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
1638 /* compute NJ tree */
1641 /* use NJ tree topology to estimate parameters */
1643 readusertree(tmpfp);
1647 computedistan(); /* update ML distances */
1650 /* same tolerance as 1D minimization */
1651 if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
1652 (fabs(Geta - GEold) > 3.3*PEPS2)
1657 if (nump == 1) return;
1659 } while (it != MAXITS && change);
1665 /* estimate parameters of substitution process and rate heterogeneity - tree
1666 same as above but here the n-taxon tree is already in memory */
1667 void estimateparameterstree()
1669 int it, nump, change;
1670 double TSold, YRold, FIold, GEold;
1675 /* count number of parameters */
1676 if (data_optn == NUCLEOTIDE && optim_optn) nump++;
1677 if (fracinv_optim || grate_optim) nump++;
1679 do { /* repeat until nothing changes any more */
1683 /* optimize substitution process parameters */
1684 if (data_optn == NUCLEOTIDE && optim_optn) {
1694 FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
1696 optimseqevolparamst();
1697 computedistan(); /* update ML distances */
1700 /* same tolerance as 1D minimization */
1701 if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
1702 (fabs(YRparam - YRold) > 3.3*PEPS1)
1707 /* optimize rate heterogeneity variables */
1708 if (fracinv_optim || grate_optim) {
1718 FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
1721 computedistan(); /* update ML distances */
1724 /* same tolerance as 1D minimization */
1725 if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
1726 (fabs(Geta - GEold) > 3.3*PEPS2)
1731 if (nump == 1) return;
1733 } while (it != MAXITS && change);
1739 /******************************************************************************/
1740 /* exported from main */
1741 /******************************************************************************/
1743 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx)
1745 if (approx == APPROX) {
1747 *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */
1748 *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */
1749 *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */
1751 } else /* approx == EXACT */ {
1753 *d1 = quartet_lklhd(a,b, c,d); /* (a,b)-(c,d) */
1754 *d2 = quartet_lklhd(a,c, b,d); /* (a,c)-(b,d) */
1755 *d3 = quartet_lklhd(a,d, b,c); /* (a,d)-(b,c) */
1760 /***************************************************************/
1768 double tc2, mintogo, minutes, hours;
1771 /* allocate memory for taxon list of bad quartets */
1772 badtaxon = new_ulivector(Maxspc);
1773 for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
1775 /* allocate variable used for randomizing input order */
1776 trueID = new_ivector(Maxspc);
1778 /* allocate memory for quartets */
1779 quartetinfo = mallocquartets(Maxspc);
1781 /* prepare for consensus tree analysis */
1784 if (!(readquart_optn) || (readquart_optn && savequart_optn)) {
1785 /* compute quartets */
1786 FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n");
1788 computeallquartets();
1792 writeallquarts(Maxspc, ALLQUART, quartetinfo);
1793 if (readquart_optn) {
1794 int xx1, xx2, xx3, xx4, count;
1795 readallquarts (Maxspc, ALLQUART, quartetinfo);
1796 if (show_optn) { /* list all unresolved quartets */
1797 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
1798 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
1801 /* initialize bad quartet memory */
1802 for (count = 0; count < Maxspc; count++) badtaxon[count] = 0;
1805 for (xx4 = 3; xx4 < Maxspc; xx4++)
1806 for (xx3 = 2; xx3 < xx4; xx3++)
1807 for (xx2 = 1; xx2 < xx3; xx2++)
1808 for (xx1 = 0; xx1 < xx2; xx1++) {
1809 if (readquartet(xx1, xx2, xx3, xx4) == 7) {
1816 fputid10(unresfp, xx1);
1817 fprintf(unresfp, " ");
1818 fputid10(unresfp, xx2);
1819 fprintf(unresfp, " ");
1820 fputid10(unresfp, xx3);
1821 fprintf(unresfp, " ");
1822 fputid (unresfp, xx4);
1823 fprintf(unresfp, "\n");
1826 } /* end for xx4; for xx3; for xx2; for xx1 */
1827 if (show_optn) /* list all unresolved quartets */
1829 } /* readquart_optn */
1832 PP_SendAllQuarts(numquarts(Maxspc), quartetinfo);
1833 # endif /* PARALLEL */
1835 FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n");
1838 /* start timer - percentage of completed trees */
1843 /* open file for chronological list of puzzling step trees */
1844 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
1845 openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)");
1849 PP_SendDoPermutBlock(Numtrial);
1852 addtimes(GENERAL, &tarr);
1853 for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) {
1855 /* randomize input order */
1856 chooser(Maxspc, Maxspc, trueID);
1858 /* initialize tree */
1861 /* adding all other leafs */
1862 for (i = 3; i < Maxspc; i++) {
1864 /* clear all edgeinfos */
1867 /* clear counter of quartets */
1871 * core of quartet puzzling algorithm
1874 for (a = 0; a < nextleaf - 2; a++)
1875 for (b = a + 1; b < nextleaf - 1; b++)
1876 for (c = b + 1; c < nextleaf; c++) {
1878 /* check which two _leaves_ out of a, b, c
1879 are closer related to each other than
1880 to leaf i according to a least squares
1881 fit of the continous Baysian weights to the
1882 seven trivial "attractive regions". We assign
1883 a score of 1 to all edges between these two leaves
1884 chooseA and chooseB */
1886 checkquartet(a, b, c, i);
1887 incrementedgeinfo(chooseA, chooseB);
1891 /* generate message every 15 minutes */
1895 if ( (time2 - time1) > 900) {
1896 /* every 900 seconds */
1897 /* percentage of completed trees */
1899 FPRINTF(STDOUTFILE "\n");
1902 tc2 = 100.0*Currtrial/Numtrial +
1903 100.0*nq/Numquartets/Numtrial;
1904 mintogo = (100.0-tc2) *
1905 (double) (time2-time0)/60.0/tc2;
1906 hours = floor(mintogo/60.0);
1907 minutes = mintogo - 60.0*hours;
1908 FPRINTF(STDOUTFILE "%2.2f%%", tc2);
1909 FPRINTF(STDOUTFILE " completed (remaining");
1910 FPRINTF(STDOUTFILE " time: %.0f", hours);
1911 FPRINTF(STDOUTFILE " hours %.0f", minutes);
1912 FPRINTF(STDOUTFILE " minutes)\n");
1918 /* find out which edge has the lowest edgeinfo */
1921 /* add the next leaf on minedge */
1922 addnextleaf(minedge);
1925 /* compute bipartitions of current tree */
1927 makenewsplitentries();
1930 int *ctree, startnode;
1932 treelistitemtype *treeitem;
1933 ctree = initctree();
1935 startnode = sortctree(ctree);
1936 trstr=sprintfctree(ctree, psteptreestrlen);
1939 treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum);
1941 if((listqptrees == PSTOUT_LIST)
1942 || (listqptrees == PSTOUT_LISTORDER)) {
1943 /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */
1944 fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n",
1945 Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum);
1949 printf("%s\n", trstr);
1950 printfsortedpstrees(psteptreelist);
1957 /* free tree before building the next tree */
1960 addtimes(PUZZLING, &tarr);
1962 # endif /* PARALLEL */
1964 /* close file for list of puzzling step trees */
1965 if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
1968 if (mflag == 1) FPRINTF(STDOUTFILE "\n");
1970 /* garbage collection */
1972 free_ivector(trueID);
1975 free_cmatrix(biparts);
1976 # endif /* PARALLEL */
1980 /* compute majority rule consensus tree */
1983 /* write consensus tree to tmp file */
1985 writeconsensustree(tmpfp);
1988 /***************************************************************/
1992 int i, a, a1, a2, b, b1, b2, c, c1, c2, d;
1994 double logs[3], d1, d2, d3, temp;
1995 ivector qts, mlorder, gettwo;
1996 /* reset variables */
1997 ar1 = ar2 = ar3 = 0;
1998 reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0;
1999 reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u =
2000 reg4d = reg5l = reg5r = reg6u = reg6d = 0;
2002 /* place for random quartet */
2003 qts = new_ivector(4);
2005 /* initialize output file */
2006 openfiletowrite(&trifp, TRIANGLE, "Postscript output");
2008 FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n");
2016 addtimes(GENERAL, &tarr);
2017 if (lmqts == 0) { /* all possible quartets */
2019 if (numclust == 4) { /* four-cluster analysis */
2021 for (a = 0; a < clustA; a++)
2022 for (b = 0; b < clustB; b++)
2023 for (c = 0; c < clustC; c++)
2024 for (d = 0; d < clustD; d++) {
2031 /* maximum likelihood values */
2032 /* approximate ML is sufficient */
2033 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX);
2035 /* draw point for LM analysis */
2036 makelmpoint(trifp, d1, d2, d3);
2037 addtimes(QUARTETS, &tarr);
2042 if (numclust == 3) { /* three-cluster analysis */
2044 gettwo = new_ivector(2);
2046 for (a = 0; a < clustA; a++)
2047 for (b = 0; b < clustB; b++)
2048 for (c1 = 0; c1 < clustC-1; c1++)
2049 for (c2 = c1+1; c2 < clustC; c2++) {
2056 /* maximum likelihood values */
2057 /* approximate ML is sufficient */
2058 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX);
2060 /* randomize order of d2 and d3 */
2061 if (randominteger(2) == 1) {
2067 /* draw point for LM analysis */
2068 makelmpoint(trifp, d1, d2, d3);
2069 addtimes(QUARTETS, &tarr);
2072 free_ivector(gettwo);
2075 if (numclust == 2) { /* two-cluster analysis */
2077 gettwo = new_ivector(2);
2079 for (a1 = 0; a1 < clustA-1; a1++)
2080 for (a2 = a1+1; a2 < clustA; a2++)
2081 for (b1 = 0; b1 < clustB-1; b1++)
2082 for (b2 = b1+1; b2 < clustB; b2++) {
2089 /* maximum likelihood values */
2090 /* approximate ML is sufficient */
2091 compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX);
2093 /* randomize order of d2 and d3 */
2094 if (randominteger(2) == 1) {
2100 /* draw point for LM analysis */
2101 makelmpoint(trifp, d1, d2, d3);
2102 addtimes(QUARTETS, &tarr);
2106 free_ivector(gettwo);
2109 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
2111 mlorder = new_ivector(3);
2114 for (i = 3; i < Maxspc; i++)
2115 for (a = 0; a < i - 2; a++)
2116 for (b = a + 1; b < i - 1; b++)
2117 for (c = b + 1; c < i; c++)
2118 for (d = 3; d < Maxspc; d++)
2119 for (c = 2; c < d; c++)
2120 for (b = 1; b < c; b++)
2121 for (a = 0; a < b; a++)
2124 for (i = 3; i < Maxspc; i++)
2125 for (c = 2; c < i; c++)
2126 for (b = 1; b < c; b++)
2127 for (a = 0; a < b; a++) {
2134 /* maximum likelihood values */
2135 /* approximate ML is sufficient */
2136 compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX);
2138 /* randomize order */
2139 chooser(3,3,mlorder);
2140 d1 = logs[mlorder[0]];
2141 d2 = logs[mlorder[1]];
2142 d3 = logs[mlorder[2]];
2144 /* draw point for LM analysis */
2145 makelmpoint(trifp, d1, d2, d3);
2146 addtimes(QUARTETS, &tarr);
2149 free_ivector(mlorder);
2152 } else { /* randomly selected quartets */
2154 if (numclust == 4) { /* four-cluster analysis */
2156 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2163 /* choose random quartet */
2164 qts[0] = clusterA[ randominteger(clustA) ];
2165 qts[1] = clusterB[ randominteger(clustB) ];
2166 qts[2] = clusterC[ randominteger(clustC) ];
2167 qts[3] = clusterD[ randominteger(clustD) ];
2169 /* maximum likelihood values */
2170 /* approximate ML is sufficient */
2171 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2173 /* draw point for LM analysis */
2174 makelmpoint(trifp, d1, d2, d3);
2175 addtimes(QUARTETS, &tarr);
2180 if (numclust == 3) { /* three-cluster analysis */
2182 gettwo = new_ivector(2);
2184 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2191 /* choose random quartet */
2192 qts[0] = clusterA[ randominteger(clustA) ];
2193 qts[1] = clusterB[ randominteger(clustB) ];
2194 chooser(clustC, 2, gettwo);
2195 qts[2] = clusterC[gettwo[0]];
2196 qts[3] = clusterC[gettwo[1]];
2198 /* maximum likelihood values */
2199 /* approximate ML is sufficient */
2200 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2202 /* order of d2 and d3 is already randomized! */
2204 /* draw point for LM analysis */
2205 makelmpoint(trifp, d1, d2, d3);
2206 addtimes(QUARTETS, &tarr);
2210 free_ivector(gettwo);
2213 if (numclust == 2) { /* two-cluster analysis */
2215 gettwo = new_ivector(2);
2217 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2224 /* choose random quartet */
2225 chooser(clustA, 2, gettwo);
2226 qts[0] = clusterA[gettwo[0]];
2227 qts[1] = clusterA[gettwo[1]];
2228 chooser(clustB, 2, gettwo);
2229 qts[2] = clusterB[gettwo[0]];
2230 qts[3] = clusterB[gettwo[1]];
2232 /* maximum likelihood values */
2233 /* approximate ML is sufficient */
2234 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2236 /* order of d2 and d3 is already randomized! */
2238 /* draw point for LM analysis */
2239 makelmpoint(trifp, d1, d2, d3);
2240 addtimes(QUARTETS, &tarr);
2243 free_ivector(gettwo);
2246 if (numclust == 1) { /* normal likelihood mapping (one cluster) */
2248 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2255 /* choose random quartet */
2256 chooser(Maxspc, 4, qts);
2258 /* maximum likelihood values */
2259 /* approximate ML is sufficient */
2260 compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2262 /* order of d1, d2, and d3 is already randomized! */
2264 /* draw point for LM analysis */
2265 makelmpoint(trifp, d1, d2, d3);
2266 addtimes(QUARTETS, &tarr);
2278 /***************************************************************/
2280 void setdefaults() {
2282 strcpy(INFILE, INFILEDEFAULT);
2283 strcpy(OUTFILE, OUTFILEDEFAULT);
2284 strcpy(TREEFILE, TREEFILEDEFAULT);
2285 strcpy(INTREE, INTREEDEFAULT);
2286 strcpy(DISTANCES, DISTANCESDEFAULT);
2287 strcpy(TRIANGLE, TRIANGLEDEFAULT);
2288 strcpy(UNRESOLVED, UNRESOLVEDDEFAULT);
2289 strcpy(ALLQUART, ALLQUARTDEFAULT);
2290 strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT);
2291 strcpy(OUTPTLIST, OUTPTLISTDEFAULT);
2292 strcpy(OUTPTORDER, OUTPTORDERDEFAULT);
2294 usebestq_optn = FALSE;
2295 savequartlh_optn = FALSE;
2296 savequart_optn = FALSE;
2297 readquart_optn = FALSE;
2299 randseed = -1; /* to set random random seed */
2303 /***************************************************************/
2308 fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION);
2310 fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION);
2314 /***************************************************************/
2316 void printusage(char *fname)
2318 fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
2326 /***************************************************************/
2329 void printusagehhh(char *fname)
2331 fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
2332 fprintf(stderr, " -h - print usage\n");
2333 fprintf(stderr, " -wqf - write quartet file to Infilename.allquart\n");
2334 fprintf(stderr, " -rqf - read quartet file from Infilename.allquart\n");
2335 fprintf(stderr, " -wqlb - write quart lhs to Infilename.allquartlh (binary)\n");
2336 fprintf(stderr, " -wqla - write quart lhs to Infilename.allquartlh (ASCII)\n");
2337 fprintf(stderr, " -bestq - use best quart, no basian weights\n");
2338 fprintf(stderr, " -randseed<#> - use <#> as random number seed, for debug purposes only\n");
2347 /***************************************************************/
2350 void scancmdline(int *argc, char **argv[])
2352 static short infileset = 0;
2353 static short intreefileset = 0;
2356 int count, dummyint;
2358 for (n = 1; n < *argc; n++) {
2360 printf("argv[%d] = %s\n", n, (*argv)[n]);
2367 count = sscanf((*argv)[n], "-wqlb%n", &dummyint);
2368 if (dummyint == 5) {
2369 savequartlh_optn = TRUE;
2370 saveqlhbin_optn = TRUE;
2375 count = sscanf((*argv)[n], "-wqla%n", &dummyint);
2376 if (dummyint == 5) {
2377 savequartlh_optn = TRUE;
2378 saveqlhbin_optn = FALSE;
2383 count = sscanf((*argv)[n], "-wqf%n", &dummyint);
2384 if (dummyint == 4) {
2385 savequart_optn = TRUE;
2390 count = sscanf((*argv)[n],"-rqf%n", &dummyint);
2391 if (dummyint == 4) {
2392 readquart_optn = TRUE;
2397 count = sscanf((*argv)[n],"-bestq%n", &dummyint);
2398 if (dummyint == 6) {
2399 usebestq_optn = TRUE;
2404 count = sscanf((*argv)[n],"-hhh%n", &dummyint);
2406 printusagehhh((*argv)[0]);
2412 count = sscanf((*argv)[n],"-V%n", &dummyint);
2414 printversion((*argv)[0]);
2419 count = sscanf((*argv)[n],"-version%n", &dummyint);
2421 printversion((*argv)[0]);
2426 count = sscanf((*argv)[n],"--version%n", &dummyint);
2428 printversion((*argv)[0]);
2433 count = sscanf((*argv)[n],"-h%n", &dummyint);
2435 printusage((*argv)[0]);
2439 count = sscanf((*argv)[n],"-randseed%d", &dummyint);
2441 randseed = dummyint;
2446 count = sscanf((*argv)[n],"-h%n", &dummyint);
2447 if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]);
2449 count = sscanf((*argv)[n],"-writequarts%n", &dummyint);
2450 if (count == 1) writequartstofile = 1;;
2452 count = sscanf((*argv)[n],"-ws%d", &dummyint);
2453 if (count == 1) windowsize = dummyint;
2456 if ((*argv)[n][0] != '-') {
2457 if (infileset == 0) {
2458 strcpy(INFILE, (*argv)[n]);
2460 sprintf(OUTFILE ,"%s.%s", INFILE, OUTFILEEXT);
2461 sprintf(TREEFILE ,"%s.%s", INFILE, TREEFILEEXT);
2462 sprintf(DISTANCES ,"%s.%s", INFILE, DISTANCESEXT);
2463 sprintf(TRIANGLE ,"%s.%s", INFILE, TRIANGLEEXT);
2464 sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT);
2465 sprintf(ALLQUART ,"%s.%s", INFILE, ALLQUARTEXT);
2466 sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT);
2467 sprintf(OUTPTLIST ,"%s.%s", INFILE, OUTPTLISTEXT);
2468 sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT);
2469 FPRINTF(STDOUTFILE "Input file: %s\n", INFILE);
2472 if (intreefileset == 0) {
2473 strcpy(INTREE, (*argv)[n]);
2475 sprintf(OUTFILE ,"%s.%s", INTREE, OUTFILEEXT);
2476 sprintf(TREEFILE ,"%s.%s", INTREE, TREEFILEEXT);
2477 sprintf(DISTANCES ,"%s.%s", INTREE, DISTANCESEXT);
2478 FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE);
2483 if (flagused == FALSE) {
2484 fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]);
2492 /***************************************************************/
2494 void inputandinit(int *argc, char **argv[]) {
2498 /* vectors used in QP and LM analysis */
2499 qweight = new_dvector(3);
2500 sqdiff = new_dvector(3);
2501 qworder = new_ivector(3);
2502 sqorder = new_ivector(3);
2504 /* Initialization and parsing of Commandline */
2506 scancmdline(argc, argv);
2508 /* initialize random numbers generator */
2510 fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed);
2511 randseed = initrandom(randseed);
2513 psteptreelist = NULL;
2518 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION);
2520 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA);
2525 openfiletoread(&seqfp, INFILE, "sequence data");
2526 getsizesites(seqfp);
2527 FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc);
2530 data_optn = guessdatatype();
2532 /* translate characters into format used by ML engine */
2538 /* estimate base frequencies from data set */
2541 estimatebasefreqs();
2543 /* guess model of substitution */
2546 /* initialize guess variables */
2547 auto_datatype = AUTO_GUESS;
2548 if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS;
2549 else auto_aamodel = AUTO_DEFAULT;
2550 /* save guessed amino acid options */
2551 guessDayhf_optn = Dayhf_optn;
2552 guessJtt_optn = Jtt_optn;
2553 guessmtrev_optn = mtrev_optn;
2554 guesscprev_optn = cprev_optn;
2555 guessblosum62_optn = blosum62_optn;
2556 guessvtmv_optn = vtmv_optn;
2557 guesswag_optn = wag_optn;
2558 guessauto_aamodel = auto_aamodel;
2561 /* check for user specified tree */
2562 if ((utfp = fopen(INTREE, "r")) != NULL) {
2564 puzzlemode = USERTREE;
2566 puzzlemode = QUARTPUZ;
2569 /* reserve memory for cluster LM analysis */
2570 clusterA = new_ivector(Maxspc);
2571 clusterB = new_ivector(Maxspc);
2572 clusterC = new_ivector(Maxspc);
2573 clusterD = new_ivector(Maxspc);
2575 /* set options interactively */
2578 /* open usertree file right after start */
2579 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
2580 openfiletoread(&utfp, INTREE, "user trees");
2583 /* start main timer */
2586 addtimes(OPTIONS, &tarr);
2588 /* symmetrize doublet frequencies if specified */
2594 /* determine how many usertrees */
2595 if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
2599 if ((char) ci == ';') numutrees++;
2600 } while (ci != EOF);
2602 if (numutrees < 1) {
2603 FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n");
2608 /* check fraction of invariable sites */
2609 if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim)
2610 /* fraction of invariable site was specified manually */
2611 if (fracinv > MAXFI)
2614 addtimes(GENERAL, &tarr);
2615 /* estimate parameters */
2616 if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) {
2617 /* no tree present */
2618 estimateparametersnotree();
2621 /* use 1st user tree */
2624 estimateparameterstree();
2626 /* don't use first user tree */
2627 estimateparametersnotree();
2630 addtimes(PARAMEST, &tarr);
2632 /* compute expected Ts/Tv ratio */
2633 if (data_optn == NUCLEOTIDE) computeexpectations();
2635 } /* inputandinit */
2639 /***************************************************************/
2641 void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot)
2645 case QUARTPUZ: /* read QP tree */
2646 readusertree(intreefp);
2647 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n");
2650 findbestratecombination();
2652 case USERTREE: /* read user tree */
2653 readusertree(intreefp);
2654 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree # %d\n", utreenum+1);
2658 ulkl[utreenum] = Ctree->lklhd;
2659 allsitelkl(Ctree->condlkl, allsites[utreenum]);
2661 if (utreenum==0) findbestratecombination();
2666 if (compclock) { /* clocklike branch length */
2669 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n");
2673 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1);
2678 /* find best place for root */
2681 if (utreenum==0) locroot = *oldlocroot;
2682 else *oldlocroot = locroot;
2685 locroot = findrootedge();
2688 /* if user-specified edge for root does not exist use displayed outgroup */
2689 if (!checkedge(locroot)) {
2693 /* compute likelihood */
2694 clock_lklhd(locroot);
2696 ulklc[utreenum] = Ctree->lklhdc;
2697 allsitelkl(Ctree->condlkl, allsitesc[utreenum]);
2703 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd);
2705 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc);
2707 /* write ML branch length tree to outree file */
2708 clockmode = 0; /* nonclocklike branch lengths */
2709 fputphylogeny(outtreefp);
2711 /* clocklike branch lengths */
2714 fputrooted(outtreefp, locroot);
2716 } /* evaluatetree */
2718 /***************************************************************/
2721 if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
2723 free(splitpatterns);
2725 free_ivector(consconfid);
2726 free_ivector(conssizes);
2727 free_cmatrix(consbiparts);
2728 free_ulivector(badtaxon);
2730 free_cmatrix(Identif);
2731 free_dvector(Freqtpm);
2732 free_imatrix(Basecomp);
2733 free_ivector(clusterA);
2734 free_ivector(clusterB);
2735 free_ivector(clusterC);
2736 free_ivector(clusterD);
2737 free_dvector(qweight);
2738 free_dvector(sqdiff);
2739 free_ivector(qworder);
2740 free_ivector(sqorder);
2741 freetreelist(&psteptreelist, &psteptreenum, &psteptreesum);
2744 /***************************************************************/
2747 /******************************************************************************/
2749 /******************************************************************************/
2751 int main(int argc, char *argv[])
2753 int i, oldlocroot=0;
2755 /* start main timer */
2756 time(&walltimestart);
2757 cputimestart = clock();
2762 inputandinit(&argc, &argv);
2766 /* write distance matrix */
2767 FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES);
2768 openfiletowrite(&dfp, DISTANCES, "pairwise distances");
2774 free_cmatrix(Seqchar);
2775 free_cmatrix(seqchars);
2780 /* write CPU/Wallclock times and parallel statistics */
2781 time(&walltimestop);
2782 cputimestop = clock();
2783 addtimes(OVERALL, &tarr);
2785 fullcpu = tarr.fullcpu;
2786 fulltime = tarr.fulltime;
2800 /* printbestratecombination(stderr); */
2803 FPRINTF(STDOUTFILE "\nAll results written to disk:\n");
2804 /*FPRINTF(STDOUTFILE " Puzzle report file: %s\n", OUTFILE);*/
2805 FPRINTF(STDOUTFILE " Likelihood distances: %s\n", DISTANCES);
2807 if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST)
2808 FPRINTF(STDOUTFILE " Phylip tree file: %s\n", TREEFILE);
2809 if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) {
2810 if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER))
2811 FPRINTF(STDOUTFILE " Unique puzzling step trees: %s\n", OUTPTORDER);
2812 if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER))
2813 FPRINTF(STDOUTFILE " Puzzling step tree list: %s\n", OUTPTLIST);
2815 if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ)
2816 FPRINTF(STDOUTFILE " Unresolved quartets: %s\n", UNRESOLVED);
2817 if (typ_optn == LIKMAPING_OPTN)
2818 FPRINTF(STDOUTFILE " Likelihood mapping diagram: %s\n", TRIANGLE);
2819 FPRINTF(STDOUTFILE "\n");
2821 /* runtime message */
2823 "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n",
2824 difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60.,
2825 difftime(Stoptime, Starttime)/3600.);
2827 " including input %.0f seconds (= %.1f minutes = %.1f hours)\n",
2828 fulltime, fulltime/60., fulltime/3600.);
2840 /* compare function for uli - sort largest numbers first */
2841 int ulicmp(const void *ap, const void *bp)
2848 if (a > b) return -1;
2849 else if (a < b) return 1;
2853 /* compare function for int - sort smallest numbers first */
2854 int intcmp(const void *ap, const void *bp)
2861 if (a < b) return -1;
2862 else if (a > b) return 1;