initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_mod / src / puzzle1.c
1 /*
2  * puzzle1.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 /* Modified by Christian Zmasek to:
17    - name and pairwise dist. output as one line per seq.
18    - removed some unnecessary -- for my puposes -- output.
19    
20
21    !WARNING: Use ONLY together with FORESTER/RIO!
22    !For all other puposes download the excellent original!
23    
24    last modification: 05/19/01
25
26
27
28    void putdistance(FILE *fp):
29
30    removed: "if ((j + 1) % 7 == 0 && j+1 != Maxspc)
31              fprintf(fp, "\n          ");"
32
33
34
35
36    int main(int argc, char *argv[]):
37
38    removed:
39    "FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE);
40     openfiletowrite(&ofp, OUTFILE, "general output");
41     writeoutputfile(ofp,WRITEPARAMS);
42     fclose(ofp);"
43
44    "openfiletoappend(&ofp, OUTFILE, "general output");
45     writeoutputfile(ofp,WRITEREST);"
46
47    "openfiletoappend(&ofp, OUTFILE, "general output");
48     writeoutputfile(ofp,WRITEREST);"
49
50    "openfiletoappend(&ofp, OUTFILE, "general output");
51     writeoutputfile(ofp,WRITEREST);"
52
53    "timestamp(ofp);
54     closefile(ofp);"
55
56
57 */
58
59
60
61 #define EXTERN
62
63 #include "puzzle.h"
64 #include "gamma.h"
65
66 void num2quart(uli qnum, int *a, int *b, int *c, int *d)
67 {
68         double temp;
69         uli aa, bb, cc, dd;
70         uli lowval=0, highval=0;
71
72         aa=0; bb=1; cc=2; dd=3;
73
74         temp = (double)(24 * qnum);
75         temp = sqrt(temp);
76         temp = sqrt(temp);
77         /* temp = pow(temp, (double)(1/4)); */
78         dd = (uli) floor(temp) + 1;
79         if (dd < 3) dd = 3;
80         lowval =  (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
81         highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
82         if (lowval >= qnum)
83             while ((lowval > qnum)) {
84                 dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
85             }
86         else {
87             while (highval <= qnum) {
88                 dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
89             }
90             lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
91         }
92         qnum -= lowval;
93         if (qnum > 0) {
94             temp = (double)(6 * qnum);
95             temp = pow(temp, (double)(1/3));
96             cc = (uli) floor(temp);
97             if (cc < 2) cc= 2;
98             lowval =  (uli) cc*(cc-1)*(cc-2)/6;
99             highval = (uli) (cc+1)*cc*(cc-1)/6;
100             if (lowval >= qnum)
101                 while ((lowval > qnum)) {
102                    cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6;
103                 }
104             else {
105                 while (highval <= qnum) {
106                    cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6;
107                 }
108                 lowval = (uli) cc*(cc-1)*(cc-2)/6;
109             }
110             qnum -= lowval;
111             if (qnum > 0) {
112                 temp = (double)(2 * qnum);
113                 temp = sqrt(temp);
114                 bb = (uli) floor(temp);
115                 if (bb < 1) bb= 1;
116                 lowval =  (uli) bb*(bb-1)/2;
117                 highval = (uli) (bb+1)*bb/2;
118                 if (lowval >= qnum)
119                     while ((lowval > qnum)) {
120                         bb -= 1; lowval = (uli) bb*(bb-1)/2;
121                     }
122                 else {
123                     while (highval <= qnum) {
124                        bb += 1; highval = (uli) (bb+1)*bb/2;
125                     }
126                     lowval = (uli) bb*(bb-1)/2;
127                 }
128                 qnum -= lowval;
129                 if (qnum > 0) {
130                    aa = (uli) qnum;
131                    if (aa < 0) aa= 0;
132                 }
133             }
134         }
135         *d = (int)dd;
136         *c = (int)cc;
137         *b = (int)bb;
138         *a = (int)aa;
139 }  /* num2quart */
140
141 /******************/
142
143 uli numquarts(int maxspc)
144 {
145    uli tmp;
146    int a, b, c, d;
147
148    if (maxspc < 4)
149      return (uli)0;
150    else {
151       maxspc--;
152       a = maxspc-3;
153       b = maxspc-2;
154       c = maxspc-1;
155       d = maxspc;
156
157       tmp = (uli) 1 + a +
158             (uli) b * (b-1) / 2 +
159             (uli) c * (c-1) * (c-2) / 6 +
160             (uli) d * (d-1) * (d-2) * (d-3) / 24;
161       return (tmp);
162    }
163 }  /* numquarts */
164
165 /******************/
166
167 uli quart2num (int a, int b, int c, int d)
168 {
169       uli tmp;
170       if ((a>b) || (b>c) || (c>d)) {
171          fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c,
172 d);
173          exit (1);
174       }
175       tmp = (uli) a +
176             (uli) b * (b-1) / 2 +
177             (uli) c * (c-1) * (c-2) / 6 +
178             (uli) d * (d-1) * (d-2) * (d-3) / 24;
179       return (tmp);
180 }  /* quart2num */
181
182 /******************/
183
184
185
186 /*  flag=0      old allquart binary  */
187 /*  flag=1      allquart binary  */
188 /*  flag=2      allquart ACSII   */
189 /*  flag=3      quartlh  binary  */
190 /*  flag=4      quartlh  ASCII   */
191
192 void writetpqfheader(int            nspec,
193                      FILE          *ofp,
194                      int            flag)
195 { int            currspec;
196
197   if (flag == 0) {
198      unsigned long  nquart;
199      unsigned long  blocklen;
200
201      nquart = numquarts(nspec);
202      /* compute number of bytes */
203      if (nquart % 2 == 0) { /* even number */
204         blocklen = (nquart)/2;
205      } else { /* odd number */
206         blocklen = (nquart + 1)/2;
207      }
208      /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */
209      fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION);
210      fprintf(ofp, "species: %d\n",       nspec);
211      fprintf(ofp, "quartets: %lu\n",     nquart);
212      fprintf(ofp, "bytes: %lu\n\n",      blocklen);
213
214
215      /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */
216   }
217
218   if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
219   if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
220   if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
221   if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
222
223   for (currspec=0; currspec<nspec; currspec++) {
224      fputid(ofp, currspec);
225      fprintf(ofp, "\n");
226   } /* for each species */
227   fprintf(ofp, "\n");
228
229 } /* writetpqfheader */
230
231
232
233 void writeallquarts(int            nspec,
234                     char          *filename,
235                     unsigned char *quartetinfo)
236 { unsigned long  nquart;
237   unsigned long  blocklen;
238   FILE          *ofp;
239
240   nquart = numquarts(nspec);
241
242   /* compute number of bytes */
243   if (nquart % 2 == 0) { /* even number */
244      blocklen = (nquart)/2;
245   } else { /* odd number */
246      blocklen = (nquart + 1)/2;
247   }
248
249   FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename);
250
251   openfiletowrite(&ofp, filename, "all quartets");
252
253   writetpqfheader(nspec, ofp, 0);
254
255   fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp);
256   fclose(ofp);
257
258 } /* writeallquart */
259
260
261
262 void readallquarts(int            nspec,
263                    char          *filename,
264                    unsigned char *quartetinfo)
265 { unsigned long  nquart;
266   unsigned long  blocklen;
267   int            currspec;
268   unsigned long  dummynquart;
269   unsigned long  dummyblocklen;
270   int            dummynspec;
271   char           dummyversion[128];
272   char           dummyname[128];
273   FILE          *ifp;
274
275   nquart = numquarts(nspec);
276
277   /* compute number of bytes */
278   if (nquart % 2 == 0) { /* even number */
279      blocklen = (nquart)/2;
280   } else { /* odd number */
281      blocklen = (nquart + 1)/2;
282   }
283
284 /*  &(quartetinfo[0] */
285
286   openfiletoread(&ifp, filename, "all quartets");
287
288   fscanf(ifp, "TREE-PUZZLE\n");
289   fscanf(ifp, "%s\n\n", dummyversion);
290   
291   fscanf(ifp, "species: %d\n",   &dummynspec);
292   fscanf(ifp, "quartets: %lu\n", &dummynquart);
293   fscanf(ifp, "bytes: %lu\n\n",  &dummyblocklen);
294
295   if ((nspec != dummynspec) ||
296       (nquart != dummynquart) ||
297       (blocklen != dummyblocklen)) {
298      FPRINTF(STDOUTFILE "ERROR: Parameters in quartet file %s do not match!!!\n",
299                      filename);
300 #    if PARALLEL
301          PP_SendDone();
302          MPI_Finalize();
303 #    endif /* PARALLEL */
304      exit(1);
305   }
306
307   FPRINTF(STDOUTFILE "Reading quartet file: %s\n", filename);
308   FPRINTF(STDOUTFILE "   generated by: TREE-PUZZLE %s\n", dummyversion);
309   FPRINTF(STDOUTFILE "   current:      TREE-PUZZLE %s\n", VERSION);
310
311   FPRINTF(STDOUTFILE "   %d species, %lu quartets, %lu bytes\n", 
312                    dummynspec, dummynquart, dummyblocklen);
313
314   for (currspec=0; currspec<nspec; currspec++) {
315
316      fscanf(ifp, "%s\n", dummyname);
317      FPRINTF(STDOUTFILE "   %3d. %s (", currspec+1, dummyname);
318      fputid(STDOUT, currspec);
319      FPRINTF(STDOUTFILE ")\n");
320
321   } /* for each species */
322   FPRINTF(STDOUTFILE "\n");
323
324   fread(&(quartetinfo[0]), sizeof(char), blocklen, ifp);
325   fclose(ifp);
326
327 } /* writeallquart */
328
329
330
331
332 /******************************************************************************/
333 /* options - file I/O - output                                                */
334 /******************************************************************************/
335
336 /* compute TN parameters according to F84 Ts/Tv ratio */
337 void makeF84model()
338 {
339         double rho, piA, piC, piG, piT, piR, piY, ts, yr;
340         
341         piA = Freqtpm[0];
342         piC = Freqtpm[1];
343         piG = Freqtpm[2];
344         piT = Freqtpm[3];
345         piR = piA + piG;
346         piY = piC + piT;
347         if (piC*piT*piR + piA*piG*piY == 0.0) {         
348                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
349                 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
350                 tstvf84 = 0.0;
351                 return;
352         }
353         rho = (piR*piY*(piR*piY*tstvf84 - (piA*piG + piC*piT)))/
354                 (piC*piT*piR + piA*piG*piY);
355         
356         if(piR == 0.0 || piY == 0.0 || (piR + rho) == 0.0) {
357                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
358                 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
359                 tstvf84 = 0.0;
360                 return;
361         }
362         ts = 0.5 + 0.25*rho*(1.0/piR + 1.0/piY);
363         yr = (piY + rho)/piY * piR/(piR + rho);
364         if (ts < MINTS || ts > MAXTS) {
365                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
366                 FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n");
367                 tstvf84 = 0.0;
368                 return;
369         }
370         if (yr < MINYR || yr > MAXYR) {
371                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
372                 FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n");
373                 tstvf84 = 0.0;
374                 return;
375         }
376         TSparam = ts;
377         YRparam = yr;
378         optim_optn = FALSE;
379 }
380
381 /* compute number of quartets used in LM analysis */
382 void compnumqts()
383 {
384         if (lmqts == 0) {
385                 if (numclust == 4)
386                         Numquartets = (uli) clustA*clustB*clustC*clustD;
387                 if (numclust == 3)
388                         Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2;
389                 if (numclust == 2)
390                         Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2;
391                 if (numclust == 1)      
392                         Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24;
393         } else {
394                 Numquartets = lmqts;
395         }
396 }
397
398 /* set options interactively */
399 void setoptions()
400 {       
401         int i, valid;
402         double sumfreq;
403         char ch;
404
405         /* defaults */
406         rhetmode = UNIFORMRATE;          /* assume rate homogeneity               */
407         numcats = 1;
408         Geta = 0.05;
409         grate_optim = FALSE;
410         fracinv = 0.0;
411         fracinv_optim = FALSE;
412
413         compclock = FALSE;           /* compute clocklike branch lengths      */
414         locroot = -1;                /* search for optimal place of root      */ 
415         qcalg_optn = FALSE;          /* don't use sampling of quartets        */
416         approxp_optn = TRUE;         /* approximate parameter estimates       */
417         listqptrees = PSTOUT_NONE;   /* list puzzling step trees              */
418         
419         /* approximate QP quartets? */
420         if (Maxspc <= 6) approxqp = FALSE;
421         else approxqp = TRUE;
422         
423         codon_optn = 0;        /* use all positions in a codon          */
424         
425         /* number of puzzling steps */
426         if (Maxspc <= 25) Numtrial = 1000;
427         else if (Maxspc <= 50) Numtrial = 10000;
428         else if (Maxspc <= 75) Numtrial = 25000;
429         else Numtrial = 50000;
430             
431         utree_optn = TRUE;     /* use first user tree for estimation    */
432         outgroup = 0;          /* use first taxon as outgroup           */
433         sym_optn = FALSE;      /* symmetrize doublet frequencies        */
434         tstvf84 = 0.0;         /* disable F84 model                     */
435         show_optn = FALSE;     /* show unresolved quartets              */
436         typ_optn = TREERECON_OPTN;          /* tree reconstruction                   */
437         numclust = 1;          /* one clusters in LM analysis           */
438         lmqts = 0;             /* all quartets in LM analysis           */
439         compnumqts();
440         if (Numquartets > 10000) {
441                 lmqts = 10000;     /* 10000 quartets in LM analysis         */
442                 compnumqts();
443         }
444         
445         do {
446                 FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n");
447                 FPRINTF(STDOUTFILE " b                     Type of analysis?  ");
448                 if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n");
449                 if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n");
450                 if (typ_optn == TREERECON_OPTN) {
451                         FPRINTF(STDOUTFILE " k                Tree search procedure?  ");
452                         if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n");
453                         if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n");
454                         if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n");
455                         if (puzzlemode == QUARTPUZ) {
456                                 FPRINTF(STDOUTFILE " v       Approximate quartet likelihood?  %s\n",
457                                         (approxqp ? "Yes" : "No"));
458                                 FPRINTF(STDOUTFILE " u             List unresolved quartets?  %s\n",
459                                         (show_optn ? "Yes" : "No"));
460                                 FPRINTF(STDOUTFILE " n             Number of puzzling steps?  %lu\n",
461                                                 Numtrial);
462                                 FPRINTF(STDOUTFILE " j             List puzzling step trees?  ");
463                                 switch (listqptrees) {
464                                         case PSTOUT_NONE:      FPRINTF(STDOUTFILE "No\n"); break;
465                                         case PSTOUT_ORDER:     FPRINTF(STDOUTFILE "Unique topologies\n"); break;
466                                         case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break;
467                                         case PSTOUT_LIST:      FPRINTF(STDOUTFILE "Chronological list only\n"); break;
468                                 }
469
470                                 FPRINTF(STDOUTFILE " o                  Display as outgroup?  ");
471                                 fputid(STDOUT, outgroup);
472                                 FPRINTF(STDOUTFILE "\n");
473                         }
474                         if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
475                                 FPRINTF(STDOUTFILE " z     Compute clocklike branch lengths?  ");
476                                 if (compclock) FPRINTF(STDOUTFILE "Yes\n");
477                                 else FPRINTF(STDOUTFILE "No\n");
478                         }
479                         if (compclock)
480                                 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
481                                         FPRINTF(STDOUTFILE " l                     Location of root?  ");
482                                         if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n");
483                                         else if (locroot < Maxspc) {
484                                                 FPRINTF(STDOUTFILE "Branch %d (", locroot + 1);
485                                                 fputid(STDOUT, locroot);
486                                                 FPRINTF(STDOUTFILE ")\n");
487                                         } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1);
488                                 }
489                 }
490                 if (typ_optn == LIKMAPING_OPTN) {
491                           FPRINTF(STDOUTFILE " g          Group sequences in clusters?  ");
492                           if (numclust == 1) FPRINTF(STDOUTFILE "No\n");
493                           else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust);
494                           FPRINTF(STDOUTFILE " n                   Number of quartets?  ");
495                           if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets);
496                           else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts);
497                 }
498                 FPRINTF(STDOUTFILE " e                  Parameter estimates?  ");
499                 if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n");
500                 else FPRINTF(STDOUTFILE "Exact (slow)\n");
501                 if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {  
502                         FPRINTF(STDOUTFILE " x            Parameter estimation uses?  ");
503                         if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
504                         else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
505                         
506                 } else {
507                         FPRINTF(STDOUTFILE " x            Parameter estimation uses?  ");
508                         if (utree_optn)
509                                 FPRINTF(STDOUTFILE "1st input tree\n");
510                         else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
511                         else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
512                 }
513                 FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n");
514                 FPRINTF(STDOUTFILE " d          Type of sequence input data?  ");
515                 if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: ");
516                 if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n");
517                 if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n");
518                 if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n");
519                 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0  && !SH_optn) {
520                         FPRINTF(STDOUTFILE " h             Codon positions selected?  ");
521                         if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n");
522                         if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n");
523                         if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n");
524                         if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n");
525                         if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n");
526                 }
527                 FPRINTF(STDOUTFILE " m                Model of substitution?  ");
528                 if (data_optn == NUCLEOTIDE) { /* nucleotides */
529                         if (nuc_optn) {
530                                 if(HKY_optn)
531                                         FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n");     
532                                 else {
533                                         FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n");
534                                         FPRINTF(STDOUTFILE " p      Constrain TN model to F84 model?  ");
535                                         if (tstvf84 == 0.0)
536                                                 FPRINTF(STDOUTFILE "No\n");
537                                         else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84);
538                                 }
539                                 FPRINTF(STDOUTFILE " t    Transition/transversion parameter?  ");
540                                 if (optim_optn)
541                                         FPRINTF(STDOUTFILE "Estimate from data set\n");
542                                 else
543                                         FPRINTF(STDOUTFILE "%.2f\n", TSparam);  
544                                 if (TN_optn) {
545                                         FPRINTF(STDOUTFILE " r             Y/R transition parameter?  ");
546                                         if (optim_optn)
547                                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
548                                         else
549                                                 FPRINTF(STDOUTFILE "%.2f\n", YRparam);          
550                                 }                       
551                         }
552                         if (SH_optn) {
553                                         FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n");
554                                         FPRINTF(STDOUTFILE " t    Transition/transversion parameter?  ");
555                                         if (optim_optn)
556                                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
557                                         else
558                                                 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
559                         }       
560                 }
561                 if (data_optn == NUCLEOTIDE && SH_optn) {
562                         FPRINTF(STDOUTFILE " h                  Doublets defined by?  ");
563                         if (SHcodon)
564                                 FPRINTF(STDOUTFILE "1st and 2nd codon positions\n");
565                         else
566                                 FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n");
567                 }
568                 if (data_optn == AMINOACID) { /* amino acids */
569                         switch (auto_aamodel) {
570                                 case AUTO_GUESS:
571                                         FPRINTF(STDOUTFILE "Auto: ");
572                                         break;
573                                 case AUTO_DEFAULT:
574                                         FPRINTF(STDOUTFILE "Def.: ");
575                                         break;
576                         }
577                         if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n");  
578                         if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n");
579                         if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n");
580                         if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n");
581                         if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n");
582                         if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n");
583                         if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n");
584                 }
585                 if (data_optn == BINARY) { /* binary states */
586                         FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n");
587                 }
588                 if (data_optn == AMINOACID)
589                         FPRINTF(STDOUTFILE " f               Amino acid frequencies?  ");
590                 else if (data_optn == NUCLEOTIDE && SH_optn)
591                         FPRINTF(STDOUTFILE " f                  Doublet frequencies?  ");
592                 else if (data_optn == NUCLEOTIDE && nuc_optn)
593                         FPRINTF(STDOUTFILE " f               Nucleotide frequencies?  ");
594                 else if (data_optn == BINARY)
595                         FPRINTF(STDOUTFILE " f             Binary state frequencies?  ");
596                 FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" :
597                         "Use specified values"));
598                 if (data_optn == NUCLEOTIDE && SH_optn)
599                         FPRINTF(STDOUTFILE " s       Symmetrize doublet frequencies?  %s\n",
600                                 (sym_optn ? "Yes" : "No"));
601                                 
602                 FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n");
603                 FPRINTF(STDOUTFILE " w          Model of rate heterogeneity?  ");
604                 if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n");
605                 if (rhetmode == GAMMARATE  ) FPRINTF(STDOUTFILE "Gamma distributed rates\n");
606                 if (rhetmode == TWORATE    ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n");
607                 if (rhetmode == MIXEDRATE  ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats);
608                 
609                 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
610                         FPRINTF(STDOUTFILE " i         Fraction of invariable sites?  ");
611                         if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set");
612                         else FPRINTF(STDOUTFILE "%.2f", fracinv);
613                         if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)");
614                         FPRINTF(STDOUTFILE "\n");
615                 }
616                 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
617                         FPRINTF(STDOUTFILE " a   Gamma distribution parameter alpha?  ");
618                         if (grate_optim)
619                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
620                         else if (Geta > 0.5)
621                                 FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta);
622                         else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta);
623                         FPRINTF(STDOUTFILE " c      Number of Gamma rate categories?  %d\n", numcats);
624                 }
625         
626                 FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: ");
627                 
628                 /* read one char */
629                 ch = getchar();
630                 if (ch != '\n') {
631                         do ;
632                         while (getchar() != '\n');
633                 }
634                 ch = (char) tolower((int) ch);
635                 
636                 /* letters in use: a b c d e f g h i j k l m n o p q r s t u v w y x z */
637                 /* letters not in use:  */
638
639                 switch (ch) {
640
641                         case '\n':      break;
642                         
643                         case 'z':       if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) {
644                                                         compclock = compclock + 1;
645                                                         if (compclock == 2) compclock = 0;
646                                                 } else {
647                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
648                                                 }
649                                                 break;
650
651                         case 'l':       if (compclock && typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE)) {
652                                                         FPRINTF(STDOUTFILE "\n\n\nEnter an invalid branch number to search ");
653                                                         FPRINTF(STDOUTFILE "for the best location!\n");
654                                                         FPRINTF(STDOUTFILE "\nPlace root at branch (1-%d): ",
655                                                                 2*Maxspc-3);
656                                                         scanf("%d", &locroot);
657                                                         do ;
658                                                         while (getchar() != '\n');
659                                                         if (locroot < 1 || locroot > 2*Maxspc-3) locroot = 0;
660                                                         locroot = locroot - 1; 
661                                                 } else {
662                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
663                                                 }
664                                                 break;
665                         
666                         case 'e':       if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && fracinv_optim) {
667                                                         FPRINTF(STDOUTFILE "\n\n\nInvariable sites estimation needs to be exact!\n");
668                                                 } else {
669                                                         approxp_optn = approxp_optn + 1;
670                                                         if (approxp_optn == 2) approxp_optn = 0;                                                
671                                                 }
672                                                 break;
673                                                 
674                         case 'w':       rhetmode = rhetmode + 1;
675                                                 if (rhetmode == 4) rhetmode = UNIFORMRATE;
676                                                 if (rhetmode == UNIFORMRATE) { /* uniform rate */
677                                                                 numcats = 1;
678                                                                 Geta = 0.05;
679                                                                 grate_optim = FALSE;
680                                                                 fracinv = 0.0;
681                                                                 fracinv_optim = FALSE;
682                                                 }
683                                                 if (rhetmode == GAMMARATE  ) { /* Gamma distributed rates */
684                                                                 numcats = 8;
685                                                                 Geta = 0.05;
686                                                                 grate_optim = TRUE;
687                                                                 fracinv = 0.0;
688                                                                 fracinv_optim = FALSE;
689                                                 }
690                                                 if (rhetmode == TWORATE    ) { /* two rates (1 invariable + 1 variable) */
691                                                                 approxp_optn = FALSE;
692                                                                 numcats = 1;
693                                                                 Geta = 0.05;
694                                                                 grate_optim = FALSE;
695                                                                 fracinv = 0.0;
696                                                                 fracinv_optim = TRUE;
697                                                 }
698                                                 if (rhetmode == MIXEDRATE  ) { /* mixed (1 invariable + Gamma rates) */
699                                                                 approxp_optn = FALSE;
700                                                                 numcats = 8;
701                                                                 Geta = 0.05;
702                                                                 grate_optim = TRUE;
703                                                                 fracinv = 0.0;
704                                                                 fracinv_optim = TRUE;
705                                                 }
706                                                 break;
707
708                         case 'i':       if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
709                                                         FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for ");
710                                                         FPRINTF(STDOUTFILE "estimation from data set!\n");
711                                                         FPRINTF(STDOUTFILE "\nFraction of invariable sites among all sites (%.2f-%.2f): ",
712                                                                 MINFI, MAXFI);
713                                                         scanf("%lf", &fracinv);
714                                                         do ;
715                                                         while (getchar() != '\n');
716                                                         if (fracinv < MINFI || fracinv > MAXFI) {
717                                                                         fracinv_optim = TRUE;
718                                                                         fracinv = 0.0;
719                                                         } else {
720                                                                 fracinv_optim = FALSE;
721                                                         }
722                                                 } else {
723                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
724                                                 }
725                                                 break;
726
727                         case 'a':       if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
728                                                         FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for estimation from data set!\n");
729                                                         FPRINTF(STDOUTFILE "\nGamma distribution parameter alpha (%.2f-%.2f): ",
730                                                                 (1.0-MAXGE)/MAXGE, (1.0-MINGE)/MINGE);
731                                                         scanf("%lf", &Geta);
732                                                         do ;
733                                                         while (getchar() != '\n');
734                                                         if (Geta < (1.0-MAXGE)/MAXGE || Geta > (1.0-MINGE)/MINGE) {
735                                                                 grate_optim = TRUE;
736                                                                 Geta = 0.05;
737                                                         } else {
738                                                                 grate_optim = FALSE;
739                                                                 Geta = 1.0/(1.0 + Geta);
740                                                         }
741                                                 } else 
742                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
743                                                 break;
744                                                 
745                         case 'c':       if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
746                                                         FPRINTF(STDOUTFILE "\n\n\nNumber of Gamma rate categories (%d-%d): ",
747                                                                 MINCAT, MAXCAT);
748                                                         scanf("%d", &numcats);
749                                                         do ;
750                                                         while (getchar() != '\n');
751                                                         if (numcats < MINCAT || numcats > MAXCAT) {
752                                                                 FPRINTF(STDOUTFILE "\n\n\nThis number of categories is not available!\n");
753                                                                 numcats = 4;
754                                                         }
755                                                 } else {
756                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
757                                                 }       
758                                                 break;
759                         
760                         case 'h':       if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0  && !SH_optn) {
761                                                         codon_optn = codon_optn + 1;
762                                                         if (codon_optn == 5) codon_optn = 0;
763                                                         translatedataset();
764                                                         /* reestimate nucleotide frequencies only 
765                                                            if user did not specify other values */
766                                                         if (Frequ_optn) estimatebasefreqs();
767
768                                                 } else if (data_optn == NUCLEOTIDE && SH_optn) { 
769                                                         if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0) {
770                                                                 SHcodon = TRUE;
771                                                                 FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n");
772                                                         }
773                                                         if (Maxseqc % 3 != 0 && Maxseqc % 2 == 0) {
774                                                                 SHcodon = FALSE;
775                                                                 FPRINTF(STDOUTFILE "\n\n\nThis is the only possible option for the data set!\n");
776                                                         }
777                                                         if (Maxseqc % 2 == 0 && Maxseqc % 3 == 0) {
778                                                                 if (SHcodon)
779                                                                         SHcodon = FALSE;
780                                                                 else
781                                                                         SHcodon = TRUE; 
782                                                                 translatedataset();     
783                                                                 /* reestimate nucleotide frequencies only 
784                                                                    if user did not specify other values */
785                                                                 if (Frequ_optn) estimatebasefreqs();
786                                                         }
787                                                 } else {
788                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
789                                                 }
790                                                 break;
791
792                         case 'x':       if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
793                                                         if (utree_optn) {
794                                                                 utree_optn = FALSE;
795                                                                 qcalg_optn = FALSE;
796                                                         } else {
797                                                                 qcalg_optn = qcalg_optn + 1;
798                                                                 if (qcalg_optn == 2) {
799                                                                         qcalg_optn = 0;
800                                                                         utree_optn = TRUE;
801                                                                 }
802                                                         }
803                                                 } else {
804                                                         qcalg_optn = qcalg_optn + 1;
805                                                         if (qcalg_optn == 2) qcalg_optn = 0;
806                                                 }
807                                                 break;
808                                                 
809                         case 'k':       if (typ_optn == TREERECON_OPTN) {
810                                                         puzzlemode = (puzzlemode + 1) % 3;
811                                                         /* puzzlemode = puzzlemode + 1;
812                                                            if (puzzlemode == 3) puzzlemode = 0;
813                                                         xxx */
814                                                 } else {
815                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
816                                                 }
817                                                 break;
818
819                         case 'b':       typ_optn = (typ_optn + 1) % 2;
820                                                 /* typ_optn = typ_optn + 1;
821                                                    if (typ_optn == 2) typ_optn = TREERECON_OPTN;
822                                                 xxx */
823                                                 break;
824
825                         case 'g':       if (typ_optn == LIKMAPING_OPTN) {
826                                                         clustA = clustB = clustC = clustD = 0;
827                                                         if (numclust != 1) {
828                                                                 numclust = 1;
829                                                         } else {
830                                                                 FPRINTF(STDOUTFILE "\n\n\nNumber of clusters (2-4): ");
831                                                                 scanf("%d", &numclust);
832                                                                 do ;
833                                                                 while (getchar() != '\n');
834                                                                 if (numclust < 2 || numclust > 4) {
835                                                                         numclust = 1;
836                                                                         FPRINTF(STDOUTFILE "\n\n\nOnly 2, 3, or 4 ");
837                                                                         FPRINTF(STDOUTFILE "clusters possible\n");
838                                                                 } else {
839                                                                         FPRINTF(STDOUTFILE "\nDistribute all sequences over the ");
840                                                                         if (numclust == 2) {
841                                                                                 FPRINTF(STDOUTFILE "two clusters a and b (At least two\n");
842                                                                                 FPRINTF(STDOUTFILE "sequences per cluster are necessary), ");
843                                                                         }
844                                                                         if (numclust == 3) {
845                                                                                 FPRINTF(STDOUTFILE "three clusters a, b, and c\n");
846                                                                                 FPRINTF(STDOUTFILE "(At least one sequence in cluster a and b, and at least two\n");
847                                                                                 FPRINTF(STDOUTFILE "sequences in c are necessary), ");
848                                                                         }
849                                                                         if (numclust == 4) {
850                                                                                 FPRINTF(STDOUTFILE "four clusters a, b, c, and d\n");
851                                                                                 FPRINTF(STDOUTFILE "(At least one sequence per cluster is necessary),\n");
852                                                                         }
853                                                                         FPRINTF(STDOUTFILE "type x to exclude a sequence:\n\n");
854                                                                                                                                 
855                                                                         for (i = 0; i < Maxspc; i++) {
856                                                                                 valid = FALSE;
857                                                                                 do {
858                                                                                         fputid10(STDOUT, i);
859                                                                                         FPRINTF(STDOUTFILE ": ");
860                                                                                         /* read one char */
861                                                                                         ch = getchar();
862                                                                                         if (ch != '\n') {
863                                                                                         do ;
864                                                                                         while (getchar() != '\n');
865                                                                                         }       
866                                                                                         ch = (char) tolower((int) ch);
867                                                                                         if (ch == 'a' || ch == 'b' || ch == 'x')
868                                                                                                 valid = TRUE;
869                                                                                         if (numclust == 3 || numclust == 4)
870                                                                                                 if (ch == 'c') valid = TRUE;
871                                                                                         if (numclust == 4)
872                                                                                                 if (ch == 'd') valid = TRUE;
873                                                                                 } while (!valid);
874                                                                                 if (ch == 'a') {
875                                                                                         clusterA[clustA] = i;
876                                                                                         clustA++;
877                                                                                 }
878                                                                                 if (ch == 'b') {
879                                                                                         clusterB[clustB] = i;
880                                                                                         clustB++;
881                                                                                 }
882                                                                                 if (ch == 'c') {
883                                                                                         clusterC[clustC] = i;
884                                                                                         clustC++;
885                                                                                 }
886                                                                                 if (ch == 'd') {
887                                                                                         clusterD[clustD] = i;
888                                                                                         clustD++;
889                                                                                 }
890                                                                         }
891                                                                         /* check clusters */
892                                                                         valid = TRUE;
893                                                                         if (numclust == 4) {
894                                                                                 if (clustA == 0) {
895                                                                                         valid = FALSE;
896                                                                                         numclust = 1;
897                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
898                                                                                 }
899                                                                                 if (clustB == 0) {
900                                                                                         valid = FALSE;
901                                                                                         numclust = 1;
902                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
903                                                                                 }
904                                                                                 if (clustC == 0) {
905                                                                                         valid = FALSE;
906                                                                                         numclust = 1;
907                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n");
908                                                                                 }
909                                                                                 if (clustD == 0) {
910                                                                                         valid = FALSE;
911                                                                                         numclust = 1;
912                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster d\n");
913                                                                                 }
914                                                                         }
915                                                                         if (numclust == 3) {
916                                                                                 if (clustA == 0) {
917                                                                                         valid = FALSE;
918                                                                                         numclust = 1;
919                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
920                                                                                 }
921                                                                                 if (clustB == 0) {
922                                                                                         valid = FALSE;
923                                                                                         numclust = 1;
924                                                                                         FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
925                                                                                 }
926                                                                                 if (clustC < 2) {
927                                                                                         valid = FALSE;
928                                                                                         numclust = 1;
929                                                                                         if (clustC == 0)
930                                                                                                 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster c\n");
931                                                                                         else
932                                                                                                 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster c\n");
933                                                                                 }
934                                                                         }
935                                                                         if (numclust == 2) {
936                                                                                 if (clustA < 2) {
937                                                                                         valid = FALSE;
938                                                                                         numclust = 1;
939                                                                                         if (clustA == 0)
940                                                                                                 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster a\n");
941                                                                                         else
942                                                                                                 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster a\n");
943                                                                                 }
944                                                                                 if (clustB < 2) {
945                                                                                         valid = FALSE;
946                                                                                         numclust = 1;
947                                                                                         if (clustB == 0)
948                                                                                                 FPRINTF(STDOUTFILE "\n\n\nNo sequence in cluster b\n");
949                                                                                         else
950                                                                                                 FPRINTF(STDOUTFILE "\n\n\nOnly one sequence in cluster b\n");
951                                                                                 }       
952                                                                         }
953                                                                         if (valid) {
954                                                                                 FPRINTF(STDOUTFILE "\nNumber of sequences in each cluster:\n\n");
955                                                                                 FPRINTF(STDOUTFILE "Cluster a: %d\n", clustA);
956                                                                                 FPRINTF(STDOUTFILE "Cluster b: %d\n", clustB);
957                                                                                 if (numclust > 2)
958                                                                                         FPRINTF(STDOUTFILE "Cluster c: %d\n", clustC);
959                                                                                 if (numclust == 4)
960                                                                                         FPRINTF(STDOUTFILE "Cluster d: %d\n", clustD);
961                                                                                 FPRINTF(STDOUTFILE "\nExcluded sequences: ");
962                                                                                 if (numclust == 2) FPRINTF(STDOUTFILE "%d\n",
963                                                                                         Maxspc-clustA-clustB);
964                                                                                 if (numclust == 3) FPRINTF(STDOUTFILE "%d\n",
965                                                                                         Maxspc-clustA-clustB-clustC);
966                                                                                 if (numclust == 4) FPRINTF(STDOUTFILE "%d\n",
967                                                                                         Maxspc-clustA-clustB-clustC-clustD);
968                                                                         
969                                                                         }
970                                                                 }
971                                                         }
972                                                         /* number of resulting quartets */
973                                                         compnumqts();
974                                                         
975                                                 } else {
976                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
977                                                 }
978                                                 break;
979
980                         case 'd':       if (auto_datatype == AUTO_GUESS) {
981                                                 auto_datatype = AUTO_OFF;
982                                                 guessdata_optn = data_optn;
983                                                 data_optn = 0;
984                                         } else {
985                                                 data_optn = data_optn + 1;
986                                                 if (data_optn == 3) {
987                                                         auto_datatype = AUTO_GUESS;
988                                                         data_optn = guessdata_optn;
989                                                 }
990                                         }
991                                         /* translate characters into format used by ML engine */
992                                         translatedataset();
993                                         estimatebasefreqs();
994                                         break;
995
996                         case 'u':       if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
997                                                         show_optn = 1 - show_optn;
998                                                 else
999                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1000                                                 break;
1001
1002                         case 'j':       if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1003                                                         listqptrees = (listqptrees + 1) % 4;
1004                                                 else
1005                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1006                                                 break;
1007                                                         
1008                         case 'v':       if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1009                                                         approxqp = 1 - approxqp;
1010                                                 else
1011                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1012                                                 break;
1013
1014                         case 'f':       if (Frequ_optn) {
1015                                                         tstvf84 = 0.0;
1016                                                         Frequ_optn = FALSE;
1017                                                         sumfreq = 0.0;
1018                                                         if (data_optn == AMINOACID)
1019                                                                 FPRINTF(STDOUTFILE "\n\n\nAmino acid");
1020                                                         else if (data_optn == NUCLEOTIDE && SH_optn)
1021                                                                 FPRINTF(STDOUTFILE "\n\n\nDoublet");
1022                                                         else if (data_optn == NUCLEOTIDE && nuc_optn)
1023                                                                 FPRINTF(STDOUTFILE "\n\n\nNucleotide");
1024                                                         else if (data_optn == BINARY)
1025                                                                 FPRINTF(STDOUTFILE "\n\n\nBinary state");
1026                                                         FPRINTF(STDOUTFILE " frequencies (in %%):\n\n");
1027                                                         for (i = 0; i < gettpmradix() - 1; i++) {
1028                                                                 FPRINTF(STDOUTFILE "pi(%s) = ", int2code(i));
1029                                                                 scanf("%lf", &(Freqtpm[i]));
1030                                                                 do ;
1031                                                                 while (getchar() != '\n');
1032                                                                 Freqtpm[i] = Freqtpm[i]/100.0;
1033                                                                 if (Freqtpm[i] < 0.0) {
1034                                                                         FPRINTF(STDOUTFILE "\n\n\nNegative frequency not possible\n");
1035                                                                         estimatebasefreqs();
1036                                                                         break;
1037                                                                 }
1038                                                                 sumfreq = sumfreq + Freqtpm[i];
1039                                                                 if (sumfreq > 1.0) {
1040                                                                         FPRINTF(STDOUTFILE "\n\n\nThe sum of ");
1041                                                                         FPRINTF(STDOUTFILE "all frequencies exceeds");
1042                                                                         FPRINTF(STDOUTFILE " 100%%\n");
1043                                                                         estimatebasefreqs();
1044                                                                         break;
1045                                                                 }
1046                                                                 if (i == gettpmradix() - 2)
1047                                                                         Freqtpm[i+1] = 1.0 - sumfreq;
1048                                                         }
1049                                                 } else estimatebasefreqs();
1050                                                 break;
1051                         
1052                         case 's':       if (data_optn == NUCLEOTIDE && SH_optn) {
1053                                                         sym_optn = 1 - sym_optn;
1054                                                 } else {
1055                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1056                                                 }
1057                                                 break;
1058
1059                         case 'n':       if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN)
1060                                                 {
1061                                                         FPRINTF(STDOUTFILE "\n\n\nNumber of puzzling steps: ");
1062                                                         scanf("%lu", &Numtrial);
1063                                                         do ;
1064                                                         while (getchar() != '\n');
1065                                                         if (Numtrial < 1) {
1066                                                                 FPRINTF(STDOUTFILE "\n\n\nThe number of puzzling");
1067                                                                 FPRINTF(STDOUTFILE " steps can't be smaller than one\n");
1068                                                                 Numtrial = 1000;
1069                                                         }
1070                                                 }
1071                                                 else if (typ_optn == LIKMAPING_OPTN)
1072                                                 {
1073                                                         FPRINTF(STDOUTFILE "\n\nEnter zero to use all possible");
1074                                                         FPRINTF(STDOUTFILE " quartets in the analysis!\n");
1075                                                         FPRINTF(STDOUTFILE "\nNumber of random quartets: ");
1076                                                         scanf("%lu", &lmqts);
1077                                                         do ;
1078                                                         while (getchar() != '\n');
1079                                                         
1080                                                         /* compute number of quartets used */
1081                                                         compnumqts();
1082                                                 }
1083                                                 else
1084                                                 {
1085                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1086                                                 }
1087                                                 break;
1088                                                 
1089                         case 'o':       if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
1090                                                         FPRINTF(STDOUTFILE "\n\n\nSequence to be displayed as outgroup (1-%d): ",
1091                                                                         Maxspc);
1092                                                         scanf("%d", &outgroup);
1093                                                         do ;
1094                                                         while (getchar() != '\n');
1095                                                         if (outgroup < 1 || outgroup > Maxspc) {
1096                                                                 FPRINTF(STDOUTFILE "\n\n\nSequences are numbered ");
1097                                                                 FPRINTF(STDOUTFILE "from 1 to %d\n",
1098                                                                                 Maxspc);
1099                                                                 outgroup = 1;
1100                                                         }
1101                                                         outgroup = outgroup - 1;
1102                                                 } else {
1103                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1104                                                 }
1105                                                 break;
1106         
1107                         case 'm':       if (data_optn == NUCLEOTIDE) { /* nucleotide data */
1108                                                         if(HKY_optn && nuc_optn) {
1109                                                                 /* HKY -> TN */
1110                                                                 tstvf84       = 0.0;
1111                                                                 TSparam       = 2.0;
1112                                                                 YRparam       = 0.9;
1113                                                                 HKY_optn      = FALSE;
1114                                                                 TN_optn       = TRUE;
1115                                                                 optim_optn    = TRUE;
1116                                                                 nuc_optn      = TRUE;
1117                                                                 SH_optn       = FALSE;
1118                                                                 break;
1119                                                         }
1120                                                         if(TN_optn && nuc_optn) {
1121                                                                 if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) {
1122                                                                         /* number of chars needs to be a multiple 2 or 3 */
1123                                                                         /* TN -> SH */          
1124                                                                         if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0)
1125                                                                                 SHcodon = TRUE;
1126                                                                         else
1127                                                                                 SHcodon = FALSE;                                                                
1128                                                                         tstvf84       = 0.0;
1129                                                                         TSparam       = 2.0;
1130                                                                         YRparam       = 1.0;
1131                                                                         HKY_optn      = TRUE;
1132                                                                         TN_optn       = FALSE;
1133                                                                         optim_optn    = TRUE;
1134                                                                         nuc_optn      = FALSE;
1135                                                                         SH_optn       = TRUE;
1136                                                                         /* translate characters into format */
1137                                                                         /* used by ML engine */
1138                                                                         translatedataset();
1139                                                                         estimatebasefreqs();
1140                                                                 } else {
1141                                                                         FPRINTF(STDOUTFILE "\n\n\nSH model not ");
1142                                                                         FPRINTF(STDOUTFILE "available for the data set!\n");
1143                                                                         /* TN -> HKY */
1144                                                                         tstvf84       = 0.0;
1145                                                                         TSparam       = 2.0;
1146                                                                         YRparam       = 1.0;
1147                                                                         HKY_optn      = TRUE;
1148                                                                         TN_optn       = FALSE;
1149                                                                         optim_optn    = TRUE;
1150                                                                         nuc_optn      = TRUE;
1151                                                                         SH_optn       = FALSE;
1152                                                                 }
1153                                                                 break;
1154                                                         }
1155                                                         if(SH_optn) {
1156                                                                 /* SH -> HKY */
1157                                                                 tstvf84       = 0.0;
1158                                                                 TSparam       = 2.0;
1159                                                                 YRparam       = 1.0;
1160                                                                 HKY_optn      = TRUE;
1161                                                                 TN_optn       = FALSE;
1162                                                                 optim_optn    = TRUE;
1163                                                                 nuc_optn      = TRUE;
1164                                                                 SH_optn       = FALSE;
1165                                                                 /* translate characters into format */
1166                                                                 /* used by ML engine */
1167                                                                 translatedataset();
1168                                                                 estimatebasefreqs();
1169                                                                 break;
1170                                                         }
1171                                                         break;
1172                                                 }
1173                                                 if (data_optn == AMINOACID) { /* amino acid data */
1174                                                         if (auto_aamodel) {
1175                                                                 /* AUTO -> Dayhoff */
1176                                                                 Dayhf_optn    = TRUE;
1177                                                                 Jtt_optn      = FALSE;
1178                                                                 mtrev_optn    = FALSE;
1179                                                                 cprev_optn    = FALSE;
1180                                                                 blosum62_optn = FALSE;
1181                                                                 vtmv_optn     = FALSE;
1182                                                                 wag_optn      = FALSE;
1183                                                                 auto_aamodel  = AUTO_OFF;
1184                                                                 break;
1185                                                         }
1186                                                         if (Dayhf_optn) {
1187                                                                 /* Dayhoff -> JTT */
1188                                                                 Dayhf_optn    = FALSE;
1189                                                                 Jtt_optn      = TRUE;
1190                                                                 mtrev_optn    = FALSE;
1191                                                                 cprev_optn    = FALSE;
1192                                                                 blosum62_optn = FALSE;
1193                                                                 vtmv_optn     = FALSE;
1194                                                                 wag_optn      = FALSE;
1195                                                                 auto_aamodel  = AUTO_OFF;
1196                                                                 break;
1197                                                         }
1198                                                         if (Jtt_optn) {
1199                                                                 /* JTT -> mtREV */
1200                                                                 Dayhf_optn    = FALSE;
1201                                                                 Jtt_optn      = FALSE;
1202                                                                 mtrev_optn    = TRUE;
1203                                                                 cprev_optn    = FALSE;
1204                                                                 blosum62_optn = FALSE;
1205                                                                 vtmv_optn     = FALSE;
1206                                                                 wag_optn      = FALSE;
1207                                                                 auto_aamodel  = AUTO_OFF;
1208                                                                 break;
1209                                                         }
1210 #ifdef CPREV
1211                                                         if (mtrev_optn) {
1212                                                                 /* mtREV -> cpREV */
1213                                                                 Dayhf_optn    = FALSE;
1214                                                                 Jtt_optn      = FALSE;
1215                                                                 mtrev_optn    = FALSE;
1216                                                                 cprev_optn    = TRUE;
1217                                                                 blosum62_optn = FALSE;
1218                                                                 vtmv_optn     = FALSE;
1219                                                                 wag_optn      = FALSE;
1220                                                                 auto_aamodel  = AUTO_OFF;
1221                                                                 break;
1222                                                         }
1223 #else /* ! CPREV */
1224                                                         if (mtrev_optn) {
1225                                                                 /* mtREV -> BLOSUM 62 */
1226                                                                 Dayhf_optn    = FALSE;
1227                                                                 Jtt_optn      = FALSE;
1228                                                                 mtrev_optn    = FALSE;
1229                                                                 cprev_optn    = FALSE;
1230                                                                 blosum62_optn = TRUE;
1231                                                                 vtmv_optn     = FALSE;
1232                                                                 wag_optn      = FALSE;
1233                                                                 auto_aamodel  = AUTO_OFF;
1234                                                                 break;
1235                                                         }
1236 #endif /* ! CPREV */
1237
1238 #ifdef CPREV
1239                                                         if (cprev_optn) {
1240                                                                 /* cpREV -> BLOSUM 62 */
1241                                                                 Dayhf_optn    = FALSE;
1242                                                                 Jtt_optn      = FALSE;
1243                                                                 mtrev_optn    = FALSE;
1244                                                                 cprev_optn    = FALSE;
1245                                                                 blosum62_optn = TRUE;
1246                                                                 vtmv_optn     = FALSE;
1247                                                                 wag_optn      = FALSE;
1248                                                                 auto_aamodel  = AUTO_OFF;
1249                                                                 break;
1250                                                         }
1251 #endif
1252                                                         if (blosum62_optn) {
1253                                                                 /* BLOSUM 62 -> VT model */
1254                                                                 Dayhf_optn    = FALSE;
1255                                                                 Jtt_optn      = FALSE;
1256                                                                 mtrev_optn    = FALSE;
1257                                                                 cprev_optn    = FALSE;
1258                                                                 blosum62_optn = FALSE;
1259                                                                 vtmv_optn     = TRUE;
1260                                                                 wag_optn      = FALSE;
1261                                                                 auto_aamodel  = AUTO_OFF;
1262                                                                 break;
1263                                                         }
1264                                                         if (vtmv_optn) {
1265                                                                 /* VT model -> WAG model */
1266                                                                 Dayhf_optn    = FALSE;
1267                                                                 Jtt_optn      = FALSE;
1268                                                                 mtrev_optn    = FALSE;
1269                                                                 cprev_optn    = FALSE;
1270                                                                 blosum62_optn = FALSE;
1271                                                                 vtmv_optn     = FALSE;
1272                                                                 wag_optn      = TRUE;
1273                                                                 auto_aamodel  = AUTO_OFF;
1274                                                                 break;
1275                                                         }
1276                                                         if (wag_optn) {
1277                                                                 /* WAG model -> AUTO */
1278                                                                 Dayhf_optn    = guessDayhf_optn;
1279                                                                 Jtt_optn      = guessJtt_optn;
1280                                                                 mtrev_optn    = guessmtrev_optn;
1281                                                                 cprev_optn    = guesscprev_optn;
1282                                                                 blosum62_optn = guessblosum62_optn;
1283                                                                 vtmv_optn     = guessvtmv_optn;
1284                                                                 wag_optn      = guesswag_optn;
1285                                                                 auto_aamodel  = guessauto_aamodel;
1286                                                                 break;
1287                                                         }
1288                                                         break;
1289                                                 }
1290                                                 if (data_optn == BINARY) {
1291                                                         FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n");
1292                                                 }
1293                                                 break;
1294                                                 
1295                         case 't':       if (data_optn != NUCLEOTIDE) {
1296                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1297                                                 } else {
1298                                                         tstvf84 = 0.0;
1299                                                         FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value for ");
1300                                                         FPRINTF(STDOUTFILE "estimation from data set!\n");
1301                                                         FPRINTF(STDOUTFILE "\nTransition/transversion parameter (%.2f-%.2f): ",
1302                                                                         MINTS, MAXTS);
1303                                                         scanf("%lf", &TSparam);
1304                                                         do ;
1305                                                         while (getchar() != '\n');
1306                                                         if (TSparam < MINTS || TSparam > MAXTS) {
1307                                                                 optim_optn = TRUE;
1308                                                                 TSparam = 2.0;
1309                                                         } else {
1310                                                                 optim_optn = FALSE;
1311                                                         }
1312                                                 }
1313                                                 break;
1314
1315                         case 'q':       FPRINTF(STDOUTFILE "\n\n\n");
1316 #                                               if PARALLEL
1317                                                         PP_SendDone();
1318                                                         MPI_Finalize();
1319 #                                               endif /* PARALLEL */
1320                                                 exit(0);
1321                                                 
1322                                                 break;
1323
1324                         case 'r':       if (!(TN_optn && nuc_optn)){
1325                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1326                                                 } else {
1327                                                         tstvf84 = 0.0;
1328                                                         FPRINTF(STDOUTFILE "\n\n\nEnter an invalid value ");
1329                                                         FPRINTF(STDOUTFILE "for estimation from data set!\n");
1330                                                         FPRINTF(STDOUTFILE "\nY/R transition parameter (%.2f-%.2f): ", MINYR, MAXYR);
1331                                                         scanf("%lf", &YRparam);
1332                                                         do ;
1333                                                         while (getchar() != '\n');
1334                                                         if (YRparam < MINYR || YRparam > MAXYR) {
1335                                                                 optim_optn = TRUE;
1336                                                                 YRparam = 0.9;
1337                                                         } else if (YRparam == 1.0) {
1338                                                                 TN_optn = FALSE;
1339                                                                 HKY_optn = TRUE;
1340                                                                 if (optim_optn) TSparam = 2.0;
1341                                                         } else {
1342                                                                 optim_optn = FALSE;
1343                                                         }
1344                                                 }
1345                                                 break;
1346                                                 
1347                         case 'p':       if (!(TN_optn && nuc_optn)){
1348                                                         FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1349                                                 } else {
1350                                                         FPRINTF(STDOUTFILE "\n\n\nThe F84 model (Felsenstein 1984) is a restricted");
1351                                                         FPRINTF(STDOUTFILE " TN model, and the one\nF84 parameter uniquely");
1352                                                         FPRINTF(STDOUTFILE " determines the two corresponding TN parameters!\n\n");
1353                                                         FPRINTF(STDOUTFILE "F84 expected transition/transversion ratio: ");
1354                                                         scanf("%lf", &tstvf84);
1355                                                         do ;
1356                                                         while (getchar() != '\n');
1357                                                         if (tstvf84 <= 0.0) tstvf84 = 0.0;
1358                                                         else makeF84model();
1359                                                 }
1360                                                 break;
1361
1362                         case 'y':       break;
1363
1364                         default:        FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
1365                                                 break;
1366                 }
1367         } while (ch != 'y');
1368
1369         FPRINTF(STDOUTFILE "\n\n\n");
1370 }
1371
1372 /* open file for reading */
1373 void openfiletoread(FILE **fp, char name[], char descr[])
1374 {
1375         int count = 0;
1376         cvector str;
1377
1378         if ((*fp = fopen(name, "r")) == NULL) {
1379                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1380                 str = mygets();
1381                 while ((*fp = fopen(str, "r")) == NULL)
1382                 {
1383                         count++;
1384                         if (count > 10)
1385                         {
1386                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1387                                 exit(1);
1388                         }
1389                         FPRINTF(STDOUTFILE "File '%s' not found, ", str);
1390                         FPRINTF(STDOUTFILE "please enter alternative name: ");
1391                         free_cvector(str);
1392                         str = mygets();
1393                 }
1394                 free_cvector(str);
1395                 FPRINTF(STDOUTFILE "\n");
1396         }
1397 } /* openfiletoread */
1398
1399
1400 /* open file for writing */
1401 void openfiletowrite(FILE **fp, char name[], char descr[])
1402 {
1403         int count = 0;
1404         cvector str;
1405
1406         if ((*fp = fopen(name, "w")) == NULL) {
1407                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1408                 str = mygets();
1409                 while ((*fp = fopen(str, "w")) == NULL)
1410                 {
1411                         count++;
1412                         if (count > 10)
1413                         {
1414                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1415                                 exit(1);
1416                         }
1417                         FPRINTF(STDOUTFILE "File '%s' not created, ", str);
1418                         FPRINTF(STDOUTFILE "please enter other name: ");
1419                         free_cvector(str);
1420                         str = mygets();
1421                 }
1422                 free_cvector(str);
1423                 FPRINTF(STDOUTFILE "\n");
1424         }
1425 } /* openfiletowrite */
1426
1427
1428 /* open file for appending */
1429 void openfiletoappend(FILE **fp, char name[], char descr[])
1430 {
1431         int count = 0;
1432         cvector str;
1433
1434         if ((*fp = fopen(name, "a")) == NULL) {
1435                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
1436                 str = mygets();
1437                 while ((*fp = fopen(str, "a")) == NULL)
1438                 {
1439                         count++;
1440                         if (count > 10)
1441                         {
1442                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
1443                                 exit(1);
1444                         }
1445                         FPRINTF(STDOUTFILE "File '%s' not created, ", str);
1446                         FPRINTF(STDOUTFILE "please enter other name: ");
1447                         free_cvector(str);
1448                         str = mygets();
1449                 }
1450                 free_cvector(str);
1451                 FPRINTF(STDOUTFILE "\n");
1452         }
1453 } /* openfiletowrite */
1454
1455
1456 /* close file */
1457 void closefile(FILE *fp)
1458 {       
1459         fclose(fp);
1460 } /* closefile */
1461
1462 /* symmetrize doublet frequencies */
1463 void symdoublets()
1464 {
1465         int i, imean;
1466         double mean;
1467         
1468         if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) {
1469                 /* ML frequencies */
1470                 mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */
1471                 Freqtpm[1] = mean;
1472                 Freqtpm[4] = mean;
1473                 mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */
1474                 Freqtpm[2] = mean;
1475                 Freqtpm[8] = mean;
1476                 mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */
1477                 Freqtpm[3] = mean;
1478                 Freqtpm[12] = mean;
1479                 mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */
1480                 Freqtpm[6] = mean;
1481                 Freqtpm[9] = mean;
1482                 mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */
1483                 Freqtpm[7] = mean;
1484                 Freqtpm[13] = mean;
1485                 mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */
1486                 Freqtpm[11] = mean;
1487                 Freqtpm[14] = mean;
1488                 
1489                 /* base composition of each taxon */
1490                 for (i = 0; i < Maxspc; i++) {
1491                         imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */
1492                         Basecomp[i][1] = imean;
1493                         Basecomp[i][4] = imean;
1494                         imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */
1495                         Basecomp[i][2] = imean;
1496                         Basecomp[i][8] = imean;
1497                         imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */
1498                         Basecomp[i][3] = imean;
1499                         Basecomp[i][12] = imean;
1500                         imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */
1501                         Basecomp[i][6] = imean;
1502                         Basecomp[i][9] = imean;
1503                         imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */
1504                         Basecomp[i][7] = imean;
1505                         Basecomp[i][13] = imean;
1506                         imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */
1507                         Basecomp[i][11] = imean;
1508                         Basecomp[i][14] = imean;
1509                 }
1510         }
1511 }
1512
1513 /* show Ts/Tv ratio and Ts Y/R ratio */
1514 void computeexpectations()
1515 {
1516         double AlphaYBeta, AlphaRBeta, piR, piY, num, denom, pyr, pur;
1517         
1518         if (nuc_optn == TRUE) { /* 4x4 nucs */
1519                 piR = Freqtpm[0] + Freqtpm[2];
1520                 piY = Freqtpm[1] + Freqtpm[3];
1521                 AlphaRBeta = 4.0*TSparam / (1 + YRparam);
1522                 AlphaYBeta = AlphaRBeta * YRparam;
1523                 tstvratio = (AlphaRBeta*Freqtpm[0]*Freqtpm[2] +
1524                                          AlphaYBeta*Freqtpm[1]*Freqtpm[3])/(piR * piY);
1525                 yrtsratio = (AlphaYBeta*Freqtpm[1]*Freqtpm[3]) /
1526                                         (AlphaRBeta*Freqtpm[0]*Freqtpm[2]);
1527         } else { /* 16x16 nucs */
1528                 pyr = Freqtpm[1]*Freqtpm[3] + Freqtpm[5]*Freqtpm[7] +
1529                         Freqtpm[9]*Freqtpm[11] + Freqtpm[4]*Freqtpm[12] +
1530                         Freqtpm[5]*Freqtpm[13] + Freqtpm[6]*Freqtpm[14] +
1531                         Freqtpm[7]*Freqtpm[15] + Freqtpm[13]*Freqtpm[15];
1532                 pur = Freqtpm[0]*Freqtpm[2] + Freqtpm[4]*Freqtpm[6] +
1533                         Freqtpm[0]*Freqtpm[8] + Freqtpm[1]*Freqtpm[9] +
1534                         Freqtpm[2]*Freqtpm[10] + Freqtpm[8]*Freqtpm[10] +
1535                         Freqtpm[3]*Freqtpm[11] + Freqtpm[12]*Freqtpm[14];
1536                 num = pyr + pur;
1537                 denom = Freqtpm[0]*Freqtpm[1] + Freqtpm[1]*Freqtpm[2] +
1538                         Freqtpm[0]*Freqtpm[3] + Freqtpm[2]*Freqtpm[3] +
1539                         Freqtpm[0]*Freqtpm[4] + Freqtpm[1]*Freqtpm[5] +
1540                         Freqtpm[4]*Freqtpm[5] + Freqtpm[2]*Freqtpm[6] +
1541                         Freqtpm[5]*Freqtpm[6] + Freqtpm[3]*Freqtpm[7] +
1542                         Freqtpm[4]*Freqtpm[7] + Freqtpm[6]*Freqtpm[7] +
1543                         Freqtpm[4]*Freqtpm[8] + Freqtpm[5]*Freqtpm[9] +
1544                         Freqtpm[8]*Freqtpm[9] + Freqtpm[6]*Freqtpm[10] +
1545                         Freqtpm[9]*Freqtpm[10] + Freqtpm[7]*Freqtpm[11] +
1546                         Freqtpm[8]*Freqtpm[11] + Freqtpm[10]*Freqtpm[11] +
1547                         Freqtpm[0]*Freqtpm[12] + Freqtpm[8]*Freqtpm[12] +
1548                         Freqtpm[1]*Freqtpm[13] + Freqtpm[9]*Freqtpm[13] +
1549                         Freqtpm[12]*Freqtpm[13] + Freqtpm[2]*Freqtpm[14] +
1550                         Freqtpm[10]*Freqtpm[14] + Freqtpm[13]*Freqtpm[14] +
1551                         Freqtpm[3]*Freqtpm[15] + Freqtpm[11]*Freqtpm[15] +
1552                         Freqtpm[12]*Freqtpm[15] + Freqtpm[14]*Freqtpm[15];
1553                 tstvratio = 2.0*TSparam * num/denom;
1554                 yrtsratio = pyr/pur;
1555         }
1556 }
1557
1558 /* write ML distance matrix to file */
1559 void putdistance(FILE *fp) /* mod CZ 05/19/01 */
1560 {
1561         int i, j;
1562         
1563         fprintf(fp, "  %d\n", Maxspc);
1564         for (i = 0; i < Maxspc; i++) {
1565                 fputid10(fp, i);
1566                 for (j = 0; j < Maxspc; j++) {
1567                         fprintf(fp, "  %.5f", Distanmat[i][j]/100.0);
1568                 }
1569                 fprintf(fp, "\n");
1570         }
1571 }
1572
1573
1574 /* find identical sequences */
1575 void findidenticals(FILE *fp)
1576 {
1577         int i, j, noids;
1578         cvector useqs;
1579         
1580         useqs = new_cvector(Maxspc);
1581         
1582         for (i = 0; i < Maxspc; i++)
1583                 useqs[i] = 0;
1584         
1585         noids = TRUE;
1586         for (i = 0; i < Maxspc && noids; i++)
1587                 for (j = i + 1; j < Maxspc && noids; j++)
1588                         if (Distanmat[i][j] == 0.0) noids = FALSE;
1589         
1590         if (noids)
1591                 fprintf(fp, " All sequences are unique.\n");
1592         else {
1593                 for (i = 0; i < Maxspc; i++) {  
1594                         noids = TRUE;
1595                         for (j = i + 1; j < Maxspc && noids; j++)
1596                                 if (Distanmat[i][j] == 0.0) noids = FALSE;
1597                                 
1598                         if (!noids && useqs[i] == 0) {
1599                                 fputid(fp, i);
1600                                 useqs[i] = 1;   
1601                                 for (j = i + 1; j < Maxspc; j++)
1602                                         if (Distanmat[i][j] == 0.0) {
1603                                                 fprintf(fp, ", ");
1604                                                 fputid(fp, j);
1605                                                 useqs[j] = 1;
1606                                         }                               
1607                                 fprintf(fp, ".\n");
1608                         }
1609                 }
1610         }
1611         free_cvector(useqs);
1612 }
1613
1614 /* compute average distance */
1615 double averagedist()
1616 {       
1617         int i, j;
1618         double sum;
1619         
1620         sum = 0.0;
1621         for (i = 0; i < Maxspc; i++)
1622                 for (j = i + 1; j < Maxspc; j++)
1623                         sum = sum + Distanmat[i][j];
1624         
1625         sum = sum / (double) Maxspc / ((double) Maxspc - 1.0) * 2.0;
1626         
1627         return sum;
1628 }
1629
1630 /* first lines of EPSF likelihood mapping file */
1631 void initps(FILE *ofp)
1632 {
1633         fprintf(ofp, "%%!PS-Adobe-3.0 EPSF-3.0\n");
1634         fprintf(ofp, "%%%%BoundingBox: 60 210 550 650\n");
1635         fprintf(ofp, "%%%%Pages: 1\n");
1636 #       ifndef ALPHA
1637                 fprintf(ofp, "%%%%Creator: %s (version %s)\n", PACKAGE, VERSION);
1638 #       else
1639                 fprintf(ofp, "%%%%Creator: %s (version %s%s)\n", PACKAGE, VERSION, ALPHA);
1640 #       endif
1641         fprintf(ofp, "%%%%Title: Likelihood Mapping Analysis\n");
1642         fprintf(ofp, "%%%%CreationDate: %s", asctime(localtime(&Starttime)) );
1643         fprintf(ofp, "%%%%DocumentFonts: Helvetica\n");
1644         fprintf(ofp, "%%%%DocumentNeededFonts: Helvetica\n");
1645         fprintf(ofp, "%%%%EndComments\n");
1646         fprintf(ofp, "%% use inch as unit\n");
1647         fprintf(ofp, "/inch {72 mul} def\n");
1648         fprintf(ofp, "%% triangle side length (3 inch)\n");
1649         fprintf(ofp, "/tl {3 inch mul} def\n");
1650         fprintf(ofp, "%% plot one dot (x-y coordinates on stack)\n");
1651         fprintf(ofp, "/dot {\n");
1652         fprintf(ofp, "newpath\n");
1653         fprintf(ofp, "0.002 tl 0 360 arc  %% radius is 0.002 of the triangle length\n");
1654         fprintf(ofp, "closepath\n");
1655         fprintf(ofp, "fill\n");
1656         fprintf(ofp, "} def\n");
1657         fprintf(ofp, "%% preamble\n");
1658         fprintf(ofp, "/Helvetica findfont\n");
1659         fprintf(ofp, "12 scalefont\n");
1660         fprintf(ofp, "setfont\n");
1661         fprintf(ofp, "%% 0/0 for triangle of triangles\n");
1662         fprintf(ofp, "0.9 inch 3 inch translate\n");
1663         fprintf(ofp, "%% first triangle (the one with dots)\n");
1664         fprintf(ofp, "0.6 tl 1.2 tl 0.8660254038 mul translate\n");
1665         fprintf(ofp, "newpath\n");
1666         fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1667         fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1668         fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1669         fprintf(ofp, "closepath\n");
1670         fprintf(ofp, "stroke\n");
1671 }
1672
1673 /* plot one point of likelihood mapping analysis */
1674 void plotlmpoint(FILE *ofp, double w1, double w2)
1675 {
1676         fprintf(ofp,"%.10f tl %.10f tl dot\n",
1677                 0.5*w1 + w2, w1*0.8660254038);
1678 }
1679
1680 /* last lines of EPSF likelihood mapping file */
1681 void finishps(FILE *ofp)
1682 {
1683         fprintf(ofp, "stroke\n");
1684         fprintf(ofp, "%% second triangle (the one with 3 basins)\n");
1685         fprintf(ofp, "/secondtriangle {\n");
1686         fprintf(ofp, "newpath\n");
1687         fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1688         fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1689         fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1690         fprintf(ofp, "closepath\n");
1691         fprintf(ofp, "stroke\n");
1692         fprintf(ofp, "newpath\n");
1693         fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1694         fprintf(ofp, " 0.50 tl 0.0000000000 tl lineto\n");
1695         fprintf(ofp, "stroke\n");
1696         fprintf(ofp, "newpath\n");
1697         fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1698         fprintf(ofp, " 0.25 tl 0.4330127019 tl lineto\n");
1699         fprintf(ofp, "stroke\n");
1700         fprintf(ofp, "newpath\n");
1701         fprintf(ofp, " 0.50 tl 0.2886751346 tl moveto\n");
1702         fprintf(ofp, " 0.75 tl 0.4330127019 tl lineto\n");
1703         fprintf(ofp, "stroke\n");
1704         fprintf(ofp, "0.44 tl 0.5 tl moveto %% up\n");
1705         fprintf(ofp, "(%.1f%%) show\n", (double) ar1*100.0/Numquartets);
1706         fprintf(ofp, "0.25 tl 0.15 tl moveto %% down left\n");
1707         fprintf(ofp, "(%.1f%%) show\n", (double) ar3*100.0/Numquartets);
1708         fprintf(ofp, "0.63 tl 0.15 tl moveto %% down right\n");
1709         fprintf(ofp, "(%.1f%%) show\n", (double) ar2*100.0/Numquartets);
1710         fprintf(ofp, "} def\n");
1711         fprintf(ofp, "%% third triangle (the one with 7 basins)\n");
1712         fprintf(ofp, "/thirdtriangle {\n");
1713         fprintf(ofp, "newpath\n");
1714         fprintf(ofp, " 0.0 tl 0.0 tl moveto\n");
1715         fprintf(ofp, " 1.0 tl 0.0 tl lineto\n");
1716         fprintf(ofp, " 0.5 tl 0.8660254038 tl lineto\n");
1717         fprintf(ofp, "closepath\n");
1718         fprintf(ofp, "stroke\n");
1719         fprintf(ofp, "newpath\n");
1720         fprintf(ofp, " 0.25 tl 0.1443375673 tl moveto\n");
1721         fprintf(ofp, " 0.75 tl 0.1443375673 tl lineto\n");
1722         fprintf(ofp, " 0.50 tl 0.5773502692 tl lineto\n");
1723         fprintf(ofp, "closepath\n");
1724         fprintf(ofp, "stroke\n");
1725         fprintf(ofp, "newpath\n");
1726         fprintf(ofp, " 0.125 tl 0.2165063509 tl moveto\n");
1727         fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
1728         fprintf(ofp, "stroke\n");
1729         fprintf(ofp, "newpath\n");
1730         fprintf(ofp, " 0.375 tl 0.6495190528 tl moveto\n");
1731         fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
1732         fprintf(ofp, "stroke\n");
1733         fprintf(ofp, "newpath\n");
1734         fprintf(ofp, " 0.625 tl 0.6495190528 tl moveto\n");
1735         fprintf(ofp, " 0.500 tl 0.5773502692 tl lineto\n");
1736         fprintf(ofp, "stroke\n");
1737         fprintf(ofp, "newpath\n");
1738         fprintf(ofp, " 0.875 tl 0.2165063509 tl moveto\n");
1739         fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
1740         fprintf(ofp, "stroke\n");
1741         fprintf(ofp, "newpath\n");
1742         fprintf(ofp, " 0.750 tl 0.00 tl moveto\n");
1743         fprintf(ofp, " 0.750 tl 0.1443375673 tl lineto\n");
1744         fprintf(ofp, "stroke\n");
1745         fprintf(ofp, "newpath\n");
1746         fprintf(ofp, " 0.250 tl 0.00 tl moveto\n");
1747         fprintf(ofp, " 0.250 tl 0.1443375673 tl lineto\n");
1748         fprintf(ofp, "stroke\n");
1749         fprintf(ofp, "0.42 tl 0.66 tl moveto %% up\n");
1750         fprintf(ofp, "(%.1f%%) show\n", (double) reg1*100.0/Numquartets);
1751         fprintf(ofp, "0.07 tl 0.05 tl moveto %% down left\n");
1752         fprintf(ofp, "(%.1f%%) show\n", (double) reg3*100.0/Numquartets);
1753         fprintf(ofp, "0.77 tl 0.05 tl moveto %% down right\n");
1754         fprintf(ofp, "(%.1f%%) show\n", (double) reg2*100.0/Numquartets);
1755         fprintf(ofp, "0.43 tl 0.05 tl moveto %% down side\n");
1756         fprintf(ofp, "(%.1f%%) show\n", (double) reg5*100.0/Numquartets);
1757         fprintf(ofp, "0.43 tl 0.28 tl moveto %% center\n");
1758         fprintf(ofp, "(%.1f%%) show\n", (double) reg7*100.0/Numquartets);
1759         fprintf(ofp, "gsave\n");
1760         fprintf(ofp, "-60 rotate\n");
1761         fprintf(ofp, "-0.07 tl 0.77 tl moveto %% right side\n");
1762         fprintf(ofp, "(%.1f%%) show\n", (double) reg4*100.0/Numquartets);
1763         fprintf(ofp, "grestore\n");
1764         fprintf(ofp, "gsave\n");
1765         fprintf(ofp, "60 rotate\n");
1766         fprintf(ofp, "0.4 tl -0.09 tl moveto %% left side\n");
1767         fprintf(ofp, "(%.1f%%) show\n", (double) reg6*100.0/Numquartets);
1768         fprintf(ofp, "grestore\n");
1769         fprintf(ofp, "} def\n");
1770         fprintf(ofp, "%% print the other two triangles\n");
1771         fprintf(ofp, "-0.6 tl -1.2 tl 0.8660254038 mul translate\n");
1772         fprintf(ofp, "secondtriangle\n");
1773         fprintf(ofp, "1.2 tl 0 translate\n");
1774         fprintf(ofp, "thirdtriangle\n");        
1775         if (numclust == 4) { /* four cluster analysis */
1776                 fprintf(ofp, "%% label corners\n");
1777                 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1778                 fprintf(ofp, "((a,b)-(c,d)) show %% CHANGE HERE IF NECESSARY\n");
1779                 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1780                 fprintf(ofp, "((a,d)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1781                 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1782                 fprintf(ofp, "((a,c)-(b,d)) show %% CHANGE HERE IF NECESSARY\n");
1783         }
1784         if (numclust == 3) { /* three cluster analysis */
1785                 fprintf(ofp, "%% label corners\n");
1786                 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1787                 fprintf(ofp, "((a,b)-(c,c)) show %% CHANGE HERE IF NECESSARY\n");
1788                 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1789                 fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1790                 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1791                 fprintf(ofp, "((a,c)-(b,c)) show %% CHANGE HERE IF NECESSARY\n");
1792         }
1793         if (numclust == 2) { /* two cluster analysis */
1794                 fprintf(ofp, "%% label corners\n");
1795                 fprintf(ofp, "0.375 tl 0.9 tl moveto\n");
1796                 fprintf(ofp, "((a,a)-(b,b)) show %% CHANGE HERE IF NECESSARY\n");
1797                 fprintf(ofp, "-0.16 tl -0.08 tl moveto\n");
1798                 fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n");
1799                 fprintf(ofp, "0.92 tl -0.08 tl moveto\n");
1800                 fprintf(ofp, "((a,b)-(a,b)) show %% CHANGE HERE IF NECESSARY\n");
1801         }
1802         fprintf(ofp, "showpage\n");
1803         fprintf(ofp, "%%%%EOF\n");
1804 }
1805
1806 /* computes LM point from the three log-likelihood values,
1807    plots the point, and does some statistics */
1808 void makelmpoint(FILE *fp, double b1, double b2, double b3)
1809 {
1810         double w1, w2, w3, temp;
1811         unsigned char qpbranching;
1812         double temp1, temp2, temp3, onethird;
1813         unsigned char discreteweight[3], treebits[3];
1814         
1815         onethird = 1.0/3.0;
1816         treebits[0] = (unsigned char) 1;
1817         treebits[1] = (unsigned char) 2;
1818         treebits[2] = (unsigned char) 4;
1819
1820         /* sort in descending order */
1821         qweight[0] = b1;
1822         qweight[1] = b2;
1823         qweight[2] = b3;
1824         sort3doubles(qweight, qworder);
1825
1826         /* compute Bayesian weights */
1827         qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
1828         qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
1829         qweight[qworder[0]] = 1.0;
1830         temp = qweight[0] + qweight[1] + qweight[2];
1831         qweight[0] = qweight[0]/temp;
1832         qweight[1] = qweight[1]/temp;
1833         qweight[2] = qweight[2]/temp;
1834
1835         /* plot one point in likelihood mapping triangle */
1836         w1 = qweight[0];
1837         w2 = qweight[1];
1838         w3 = qweight[2];
1839         plotlmpoint(fp, w1, w2);
1840         
1841         /* check areas 1,2,3 */ 
1842         if (treebits[qworder[0]] == 1) ar1++;
1843         else if (treebits[qworder[0]] == 2) ar2++;
1844         else ar3++;                             
1845
1846         /* check out regions 1,2,3,4,5,6,7 */
1847
1848         /* 100 distribution */
1849         temp1 = 1.0 - qweight[qworder[0]];
1850         sqdiff[0] = temp1*temp1 +
1851                 qweight[qworder[1]]*qweight[qworder[1]] +
1852                 qweight[qworder[2]]*qweight[qworder[2]];
1853         discreteweight[0] = treebits[qworder[0]];
1854
1855         /* 110 distribution */
1856         temp1 = 0.5 - qweight[qworder[0]];
1857         temp2 = 0.5 - qweight[qworder[1]];
1858         sqdiff[1] = temp1*temp1 + temp2*temp2 +
1859                 qweight[qworder[2]]*qweight[qworder[2]];
1860         discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
1861
1862         /* 111 distribution */
1863         temp1 = onethird - qweight[qworder[0]];
1864         temp2 = onethird - qweight[qworder[1]];
1865         temp3 = onethird - qweight[qworder[2]];
1866         sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1867         discreteweight[2] = (unsigned char) 7;
1868
1869         /* sort in descending order */
1870         sort3doubles(sqdiff, sqorder);
1871                         
1872         qpbranching = (unsigned char) discreteweight[sqorder[2]];
1873                                                         
1874         if (qpbranching == 1) {
1875                 reg1++;
1876                 if (w2 < w3) reg1l++;
1877                 else reg1r++;
1878         }
1879         if (qpbranching == 2) {
1880                 reg2++;
1881                 if (w1 < w3) reg2d++;
1882                 else reg2u++;
1883         }
1884         if (qpbranching == 4) {
1885                 reg3++;
1886                 if (w1 < w2) reg3d++;
1887                 else reg3u++;
1888         }
1889         if (qpbranching == 3) {
1890                 reg4++;
1891                 if (w1 < w2) reg4d++;
1892                 else reg4u++;
1893         }
1894         if (qpbranching == 6) {
1895                 reg5++;
1896                 if (w2 < w3) reg5l++;
1897                 else reg5r++;
1898         }
1899         if (qpbranching == 5) {
1900                 reg6++;
1901                 if (w1 < w3) reg6d++;
1902                 else reg6u++;
1903         }
1904         if (qpbranching == 7) reg7++;
1905 }
1906
1907 /* print tree statistics */
1908 void printtreestats(FILE *ofp)
1909 {
1910         int i, j, besttree;
1911         double bestlkl, difflkl, difflklps, temp, sum;
1912         
1913         /* find best tree */
1914         besttree = 0;
1915         bestlkl = ulkl[0];
1916         for (i = 1; i < numutrees; i++)
1917                 if (ulkl[i] > bestlkl) {
1918                         besttree = i;
1919                         bestlkl = ulkl[i];
1920                 }
1921         
1922         fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n");
1923         fprintf(ofp, "Tree   log L   difference     S.E.   Significantly worse\n");
1924         fprintf(ofp, "--------------------------------------------------------\n");
1925         for (i = 0; i < numutrees; i++) {
1926                 difflkl = ulkl[besttree]-ulkl[i];
1927                 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl);
1928                 if (i == besttree) {
1929                         fprintf(ofp, " <----------------- best tree");
1930                 } else {
1931                         /* compute variance of Log L differences over sites */
1932                         difflklps = difflkl/(double)Maxsite;
1933                         sum = 0.0;
1934                         for (j = 0; j < Numptrn; j++) {
1935                                 temp = allsites[besttree][j] - allsites[i][j] - difflklps;
1936                                 sum += temp*temp*Weight[j];
1937                         }
1938                         sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1939                         fprintf(ofp, "%11.2f         ", sum);
1940                         if (difflkl > 1.96*sum)
1941                                 fprintf(ofp, "yes");
1942                         else
1943                                 fprintf(ofp, "no");
1944                 }
1945                 fprintf(ofp, "\n");
1946         }
1947         fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1948         
1949         if (compclock) {
1950         
1951                 /* find best tree */
1952                 besttree = 0;
1953                 bestlkl = ulklc[0];
1954                 for (i = 1; i < numutrees; i++)
1955                         if (ulklc[i] > bestlkl) {
1956                                 besttree = i;
1957                                 bestlkl = ulklc[i];
1958                         }
1959         
1960                 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n");
1961                 fprintf(ofp, "Tree   log L   difference     S.E.   Significantly worse\n");
1962                 fprintf(ofp, "--------------------------------------------------------\n");
1963                 for (i = 0; i < numutrees; i++) {
1964                         difflkl = ulklc[besttree]-ulklc[i];
1965                         fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl);
1966                         if (i == besttree) {
1967                                 fprintf(ofp, " <----------------- best tree");
1968                         } else {
1969                                 /* compute variance of Log L differences over sites */
1970                                 difflklps = difflkl/(double)Maxsite;
1971                                 sum = 0.0;
1972                                 for (j = 0; j < Numptrn; j++) {
1973                                         temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps;
1974                                         sum += temp*temp*Weight[j];
1975                                 }
1976                                 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1977                                 fprintf(ofp, "%11.2f         ", sum);
1978                                 if (difflkl > 1.96*sum)
1979                                         fprintf(ofp, "yes");
1980                                 else
1981                                         fprintf(ofp, "no");
1982                         }
1983                         fprintf(ofp, "\n");
1984                 }
1985                 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1986         }
1987 }
1988
1989 /* time stamp */
1990 void timestamp(FILE* ofp)
1991 {
1992         double timespan;
1993         double cpuspan;
1994         timespan = difftime(Stoptime, Starttime);
1995         cpuspan  = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC);
1996         fprintf(ofp, "\n\nTIME STAMP\n\n");
1997         fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) );
1998         fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1999                 timespan, timespan/60., timespan/3600.);
2000         fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
2001                 fulltime, fulltime/60., fulltime/3600.);
2002 #ifdef TIMEDEBUG
2003         fprintf(ofp, "CPU time (incl. input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n",
2004                 fullcpu, fullcpu/60., fullcpu/3600.);
2005 #endif /* TIMEDEBUG */
2006
2007 }
2008
2009 /* extern int bestrfound; */
2010
2011 /* write output file */
2012 void writeoutputfile(FILE *ofp, int part)
2013 {
2014         int i, fail, df;
2015         uli li;
2016         double pval, delta;
2017
2018    if ((part == WRITEPARAMS) || (part == WRITEALL)) {
2019 #       ifndef ALPHA
2020                 fprintf(ofp, "TREE-PUZZLE %s\n\n", VERSION);
2021 #       else
2022                 fprintf(ofp, "TREE-PUZZLE %s%s\n\n", VERSION, ALPHA);
2023 #       endif
2024
2025         fprintf(ofp, "Input file name: %s\n",INFILE);
2026         if (puzzlemode == USERTREE) fprintf(ofp, "User tree file name: %s\n",INTREE);
2027
2028
2029         fprintf(ofp, "Type of analysis: ");
2030         if (typ_optn == TREERECON_OPTN) fprintf(ofp, "tree reconstruction\n");
2031         if (typ_optn == LIKMAPING_OPTN) fprintf(ofp, "likelihood mapping\n");
2032         fprintf(ofp, "Parameter estimation: ");
2033         if (approxp_optn) fprintf(ofp, "approximate (faster)\n");
2034         else fprintf(ofp, "accurate (slow)\n");
2035         if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {
2036                 fprintf(ofp, "Parameter estimation uses: ");
2037                 if (qcalg_optn)
2038                         fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
2039                 else
2040                         fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
2041         } else {
2042                 fprintf(ofp, "Parameter estimation uses: ");
2043                 if (utree_optn)
2044                         fprintf(ofp, "1st user tree (for substitution process and rate variation)\n");
2045                 else if (qcalg_optn)
2046                         fprintf(ofp, "quartet sampling (for substitution process) + NJ tree (for rate variation)\n");
2047                 else
2048                         fprintf(ofp, "neighbor-joining tree (for substitution process and rate variation)\n");
2049         }
2050         fprintf(ofp, "\nStandard errors (S.E.) are obtained by the curvature method.\n");
2051         fprintf(ofp, "The upper and lower bounds of an approximate 95%% confidence interval\n");
2052         fprintf(ofp, "for parameter or branch length x are x-1.96*S.E. and x+1.96*S.E.\n");
2053         fprintf(ofp, "\n\n");
2054         fprintf(ofp, "SEQUENCE ALIGNMENT\n\n");
2055         fprintf(ofp, "Input data: %d sequences with %d ", Maxspc, Maxsite);
2056         if (data_optn == AMINOACID)
2057                 fprintf(ofp, "amino acid");
2058         else if (data_optn == NUCLEOTIDE && SH_optn)
2059                 fprintf(ofp, "doublet (%d nucleotide)", Maxsite*2);
2060         else if (data_optn == NUCLEOTIDE && nuc_optn)
2061                 fprintf(ofp, "nucleotide");
2062         else if (data_optn == BINARY)
2063                 fprintf(ofp, "binary state");
2064         fprintf(ofp, " sites");
2065         if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0  && !SH_optn) {
2066                 if (codon_optn == 1) fprintf(ofp, " (1st codon positions)");
2067                 if (codon_optn == 2) fprintf(ofp, " (2nd codon positions)");
2068                 if (codon_optn == 3) fprintf(ofp, " (3rd codon positions)");
2069                 if (codon_optn == 4) fprintf(ofp, " (1st and 2nd codon positions)");
2070         }
2071         if (data_optn == NUCLEOTIDE && SH_optn) {
2072                 if (SHcodon)
2073                         fprintf(ofp, " (1st and 2nd codon positions)");
2074                 else
2075                         fprintf(ofp, " (1st+2nd, 3rd+4th, etc. site)");
2076         }       
2077         fprintf(ofp, "\n");
2078         fprintf(ofp, "Number of constant sites: %d (= %.1f%% of all sites)\n",
2079                 Numconst, 100.0*fracconst);
2080         fprintf(ofp, "Number of site patterns: %d\n",
2081                 Numptrn);
2082         fprintf(ofp, "Number of constant site patterns: %d (= %.1f%% of all site patterns)\n\n\n",
2083                 Numconstpat, 100.0*fracconstpat);
2084         fprintf(ofp, "SUBSTITUTION PROCESS\n\n");
2085         fprintf(ofp, "Model of substitution: ");
2086         if (data_optn == NUCLEOTIDE) { /* nucleotides */
2087                 if (nuc_optn) {
2088                         if(HKY_optn) fprintf(ofp, "HKY (Hasegawa et al. 1985)\n");      
2089                         else fprintf(ofp, "TN (Tamura-Nei 1993)\n");
2090                         fprintf(ofp, "Transition/transversion parameter");
2091                         if (optim_optn)
2092                                 fprintf(ofp, " (estimated from data set)");
2093                         fprintf(ofp, ": %.2f", TSparam);
2094                         if (optim_optn)
2095                                 fprintf(ofp, " (S.E. %.2f)", tserr);
2096                         fprintf(ofp, "\n");
2097                         
2098                         if (optim_optn && TSparam > MAXTS - 1.0)
2099                                 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2100                         if (optim_optn && TSparam < MINTS + 0.1)
2101                                 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");                
2102                         
2103                         if (TN_optn) {
2104                                 fprintf(ofp, "Y/R transition parameter");
2105                                 if (optim_optn)
2106                                         fprintf(ofp, " (estimated from data set)");
2107                                 fprintf(ofp, ": %.2f", YRparam);
2108                                 if (optim_optn)
2109                                         fprintf(ofp, " (S.E. %.2f)", yrerr);
2110                                 fprintf(ofp, "\n");
2111                                 
2112                                 if (optim_optn && YRparam > MAXYR - 0.5)
2113                                         fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2114                                 if (optim_optn && YRparam < MINYR + 0.1)
2115                                         fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");                
2116
2117                         }
2118                 }
2119                 if (SH_optn) {
2120                         fprintf(ofp, "SH (Schoeniger-von Haeseler 1994)\n");
2121                         fprintf(ofp, "Transition/transversion parameter");
2122                         if (optim_optn) fprintf(ofp, " (estimated from data set)");
2123                         fprintf(ofp, ": %.2f\n", TSparam);
2124                         if (optim_optn)
2125                                 fprintf(ofp, " (S.E. %.2f)", tserr);
2126                         fprintf(ofp, "\n");
2127                                                 
2128                         if (optim_optn && TSparam > MAXTS - 1.0)
2129                                 fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2130                         if (optim_optn && TSparam < MINTS + 0.1)
2131                                 fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");                
2132
2133                 }       
2134         }
2135         if (data_optn == AMINOACID) { /* amino acids */
2136                 if (Dayhf_optn) fprintf(ofp, "Dayhoff (Dayhoff et al. 1978)\n");        
2137                 if (Jtt_optn) fprintf(ofp, "JTT (Jones et al. 1992)\n");
2138                 if (mtrev_optn) fprintf(ofp, "mtREV24 (Adachi-Hasegawa 1996)\n");
2139                 if (cprev_optn) fprintf(ofp, "cpREV45 (Adachi et al. 2000)\n");
2140                 if (blosum62_optn) fprintf(ofp, "BLOSUM 62 (Henikoff-Henikoff 1992)\n");
2141                 if (vtmv_optn) fprintf(ofp, "VT (Mueller-Vingron 2000)\n");
2142                 if (wag_optn) fprintf(ofp, "WAG (Whelan-Goldman 2000)\n");
2143         }
2144         if (data_optn == BINARY) { /* binary states */
2145                 fprintf(ofp, "Two-state model (Felsenstein 1981)\n");
2146         }
2147         if (data_optn == AMINOACID)
2148                         fprintf(ofp, "Amino acid ");
2149                 else if (data_optn == NUCLEOTIDE && SH_optn)
2150                         fprintf(ofp, "Doublet ");
2151                 else if (data_optn == NUCLEOTIDE && nuc_optn)
2152                         fprintf(ofp, "Nucleotide ");
2153                 else if (data_optn == BINARY)
2154                         fprintf(ofp, "Binary state ");
2155         fprintf(ofp, "frequencies (");
2156         if (Frequ_optn) fprintf(ofp, "estimated from data set");
2157         else fprintf(ofp, "user specified");
2158         if (data_optn == NUCLEOTIDE && SH_optn && sym_optn)
2159                 fprintf(ofp, " and symmetrized");
2160         fprintf(ofp, "):\n\n");
2161         for (i = 0; i < gettpmradix(); i++)
2162                 fprintf(ofp, " pi(%s) = %5.1f%%\n",
2163                         int2code(i), Freqtpm[i]*100);
2164         if (data_optn == NUCLEOTIDE) {
2165                 fprintf(ofp, "\nExpected transition/transversion ratio: %.2f",
2166                         tstvratio);
2167                 if (tstvf84 == 0.0) fprintf(ofp, "\n");
2168                 else fprintf(ofp, " (= F84 parameter)\n");
2169                 fprintf(ofp, "Expected pyrimidine transition/purine transition");
2170                 fprintf(ofp, " ratio: %.2f\n", yrtsratio);
2171                 if (tstvf84 != 0.0 && TN_optn)
2172                         fprintf(ofp,
2173                                 "This TN model is equivalent to a F84 model (Felsenstein 1984).\n");
2174         }
2175         fprintf(ofp, "\n\nRATE HETEROGENEITY\n\n");
2176         fprintf(ofp, "Model of rate heterogeneity: ");
2177         if (rhetmode == UNIFORMRATE) fprintf(ofp, "uniform rate\n");
2178         if (rhetmode == GAMMARATE  ) fprintf(ofp, "Gamma distributed rates\n");
2179         if (rhetmode == TWORATE    ) fprintf(ofp, "two rates (1 invariable + 1 variable)\n");
2180         if (rhetmode == MIXEDRATE  ) fprintf(ofp, "mixed (1 invariable + %d Gamma rates)\n", numcats);
2181         if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
2182                 fprintf(ofp, "Fraction of invariable sites");
2183                 if (fracinv_optim) fprintf(ofp, " (estimated from data set)");
2184                 fprintf(ofp, ": %.2f", fracinv);
2185                 if (fracinv_optim) fprintf(ofp, " (S.E. %.2f)", fierr);
2186                 fprintf(ofp, "\n");
2187                         
2188                 if (fracinv_optim && fracinv > MAXFI - 0.05)
2189                         fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2190                 
2191                 fprintf(ofp, "Number of invariable sites: %.0f\n", floor(fracinv*Maxsite));
2192         }
2193         if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
2194                 fprintf(ofp, "Gamma distribution parameter alpha");
2195                 if (grate_optim) fprintf(ofp, " (estimated from data set)");
2196                 fprintf(ofp, ": %.2f", (1.0-Geta)/Geta);
2197                 if (grate_optim) fprintf(ofp, " (S.E. %.2f)",
2198                         geerr/(Geta*Geta)); /* first order approximation */
2199                 fprintf(ofp, "\n");
2200                 
2201                 if (grate_optim && Geta > MAXGE - 0.02)
2202                         fprintf(ofp, "WARNING --- parameter estimate close to internal upper bound!\n");
2203                 if (grate_optim && Geta < MINGE + 0.01)
2204                         fprintf(ofp, "WARNING --- parameter estimate close to internal lower bound!\n");                
2205
2206                 fprintf(ofp, "Number of Gamma rate categories: %d\n", numcats);
2207         }
2208         if (rhetmode == MIXEDRATE) {
2209                 fprintf(ofp, "Total rate heterogeneity (invariable sites + Gamma model): ");
2210                 fprintf(ofp, "%.2f", fracinv + Geta - fracinv*Geta);
2211                 if (grate_optim && fracinv_optim)
2212                         fprintf(ofp, " (S.E. %.2f)", geerr + fierr); /* first order approximation */
2213                 else if (grate_optim && !fracinv_optim)
2214                         fprintf(ofp, " (S.E. %.2f)", geerr);
2215                 else if (!grate_optim && fracinv_optim)
2216                         fprintf(ofp, " (S.E. %.2f)", fierr);
2217                 fprintf(ofp, "\n");
2218         }
2219         if (rhetmode != UNIFORMRATE) {
2220                 fprintf(ofp, "\nRates and their respective probabilities used in the likelihood function:\n");
2221                 fprintf(ofp, "\n Category  Relative rate  Probability\n");
2222                 if (rhetmode == TWORATE || rhetmode == MIXEDRATE)
2223                         fprintf(ofp, "  0         0.0000         %.4f\n", fracinv);
2224                 for (i = 0; i < numcats; i++)
2225                         fprintf(ofp, "  %d         %.4f         %.4f\n",
2226                                 i+1, Rates[i], (1.0-fracinv)/(double) numcats); 
2227         }
2228         if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
2229                 fprintf(ofp, "\nCategories 1-%d approximate a continous ", numcats);
2230                 fprintf(ofp, "Gamma-distribution with expectation 1\n"); 
2231                 fprintf(ofp, "and variance ");
2232                 if (Geta == 1.0) fprintf(ofp, "infinity");
2233                 else fprintf(ofp, "%.2f", Geta/(1.0-Geta));
2234                 fprintf(ofp, ".\n");
2235         }
2236
2237         if (typ_optn == TREERECON_OPTN && (puzzlemode == QUARTPUZ || puzzlemode == USERTREE))
2238                 if (rhetmode != UNIFORMRATE) {
2239                         fprintf(ofp, "\nCombination of categories that contributes");
2240                         fprintf(ofp, " the most to the likelihood\n");
2241                         fprintf(ofp, "(computation done without clock assumption assuming ");
2242                         if (puzzlemode == QUARTPUZ) fprintf(ofp, "quartet-puzzling tree");
2243                         if (puzzlemode == USERTREE) {
2244                                 if (utree_optn) fprintf(ofp, "1st user tree");
2245                                 else            fprintf(ofp, "NJ tree");
2246                         }
2247                         fprintf(ofp, "):\n\n");
2248                         if (bestratefound==0) findbestratecombination();
2249                         printbestratecombination(ofp);
2250                 }
2251                 
2252         fprintf(ofp, "\n\nSEQUENCE COMPOSITION (SEQUENCES IN INPUT ORDER)\n\n");
2253         fail = FALSE;
2254         fprintf(ofp, "              5%% chi-square test  p-value\n");
2255         for (i = 0; i < Maxspc; i++) {
2256                 fprintf(ofp, " ");
2257                 fputid10(ofp, i);
2258                 pval = homogentest(i);
2259                 if ( pval < 0.05 ) fprintf(ofp, "        failed       ");
2260                 else fprintf(ofp, "        passed       ");
2261                 if (chi2fail) fail = TRUE;
2262                 fprintf(ofp, "  %6.2f%%  ", pval*100.0);
2263                 fprintf(ofp, "\n");     
2264         }
2265         fprintf(ofp, "\n");
2266         fprintf(ofp, "The chi-square tests compares the ");
2267         if (data_optn == AMINOACID)
2268                 fprintf(ofp, "amino acid");
2269         else if (data_optn == NUCLEOTIDE && SH_optn)
2270                 fprintf(ofp, "doublet");
2271         else if (data_optn == NUCLEOTIDE && nuc_optn)
2272                 fprintf(ofp, "nucleotide");
2273         else if (data_optn == BINARY)
2274                 fprintf(ofp, "binary state");
2275         fprintf(ofp," composition of each sequence\n");
2276         fprintf(ofp, "to the frequency distribution assumed in the maximum likelihood model.\n");       
2277         if (fail) {
2278                 fprintf(ofp, "\nWARNING: Result of chi-square test may not be valid");
2279                 fprintf(ofp, " because of small\nmaximum likelihood frequencies and");
2280                 fprintf(ofp, " short sequence length!\n");
2281         }
2282         fprintf(ofp, "\n\nIDENTICAL SEQUENCES\n\n");
2283         fprintf(ofp, "The sequences in each of the following groups are all identical. To speed\n");
2284         fprintf(ofp, "up computation please remove all but one of each group from the data set.\n\n");
2285         findidenticals(ofp);
2286         fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD DISTANCES\n\n");
2287         fprintf(ofp, "Maximum likelihood distances are computed using the ");
2288         fprintf(ofp, "selected model of\nsubstitution and rate heterogeneity.\n\n");
2289         putdistance(ofp);
2290         fprintf(ofp, "\nAverage distance (over all possible pairs of sequences):  %.5f\n",
2291                 averagedist() / 100.0);
2292
2293
2294    } /* if WRITEPARAMS) || WRITEALL */
2295
2296    if ((part == WRITEREST) || (part == WRITEALL)) {
2297
2298         if (puzzlemode == QUARTPUZ &&typ_optn == TREERECON_OPTN) {
2299                 fprintf(ofp, "\n\nBAD QUARTET STATISTICS (SEQUENCES IN INPUT ORDER)\n\n");
2300                 for (i = 0; i < Maxspc; i++) {
2301                         fprintf(ofp, " ");
2302                         fputid10(ofp, i);
2303                         if (badqs > 0)
2304                            fprintf(ofp, "  [%lu]   %6.2f%%\n", badtaxon[i], (double) (100 * badtaxon[i]) / (double) badqs);
2305                         else
2306                            fprintf(ofp, "  [%lu]\n", badtaxon[i]);
2307                 }
2308                 fprintf(ofp, "\nThe number in square brackets indicates how often each sequence is\n");
2309                 fprintf(ofp, "involved in one of the %lu completely unresolved quartets of the\n", badqs);
2310                 fprintf(ofp, "quartet puzzling tree search.\n");
2311                 if (badqs > 0)
2312                    fprintf(ofp, "Additionally the according percentages are given.\n");
2313         }
2314
2315         if (typ_optn == TREERECON_OPTN) {
2316         
2317                 fprintf(ofp, "\n\nTREE SEARCH\n\n");
2318                 if (puzzlemode == QUARTPUZ) {
2319                         fprintf(ofp, "Quartet puzzling is used to choose from the possible tree topologies\n");
2320                         fprintf(ofp, "and to simultaneously infer support values for internal branches.\n\n");
2321                         fprintf(ofp, "Number of puzzling steps: %lu\n", Numtrial);
2322                         fprintf(ofp, "Analysed quartets: %lu\n", Numquartets);
2323                         fprintf(ofp, "Unresolved quartets: %lu (= %.1f%%)\n",
2324                                 badqs, (double) badqs / (double) Numquartets * 100);    
2325                         fprintf(ofp, "\nQuartet trees are based on %s maximum likelihood values\n",
2326                                 (approxqp ? "approximate" : "exact"));
2327                         fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n");
2328                 }
2329                 if (puzzlemode == USERTREE) {
2330                         fprintf(ofp, "%d tree topologies were specified by the user.\n", numutrees);            
2331                 }
2332                 if (puzzlemode == PAIRDIST) {
2333                         fprintf(ofp, "No tree search performed (maximum likelihood distances only).\n");
2334                 }
2335
2336                 if (puzzlemode == QUARTPUZ) {
2337                         fprintf(ofp, "QUARTET PUZZLING TREE\n\n");
2338                         fprintf(ofp, "Support for the internal branches of the unrooted quartet puzzling\n");
2339                         fprintf(ofp, "tree topology is shown in percent.\n");
2340                         if (consincluded == Maxspc - 3)
2341                                 fprintf(ofp,"\nThis quartet puzzling tree is completely resolved.\n");
2342                         else
2343                                 fprintf(ofp,"\nThis quartet puzzling tree is not completely resolved!\n");
2344                         fprintf(ofp, "\n\n");
2345                         plotconsensustree(ofp);
2346                         fprintf(ofp, "\n\nQuartet puzzling tree (in CLUSTAL W notation):\n\n");
2347                         writeconsensustree(ofp);
2348                         fprintf(ofp, "\n\nBIPARTITIONS\n\n");
2349                         fprintf(ofp, "The following bipartitions occured at least once");
2350                         fprintf(ofp, " in all intermediate\ntrees that have been generated ");
2351                         fprintf(ofp, "in the %lu puzzling steps:\n\n", Numtrial);
2352                         fprintf(ofp, "Bipartitions included in the quartet puzzling tree:\n");
2353                         fprintf(ofp,
2354                                 "(bipartition with sequences in input order : number of times seen)\n\n");
2355                         for (li = 0; li < consincluded; li++) {
2356                                 fprintf(ofp, " ");
2357                                 printsplit(ofp, splitfreqs[2*li+1]);
2358                                 fprintf(ofp, "  :  %lu\n", splitfreqs[2*li]);
2359                         }
2360                         if (consincluded == 0) fprintf(ofp, " None (no bipartition included)\n");
2361                         fprintf(ofp, "\nBipartitions not included in the quartet puzzling tree:\n");
2362                         fprintf(ofp,
2363                                 "(bipartition with sequences in input order : number of times seen)\n\n");
2364                                                 
2365                         if (consincluded == numbiparts) {
2366                                 fprintf(ofp, " None (all bipartitions are included)\n");
2367                         } else {
2368                                 /* print first 20 bipartions not included */                            
2369                                 for (li = consincluded; (li < numbiparts) && (li < consincluded + 20UL); li++) {
2370                                         fprintf(ofp, " ");
2371                                         printsplit(ofp, splitfreqs[2*li+1]);
2372                                         fprintf(ofp, "  :  %lu\n", splitfreqs[2*li]);
2373                                 }
2374                                 if ((li == consincluded + 20UL) && (li != numbiparts)) 
2375                                         fprintf(ofp, "\n(%lu other less frequent bipartitions not shown)\n",
2376                                                 numbiparts - consincluded - 20UL);                      
2377                         }       
2378                         fprintfsortedpstrees(ofp, psteptreelist, psteptreenum, psteptreesum, 0, 5.0);
2379                 }
2380         
2381                 if (puzzlemode == QUARTPUZ) {
2382                         fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS ON QUARTET");
2383                         fprintf(ofp, " PUZZLING TREE (NO CLOCK)\n\nBranch lengths are computed using");
2384                         fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
2385                         clockmode = 0; /* nonclocklike branch lengths */
2386                         prtopology(ofp);
2387                         fprintf(ofp, "\n");
2388                         resulttree(ofp);
2389                         fprintf(ofp, "\n\nQuartet puzzling tree with maximum likelihood branch lengths");
2390                         fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
2391                         fputphylogeny(ofp);                     
2392                         if (compclock) {
2393                                 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF QUARTET");
2394                                 fprintf(ofp, " PUZZLING TREE (WITH CLOCK)\n\nBranch lengths are computed using");
2395                                 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
2396                                 fprintf(ofp, "\nRoot located at branch: %d  ", locroot+1);
2397                                 if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n");
2398                                 if (rootsearch == 1) {
2399                                         fprintf(ofp, "(automatic search)");
2400                                         if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
2401                                         fprintf(ofp, "\n\n");   
2402                                         fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
2403                                         fprintf(ofp, "(rename \"outtree\" to \"intree\") and select location of root manually!");
2404                                         fprintf(ofp, "\n\n\n");
2405                                 }
2406                                 if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n");
2407                                 clockmode = 1; /* clocklike branch lengths */
2408                                 prtopology(ofp);
2409                                 fprintf(ofp, "\n");
2410                                 fprintf(ofp, "\nTree drawn as unrooted tree for better ");
2411                                 fprintf(ofp, "comparison with non-clock tree!\n");
2412                                 resulttree(ofp);
2413                                 fprintf(ofp, "\n");
2414                                 resultheights(ofp);
2415                                 fprintf(ofp, "\n\nRooted quartet puzzling tree with clocklike");
2416                                 fprintf(ofp, " maximum likelihood branch lengths\n");
2417                                 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
2418                                 fputrooted(ofp, locroot);
2419                         }
2420                         
2421                         if (compclock) {
2422                                 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST\n\n");
2423                                 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
2424                                         Ctree->lklhd, Numspc + Numibrnch);
2425                                 fprintf(ofp, "log L with clock:    %.2f (independent branch parameters: %d)\n\n",
2426                                         Ctree->lklhdc, Numhts + 1);
2427                                 delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc));
2428                                 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);   
2429                                 df = Numspc + Numibrnch - Numhts - 1;
2430                                 fprintf(ofp, "Degress of freedom of chi-square distribution: %d\n", df);
2431                                 
2432                                 pval = IncompleteGammaQ(df*0.5, delta*0.5);
2433                                 
2434                                 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
2435                                 if (pval >= 0.05) {
2436                                         fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
2437                                         fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
2438                                         fprintf(ofp, "is not significantly increased.\n");
2439                                 } else {
2440                                         fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
2441                                         fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
2442                                         fprintf(ofp, "significantly increased.\n");
2443                                 }
2444                                 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
2445                         }
2446                                 
2447                 }
2448         }
2449         
2450         if (typ_optn == LIKMAPING_OPTN) {
2451         
2452                         fprintf(ofp, "\n\nLIKELIHOOD MAPPING ANALYSIS\n\n");
2453                         fprintf(ofp, "Number of quartets: %lu", Numquartets);
2454                         if (lmqts == 0) fprintf(ofp, " (all possible)\n");
2455                         else fprintf(ofp, " (random choice)\n");
2456                         fprintf(ofp, "\nQuartet trees are based on approximate maximum likelihood values\n");
2457                         fprintf(ofp, "using the selected model of substitution and rate heterogeneity.\n\n\n");
2458                         if (numclust == 1) {
2459                                 fprintf(ofp, "Sequences are not grouped in clusters.\n");
2460                         } else {
2461                                 fprintf(ofp, "Sequences are grouped in %d clusters.\n", numclust);
2462                                 fprintf(ofp, "\nCluster a: %d sequences\n\n", clustA);
2463                                 for (i = 0; i < clustA; i++) {
2464                                         fprintf(ofp, " ");
2465                                         fputid(ofp, clusterA[i]);
2466                                         fprintf(ofp, "\n");
2467                                 }
2468                                 fprintf(ofp, "\nCluster b: %d sequences\n\n", clustB);
2469                                 for (i = 0; i < clustB; i++) {
2470                                         fprintf(ofp, " ");
2471                                         fputid(ofp, clusterB[i]);
2472                                         fprintf(ofp, "\n");
2473                                 }
2474                                 if (numclust > 2) {
2475                                         fprintf(ofp, "\nCluster c: %d sequences\n\n", clustC);
2476                                         for (i = 0; i < clustC; i++) {
2477                                                 fprintf(ofp, " ");
2478                                                 fputid(ofp, clusterC[i]);
2479                                                 fprintf(ofp, "\n");
2480                                         }
2481                                 }
2482                                 if (numclust == 4) {
2483                                         fprintf(ofp, "\nCluster d: %d sequences\n\n", clustD);
2484                                         for (i = 0; i < clustD; i++) {
2485                                                 fprintf(ofp, " ");
2486                                                 fputid(ofp, clusterD[i]);
2487                                                 fprintf(ofp, "\n");
2488                                         }
2489                                 }
2490                                 fprintf(ofp, "\nQuartets of sequences used in the likelihood");
2491                                 fprintf(ofp, " mapping analysis are generated\n");
2492                                 if (numclust == 2)
2493                                         fprintf(ofp, "by drawing two sequences from cluster a and two from cluster b.");
2494                                 if (numclust == 3)
2495                                         fprintf(ofp, "by drawing one sequence from clusters a and b and two from cluster c.");
2496                                 if (numclust == 4)
2497                                         fprintf(ofp, "by drawing one sequence from each of the clusters a, b, c, and d.");
2498                         }
2499
2500                         fprintf(ofp, "\n\nLIKELIHOOD MAPPING STATISTICS\n\n");
2501                         fprintf(ofp, "Occupancies of the three areas 1, 2, 3:\n\n");
2502                         if (numclust == 4)
2503                                 fprintf(ofp, "                    (a,b)-(c,d)\n");
2504                         if (numclust == 3)
2505                                 fprintf(ofp, "                    (a,b)-(c,c)\n");
2506                         if (numclust == 2)
2507                                 fprintf(ofp, "                    (a,a)-(b,b)\n");
2508                         fprintf(ofp, "                        /\\\n");
2509                         fprintf(ofp, "                       /  \\\n");
2510                         fprintf(ofp, "                      /    \\\n");
2511                         fprintf(ofp, "                     /   1  \\\n");
2512                         fprintf(ofp, "                    / \\    / \\\n");
2513                         fprintf(ofp, "                   /   \\  /   \\\n");
2514                         fprintf(ofp, "                  /     \\/     \\\n");
2515                         fprintf(ofp, "                 /  3    :   2  \\\n");
2516                         fprintf(ofp, "                /        :       \\\n");
2517                         fprintf(ofp, "               /__________________\\\n");
2518                         if (numclust == 4)
2519                                 fprintf(ofp, "     (a,d)-(b,c)                  (a,c)-(b,d)\n");
2520                         if (numclust == 3)
2521                                 fprintf(ofp, "     (a,c)-(b,c)                  (a,c)-(b,c)\n");
2522                         if (numclust == 2)
2523                                 fprintf(ofp, "     (a,b)-(a,b)                  (a,b)-(a,b)\n");
2524                         fprintf(ofp, "\n");
2525                         fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%)\n",
2526                                 ar1, (double) ar1*100.0/Numquartets);
2527                         fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%)\n",
2528                                 ar2, (double) ar2*100.0/Numquartets);
2529                         fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%)\n\n",
2530                                 ar3, (double) ar3*100.0/Numquartets);
2531                         fprintf(ofp, "Occupancies of the seven areas 1, 2, 3, 4, 5, 6, 7:\n\n");
2532                         if (numclust == 4)
2533                                 fprintf(ofp, "                    (a,b)-(c,d)\n");
2534                         if (numclust == 3)
2535                                 fprintf(ofp, "                    (a,b)-(c,c)\n");
2536                         if (numclust == 2)
2537                                 fprintf(ofp, "                    (a,a)-(b,b)\n");
2538                         fprintf(ofp, "                        /\\\n");
2539                         fprintf(ofp, "                       /  \\\n");
2540                         fprintf(ofp, "                      /  1 \\\n");
2541                         fprintf(ofp, "                     / \\  / \\\n");
2542                         fprintf(ofp, "                    /   /\\   \\\n");
2543                         fprintf(ofp, "                   / 6 /  \\ 4 \\\n");
2544                         fprintf(ofp, "                  /   /  7 \\   \\\n");
2545                         fprintf(ofp, "                 / \\ /______\\ / \\\n");
2546                         fprintf(ofp, "                / 3  :   5  :  2 \\\n");
2547                         fprintf(ofp, "               /__________________\\\n");
2548                         if (numclust == 4)
2549                                 fprintf(ofp, "     (a,d)-(b,c)                  (a,c)-(b,d)\n");
2550                         if (numclust == 3)
2551                                 fprintf(ofp, "     (a,c)-(b,c)                  (a,c)-(b,c)\n");
2552                         if (numclust == 2)
2553                                 fprintf(ofp, "     (a,b)-(a,b)                  (a,b)-(a,b)\n");
2554                         fprintf(ofp, "\n");
2555                         fprintf(ofp, "Number of quartets in region 1: %lu (= %.1f%%)    left:   %lu   right: %lu\n",
2556                                 reg1, (double) reg1*100.0/Numquartets, reg1l, reg1r);
2557                         fprintf(ofp, "Number of quartets in region 2: %lu (= %.1f%%)    bottom: %lu   top:   %lu\n",
2558                                 reg2, (double) reg2*100.0/Numquartets, reg2d, reg2u);
2559                         fprintf(ofp, "Number of quartets in region 3: %lu (= %.1f%%)    bottom: %lu   top:   %lu\n",
2560                                 reg3, (double) reg3*100.0/Numquartets, reg3d, reg3u);
2561                         fprintf(ofp, "Number of quartets in region 4: %lu (= %.1f%%)    bottom: %lu   top:   %lu\n",
2562                                 reg4, (double) reg4*100.0/Numquartets, reg4d, reg4u);
2563                         fprintf(ofp, "Number of quartets in region 5: %lu (= %.1f%%)    left:   %lu   right: %lu\n",
2564                                 reg5, (double) reg5*100.0/Numquartets, reg5l, reg5r);
2565                         fprintf(ofp, "Number of quartets in region 6: %lu (= %.1f%%)    bottom: %lu   top:   %lu\n",
2566                                 reg6, (double) reg6*100.0/Numquartets, reg6d, reg6u);
2567                         fprintf(ofp, "Number of quartets in region 7: %lu (= %.1f%%)\n",
2568                                 reg7, (double) reg7*100.0/Numquartets);
2569         }
2570     
2571    } /* if WRITEREST) || WRITEALL */
2572 }
2573
2574
2575 #if PARALLEL
2576 void writetimesstat(FILE *ofp)
2577 {
2578         int n;
2579         double cpusum = 0.0;
2580         double wallmax = 0.0;
2581         cputimes[0]      = ((double)(cputimestop - cputimestart) / CLOCKS_PER_SEC);
2582         walltimes[0]     = difftime(walltimestop, walltimestart);
2583         fullcpu          = tarr.fullcpu;
2584         fulltime         = tarr.fulltime;
2585         fullcputimes[0]  = tarr.fullcpu;
2586         fullwalltimes[0] = tarr.fulltime;
2587         altcputimes[0]   = tarr.cpu;
2588         altwalltimes[0]  = tarr.time;
2589         fprintf(ofp, "\n\n\nPARALLEL LOAD STATISTICS\n\n");
2590         
2591         fprintf(ofp, "The analysis was performed with %d parallel processes (1 master and \n", PP_NumProcs);
2592         fprintf(ofp, "%d worker processes).\n\n", PP_NumProcs-1);
2593         fprintf(ofp, "The following table the distribution of computation to the processes.\n");
2594         fprintf(ofp, "The first column gives the process number, where 0 is the master process.\n");
2595         fprintf(ofp, "The second and third column show the number of quartets computed (3 topologies \n");
2596         fprintf(ofp, "each) and the the number of scheduling blocks the came in. The last two columns \n");
2597         fprintf(ofp, "state the number of puzzling steps done by a process and number of scheduling \n");
2598         fprintf(ofp, "blocks.\n\n");
2599         fprintf(ofp, "process  #quartets #chunks  #puzzlings #chunks \n");
2600         fprintf(ofp, "-----------------------------------------------\n");
2601         for (n=0; n<PP_NumProcs; n++) {
2602                 fprintf(ofp, "%6d  %9d %7d  %10d %7d \n", n,
2603                         quartsent[n], quartsentn[n],
2604                         splitsent[n], splitsentn[n]);
2605         } /* for */
2606         fprintf(ofp, "-----------------------------------------------\n");
2607         fprintf(ofp, " Sums:  %9d %7d  %10d %7d \n",
2608                 PP_quartrecved, PP_quartrecvedn,
2609                 PP_splitrecved, PP_splitrecvedn);
2610
2611 #ifdef TIMEDEBUG
2612         fprintf(ofp, "\n\nBelow the distribution of computing times (CPU and wallclock) per host is shown.\n");
2613         fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
2614         fprintf(ofp, "sum of CPU times and the maximum wallclock time is shown.\n\n");
2615         fprintf(ofp, "process   CPU-time[s]     [min]   [hours] | wallclock[s]     [min]   [hours] \n");
2616         fprintf(ofp, "----------------------------------------------------------------------------\n");
2617         for (n=0; n<PP_NumProcs; n++) {
2618
2619 #               ifdef TIMEDEBUG
2620                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f  | %11.1f %9.1f %9.1f\n", n,
2621                         cputimes[n],  cputimes[n] /60, cputimes[n] /3600,
2622                         walltimes[n], walltimes[n]/60, walltimes[n]/3600);
2623 #               endif /* TIMEDEBUG */
2624
2625                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f  | %11.1f %9.1f %9.1f\n", n,
2626                         fullcputimes[n],  fullcputimes[n] /60, fullcputimes[n] /3600,
2627                         fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
2628
2629 #               ifdef TIMEDEBUG
2630                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f  | %11.1f %9.1f %9.1f alt\n", n,
2631                         altcputimes[n],  altcputimes[n] /60, altcputimes[n] /3600,
2632                         altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
2633 #               endif /* TIMEDEBUG */
2634
2635                 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
2636                 cpusum += fullcputimes[n];
2637         } /* for */
2638         fprintf(ofp, "----------------------------------------------------------------------------\n");
2639         fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f  | %11.1f %9.1f %9.1f \n",
2640                 cpusum, cpusum/60, cpusum/3600, wallmax, wallmax/60, wallmax/3600);
2641 #else /* TIMEDEBUG */
2642         fprintf(ofp, "\n\nBelow the distribution of computing times (wallclock) per host is shown.\n");
2643         fprintf(ofp, "The times are shown in seconds, minutes, and hours. At the bottom of the table the\n");
2644         fprintf(ofp, "the maximum wallclock times is shown.\n\n");
2645         fprintf(ofp, "process   wallclock[s]     [min]   [hours] \n");
2646         fprintf(ofp, "----------------------------------------------------------------------------\n");
2647         for (n=0; n<PP_NumProcs; n++) {
2648
2649 #               ifdef TIMEDEBUG
2650                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f\n", n,
2651                         walltimes[n], walltimes[n]/60, walltimes[n]/3600);
2652 #               endif /* TIMEDEBUG */
2653
2654                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f\n", n,
2655                         fullwalltimes[n], fullwalltimes[n]/60, fullwalltimes[n]/3600);
2656
2657 #               ifdef TIMEDEBUG
2658                 fprintf(ofp, "%6d   %11.1f %9.1f %9.1f alt\n", n,
2659                         altwalltimes[n], altwalltimes[n]/60, altwalltimes[n]/3600);
2660 #               endif /* TIMEDEBUG */
2661
2662                 if (fullwalltimes[n] > wallmax) wallmax=fullwalltimes[n];
2663                 cpusum += fullcputimes[n];
2664         } /* for */
2665         fprintf(ofp, "----------------------------------------------------------------------------\n");
2666         fprintf(ofp, "Sum/Max: %11.1f %9.1f %9.1f \n",
2667                 wallmax, wallmax/60, wallmax/3600);
2668 #endif /* TIMEDEBUG */
2669
2670         fullcpu          = cpusum;
2671         fulltime         = wallmax;
2672
2673 } /* writetimesstat */
2674 #endif
2675
2676
2677 /* write current user tree to file */
2678 void writecutree(FILE *ofp, int num)
2679 {
2680         int df;
2681         double pval, delta;
2682
2683
2684         if (typ_optn == TREERECON_OPTN) {
2685         
2686                 if (puzzlemode == USERTREE) {
2687                         fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
2688                         fprintf(ofp, " DEFINED TREE # %d (NO CLOCK)\n\nBranch lengths are computed using", num);
2689                         fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n\n\n");
2690                         clockmode = 0; /* nonclocklike branch lengths */
2691                         prtopology(ofp);
2692                         fprintf(ofp, "\n");
2693                         resulttree(ofp);
2694                         fprintf(ofp, "\n\nUnrooted user defined tree with maximum likelihood branch lengths");
2695                         fprintf(ofp, "\n(in CLUSTAL W notation):\n\n");
2696                         fputphylogeny(ofp);
2697                         if (compclock) {
2698                                 fprintf(ofp, "\n\nMAXIMUM LIKELIHOOD BRANCH LENGTHS OF USER");
2699                                 fprintf(ofp, " DEFINED TREE # %d (WITH CLOCK)\n\nBranch lengths are computed using", num);
2700                                 fprintf(ofp, " the selected model of\nsubstitution and rate heterogeneity.\n");
2701                                 fprintf(ofp, "\nRoot located at branch: %d  ", locroot+1);
2702                                 if (rootsearch == 0) fprintf(ofp, "(user specified)\n\n\n");
2703                                 if (rootsearch == 1) {
2704                                         fprintf(ofp, "(automatic search)");
2705                                         if (numbestroot > 1) fprintf(ofp, "- WARNING: %d best locations found! -", numbestroot);
2706                                         fprintf(ofp, "\n\n");
2707                                         fprintf(ofp, "If the automatic search misplaces the root please rerun the analysis\n");
2708                                         fprintf(ofp, "and select location of root manually!");
2709                                         fprintf(ofp, "\n\n\n");
2710
2711                                 }
2712                                 if (rootsearch == 2) fprintf(ofp, "(displayed outgroup)\n\n\n");
2713                                 clockmode = 1; /* clocklike branch lengths */
2714                                 prtopology(ofp);
2715                                 fprintf(ofp, "\n");
2716                                 resulttree(ofp);
2717                                 fprintf(ofp, "\n");
2718                                 resultheights(ofp);
2719                                 fprintf(ofp, "\n\nRooted user defined tree with clocklike ");
2720                                 fprintf(ofp, "maximum likelihood branch lengths\n");
2721                                 fprintf(ofp, "(in CLUSTAL W notation):\n\n");
2722                                 fputrooted(ofp, locroot);
2723                         }
2724                         
2725                         if (compclock) {
2726                                 fprintf(ofp, "\n\nMOLECULAR CLOCK LIKELIHOOD RATIO TEST FOR USER TREE # %d\n\n", num);
2727                                 fprintf(ofp, "log L without clock: %.2f (independent branch parameters: %d)\n",
2728                                         Ctree->lklhd, Numspc + Numibrnch);
2729                                 fprintf(ofp, "log L with clock:    %.2f (independent branch parameters: %d)\n\n",
2730                                         Ctree->lklhdc, Numhts + 1);
2731                                 delta = 2.0*((Ctree->lklhd) - (Ctree->lklhdc));
2732                                 fprintf(ofp, "Likelihood ratio test statistic delta: %.2f\n", delta);   
2733                                 df = Numspc + Numibrnch - Numhts - 1;
2734                                 fprintf(ofp, "Degrees of freedom of chi-square distribution: %d\n", df);
2735                                 
2736                                 pval = IncompleteGammaQ (df*0.5, delta*0.5);
2737                                 
2738                                 fprintf(ofp, "Critical significance level: %.2f%%\n\n", pval*100.0);
2739                                 if (pval >= 0.05) {
2740                                         fprintf(ofp, "The simpler (clocklike) tree can not be rejected on a significance\n");
2741                                         fprintf(ofp, "level of 5%%. The log-likelihood of the more complex (no clock) tree\n");
2742                                         fprintf(ofp, "is not significantly increased.\n");
2743                                 } else {
2744                                         fprintf(ofp, "The simpler (clocklike) tree is rejected on a significance level\n");
2745                                         fprintf(ofp, "of 5%%. The log-likelihood of the more complex (no clock) tree is\n");
2746                                         fprintf(ofp, "significantly increased.\n");
2747                                 }
2748                                 fprintf(ofp, "\nPlease take care that the correct root is used!\n");
2749                         }                       
2750                 }       
2751         }
2752 }
2753
2754
2755 /******************************************************************************/
2756 /* timer routines                                                             */
2757 /******************************************************************************/
2758
2759 /* start timer */
2760 void starttimer()
2761 {
2762         time(&time0);
2763         time1 = time0;
2764 }
2765
2766 /* check remaining time and print message if necessary */
2767 void checktimer(uli numqts)
2768 {
2769         double tc2, mintogo, minutes, hours;
2770
2771         time(&time2);
2772         if ( (time2 - time1) > 900) { /* generate message every 15 minutes */
2773                 /* every 900 seconds */
2774                 /* percentage of completed quartets */
2775                 if (mflag == 0) {
2776                         mflag = 1;
2777                         FPRINTF(STDOUTFILE "\n");
2778                 }
2779                 tc2 = 100.*numqts/Numquartets;
2780                 mintogo = (100.0-tc2) *
2781                         (double) (time2-time0)/60.0/tc2;
2782                 hours = floor(mintogo/60.0);
2783                 minutes = mintogo - 60.0*hours;
2784                 FPRINTF(STDOUTFILE "%.2f%%", tc2);
2785                 FPRINTF(STDOUTFILE " completed (remaining");
2786                 FPRINTF(STDOUTFILE " time: %.0f", hours);
2787                 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2788                 FPRINTF(STDOUTFILE " minutes)\n");
2789                 fflush(STDOUT);
2790                 time1 = time2;
2791         }
2792
2793 }
2794
2795 /* check remaining time and print message if necessary */
2796 void checktimer2(uli numqts, uli all, int flag)
2797 {
2798         double tc2, mintogo, minutes, hours;
2799
2800         static time_t tt1;
2801         static time_t tt2;
2802
2803         if (flag == 1) {
2804                 time(&tt1);
2805                 time(&tt2);
2806         } else {
2807                 time(&tt2);
2808                 if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */
2809                         /* every 900 seconds */
2810                         /* percentage of completed quartets */
2811                         if (mflag == 0) {
2812                                 mflag = 1;
2813                                 FPRINTF(STDOUTFILE "\n");
2814                         }
2815                         tc2 = 100.*numqts/Numquartets;
2816                         mintogo = (100.0-tc2) *
2817                                 (double) (tt2-time0)/60.0/tc2;
2818                         hours = floor(mintogo/60.0);
2819                         minutes = mintogo - 60.0*hours;
2820                         FPRINTF(STDOUTFILE "%.2f%%", tc2);
2821                         FPRINTF(STDOUTFILE " completed (remaining");
2822                         FPRINTF(STDOUTFILE " time: %.0f", hours);
2823                         FPRINTF(STDOUTFILE " hours %.0f", minutes);
2824                         FPRINTF(STDOUTFILE " minutes)\n");
2825                         fflush(STDOUT);
2826                         tt1 = tt2;
2827                 }
2828         }
2829 }
2830
2831 void resetqblocktime(timearray_t *ta)
2832 {
2833         ta->quartcpu += ta->quartblockcpu;
2834         ta->quartblockcpu = 0.0;
2835         ta->quarttime += ta->quartblocktime;
2836         ta->quartblocktime = 0.0;
2837 } /* resetqblocktime */
2838
2839
2840 void resetpblocktime(timearray_t *ta)
2841 {
2842         ta->puzzcpu += ta->puzzblockcpu;
2843         ta->puzzblockcpu = 0.0;
2844         ta->puzztime += ta->puzzblocktime;
2845         ta->puzzblocktime = 0.0;
2846 } /* resetpblocktime */
2847
2848
2849 #ifdef TIMEDEBUG
2850 void printtimearr(timearray_t *ta)
2851 {
2852 #       if ! PARALLEL
2853           int PP_Myid;
2854           PP_Myid = -1;
2855 #       endif
2856         printf("(%2d) MMCPU: %11ld  /  %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu);
2857         printf("(%2d) CTick: %11.6f [tks]  /  %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime);
2858
2859         printf("(%2d) MMTIM: %11ld  /  %11ld \n", PP_Myid, ta->maxtime, ta->mintime);
2860
2861         printf("(%2d) Mxblk: %11.6e  /  %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock);
2862         printf("(%2d) Mnblk: %11.6e  /  %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock);
2863
2864         printf("(%2d) Gnrl: %11.6e  /  %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime);
2865         printf("(%2d) Optn: %11.6e  /  %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime);
2866         printf("(%2d) Estm: %11.6e  /  %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime);
2867         printf("(%2d) Qurt: %11.6e  /  %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime);
2868         printf("(%2d) QBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime);
2869         printf("(%2d) QMax: %11.6e  /  %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime);
2870         printf("(%2d) QMin: %11.6e  /  %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime);
2871
2872         printf("(%2d) Puzz: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime);
2873         printf("(%2d) PBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime);
2874         printf("(%2d) PMax: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime);
2875         printf("(%2d) PMin: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime);
2876
2877         printf("(%2d) Tree: %11.6e  /  %11.6e \n", PP_Myid, ta->treecpu, ta->treetime);
2878         printf("(%2d) TBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime);
2879         printf("(%2d) TMax: %11.6e  /  %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime);
2880         printf("(%2d) TMin: %11.6e  /  %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime);
2881
2882         printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid, 
2883                 (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu),
2884                 (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime));
2885         printf("(%2d)  CPU: %11.6e /  Time: %11.6e \n", PP_Myid, ta->cpu, ta->time);
2886         printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime);
2887
2888 } /* printtimearr */
2889 #endif /* TIMEDEBUG */
2890
2891 char *jtype [7];
2892
2893 void inittimearr(timearray_t *ta)
2894 {
2895         clock_t c0, c1, c2; 
2896
2897         jtype[OVERALL]  = "OVERALL";
2898         jtype[GENERAL]  = "GENERAL";
2899         jtype[OPTIONS]  = "OPTIONS";
2900         jtype[PARAMEST] = "PARAMeter ESTimation";
2901         jtype[QUARTETS] = "QUARTETS";
2902         jtype[PUZZLING] = "PUZZLING steps";
2903         jtype[TREEEVAL] = "TREE EVALuation";
2904         ta->currentjob = GENERAL;
2905
2906         c1 = clock();
2907         c2 = clock();
2908         while (c1 == c2)
2909            c2 = clock(); 
2910         ta->mincputick = (double)(c2 - c1);
2911         ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC;
2912
2913         ta->tempcpu = clock();
2914         ta->tempcpustart = ta->tempcpu;
2915         ta->tempfullcpu  = ta->tempcpu;
2916         time(&(ta->temptime));
2917         ta->temptimestart = ta->temptime;
2918         ta->tempfulltime  = ta->temptime;
2919
2920         c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);;
2921         while (c1 < c2) {
2922            c0 = c1;
2923            c1 = c2;
2924            c2 = (clock_t)((2 * c1) + 1);
2925         }
2926         if (c1 == c2) ta->maxcpu=c0;
2927         if (c1  > c2) ta->maxcpu=c1;
2928
2929         c0=0; c1=0; c2=(clock_t)((2 * c1) - 1);
2930         while (c1 > c2) {
2931            c0 = c1;
2932            c1 = c2;
2933            c2 = (clock_t)((2 * c1) - 1);
2934         }
2935         if (c1 == c2) ta->mincpu=c0;
2936         if (c1  < c2) ta->mincpu=c1;
2937
2938
2939
2940         ta->maxtime        = 0;
2941         ta->mintime        = 0;
2942
2943         ta->maxcpublock    = 0;
2944         ta->mincpublock    = DBL_MAX;
2945         ta->maxtimeblock   = 0;
2946         ta->mintimeblock   = DBL_MAX;
2947
2948         ta->cpu            = 0.0;
2949         ta->time           = 0.0;
2950
2951         ta->fullcpu        = 0.0;
2952         ta->fulltime       = 0.0;
2953
2954         ta->generalcpu     = 0.0;
2955         ta->optionscpu     = 0.0;
2956         ta->paramestcpu    = 0.0;
2957         ta->quartcpu       = 0.0;
2958         ta->quartblockcpu  = 0.0;
2959         ta->quartmaxcpu    = 0.0;
2960         ta->quartmincpu    = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2961         ta->puzzcpu        = 0.0;
2962         ta->puzzblockcpu   = 0.0;
2963         ta->puzzmaxcpu     = 0.0;
2964         ta->puzzmincpu     = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2965         ta->treecpu        = 0.0;
2966         ta->treeblockcpu   = 0.0;
2967         ta->treemaxcpu     = 0.0;
2968         ta->treemincpu     = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
2969
2970         ta->generaltime    = 0.0;
2971         ta->optionstime    = 0.0;
2972         ta->paramesttime   = 0.0;
2973         ta->quarttime      = 0.0;
2974         ta->quartblocktime = 0.0;
2975         ta->quartmaxtime   = 0.0;
2976         ta->quartmintime   = DBL_MAX;
2977         ta->puzztime       = 0.0;
2978         ta->puzzblocktime  = 0.0;
2979         ta->puzzmaxtime    = 0.0;
2980         ta->puzzmintime    = DBL_MAX;
2981         ta->treetime       = 0.0;
2982         ta->treeblocktime  = 0.0;
2983         ta->treemaxtime    = 0.0;
2984         ta->treemintime    = DBL_MAX;
2985 } /* inittimearr */
2986
2987
2988 /***************/
2989
2990 void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta)
2991 {
2992    double  c,
2993            t;
2994
2995    if (t2 != t1) t = difftime(t2, t1); 
2996    else          t = 0.0;
2997
2998    if (c2 < c1)
2999       c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC +
3000           ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC;
3001    else
3002       c = ((double)(c2 - c1))/CLOCKS_PER_SEC;
3003
3004    if (jobtype != OVERALL) {
3005
3006       if (ta->mincpublock  > c) ta->mincpublock = c;
3007       if (ta->maxcpublock  < c) ta->maxcpublock = c;
3008       if (ta->mintimeblock > t) ta->mintimeblock = t;
3009       if (ta->maxtimeblock < t) ta->maxtimeblock = t;
3010
3011       switch (jobtype) {
3012          case GENERAL: ta->generalcpu  += c;
3013                        ta->generaltime += t;
3014                        break;
3015          case OPTIONS: ta->optionscpu  += c;
3016                        ta->optionstime += t;
3017                        break;
3018          case PARAMEST: ta->paramestcpu  += c;
3019                         ta->paramesttime += t;
3020                         break;
3021          case QUARTETS: ta->quartblockcpu  += c;
3022                         ta->quartblocktime += t;
3023                         if (ta->quartmincpu  > c) ta->quartmincpu = c;
3024                         if (ta->quartmaxcpu  < c) ta->quartmaxcpu = c;
3025                         if (ta->quartmintime > t) ta->quartmintime = t;
3026                         if (ta->quartmaxtime < t) ta->quartmaxtime = t;
3027                         break;
3028          case PUZZLING: ta->puzzblockcpu  += c;
3029                         ta->puzzblocktime += t;
3030                         if (ta->puzzmincpu  > c) ta->puzzmincpu = c;
3031                         if (ta->puzzmaxcpu  < c) ta->puzzmaxcpu = c;
3032                         if (ta->puzzmintime > t) ta->puzzmintime = t;
3033                         if (ta->puzzmaxtime < t) ta->puzzmaxtime = t;
3034                         break;
3035          case TREEEVAL: ta->treeblockcpu  += c;
3036                         ta->treeblocktime += t;
3037                         if (ta->treemincpu  > c) ta->treemincpu = c;
3038                         if (ta->treemaxcpu  < c) ta->treemaxcpu = c;
3039                         if (ta->treemintime > t) ta->treemintime = t;
3040                         if (ta->treemaxtime < t) ta->treemaxtime = t;
3041                         break;
3042       }
3043       ta->cpu  += c;
3044       ta->time += t;
3045    
3046    } else {
3047          ta->fullcpu  += c;
3048          ta->fulltime += t;
3049    }
3050
3051 #     ifdef TIMEDEBUG
3052          {
3053 #        if ! PARALLEL
3054             int PP_Myid =  -1;
3055 #        endif /* !PARALLEL */
3056          printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]);
3057          printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]);
3058          printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]);
3059          }
3060 #     endif /* TIMEDEBUG */
3061 } /* addup */
3062
3063
3064 /***************/
3065
3066
3067 void addtimes(int jobtype, timearray_t *ta)
3068 {
3069    clock_t tempc;
3070    time_t  tempt;
3071
3072    time(&tempt);
3073    tempc = clock();
3074
3075    if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) {   /* CPU counter overflow for overall time */
3076         addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta);
3077         ta->tempfullcpu  = tempc;
3078         ta->tempfulltime = tempt;
3079         if (jobtype == OVERALL) {
3080            addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
3081            ta->tempcpustart = ta->tempcpu;
3082            ta->tempcpu      = tempc;
3083            ta->temptimestart = ta->temptime;
3084            ta->temptime      = tempt;
3085         }
3086    }
3087
3088    if((jobtype != ta->currentjob) && (jobtype != OVERALL)) {   /* change of job type */
3089         addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta);
3090         ta->tempcpustart  = ta->tempcpu;
3091         ta->tempcpu       = tempc;
3092         ta->temptimestart = ta->temptime;
3093         ta->temptime      = tempt;
3094         ta->currentjob    = jobtype;
3095    }
3096
3097    if (tempc < ta->tempcpustart) {   /* CPU counter overflow */
3098         addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
3099         ta->tempcpustart = ta->tempcpu;
3100         ta->tempcpu      = tempc;
3101         ta->temptimestart = ta->temptime;
3102         ta->temptime      = tempt;
3103    }
3104
3105 } /* addtimes */
3106
3107
3108
3109 /******************************************************************************/
3110
3111 /* estimate parameters of substitution process and rate heterogeneity - no tree
3112    n-taxon tree is not needed because of quartet method or NJ tree topology */
3113 void estimateparametersnotree()
3114 {
3115         int it, nump, change;
3116         double TSold, YRold, FIold, GEold;
3117
3118         it = 0;
3119         nump = 0;
3120
3121         /* count number of parameters */
3122         if (data_optn == NUCLEOTIDE && optim_optn) nump++;
3123         if (fracinv_optim || grate_optim) nump++;
3124
3125         do { /* repeat until nothing changes any more */
3126                 it++;
3127                 change = FALSE;
3128
3129                 /* optimize substitution parameters */
3130                 if (data_optn == NUCLEOTIDE && optim_optn) {
3131                 
3132                         TSold = TSparam;
3133                         YRold = YRparam;
3134                         
3135
3136                         /*
3137                          * optimize
3138                          */
3139                          
3140                         FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
3141                         fflush(STDOUT);
3142
3143                         if (qcalg_optn) { /* quartet sampling */
3144                                 optimseqevolparamsq();
3145                         } else { /* NJ tree */
3146                                 tmpfp = tmpfile();
3147                                 njtree(tmpfp);                          
3148                                 rewind(tmpfp);
3149                                 readusertree(tmpfp);
3150                                 closefile(tmpfp);
3151                                 optimseqevolparamst();                          
3152                         }
3153                         
3154                         computedistan(); /* update ML distances */
3155                 
3156                         /* same tolerance as 1D minimization */
3157                         if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
3158                                 (fabs(YRparam - YRold) > 3.3*PEPS1)
3159                         ) change = TRUE;
3160
3161                 }
3162
3163                 /* optimize rate heterogeneity variables */
3164                 if (fracinv_optim || grate_optim) {
3165
3166                         FIold = fracinv;
3167                         GEold = Geta;
3168
3169
3170                         /* 
3171                          * optimize 
3172                          */
3173
3174                         FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
3175                         fflush(STDOUT);
3176                         /* compute NJ tree */
3177                         tmpfp = tmpfile();
3178                         njtree(tmpfp);
3179                         /* use NJ tree topology to estimate parameters */
3180                         rewind(tmpfp);
3181                         readusertree(tmpfp);
3182                         closefile(tmpfp);
3183
3184                         optimrateparams();                              
3185                         computedistan(); /* update ML distances */
3186
3187
3188                         /* same tolerance as 1D minimization */
3189                         if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
3190                                 (fabs(Geta - GEold) > 3.3*PEPS2)
3191                         ) change = TRUE;
3192                         
3193                 }
3194
3195                 if (nump == 1) return;
3196                 
3197         } while (it != MAXITS && change);
3198
3199         return;
3200 }
3201
3202
3203 /* estimate parameters of substitution process and rate heterogeneity - tree
3204    same as above but here the n-taxon tree is already in memory */
3205 void estimateparameterstree()
3206 {
3207         int it, nump, change;
3208         double TSold, YRold, FIold, GEold;
3209
3210         it = 0;
3211         nump = 0;
3212
3213         /* count number of parameters */
3214         if (data_optn == NUCLEOTIDE && optim_optn) nump++;
3215         if (fracinv_optim || grate_optim) nump++;
3216
3217         do { /* repeat until nothing changes any more */
3218                 it++;
3219                 change = FALSE;
3220
3221                 /* optimize substitution process parameters */
3222                 if (data_optn == NUCLEOTIDE && optim_optn) {
3223                 
3224                         TSold = TSparam;
3225                         YRold = YRparam;
3226                         
3227
3228                         /*
3229                          * optimize
3230                          */
3231                          
3232                         FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
3233                         fflush(STDOUT);
3234                         optimseqevolparamst();
3235                         computedistan(); /* update ML distances */
3236
3237                 
3238                         /* same tolerance as 1D minimization */
3239                         if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
3240                                 (fabs(YRparam - YRold) > 3.3*PEPS1)
3241                         ) change = TRUE;
3242
3243                 }
3244
3245                 /* optimize rate heterogeneity variables */
3246                 if (fracinv_optim || grate_optim) {
3247
3248                         FIold = fracinv;
3249                         GEold = Geta;
3250
3251
3252                         /* 
3253                          * optimize 
3254                          */
3255
3256                         FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
3257                         fflush(STDOUT);
3258                         optimrateparams();                              
3259                         computedistan(); /* update ML distances */
3260
3261
3262                         /* same tolerance as 1D minimization */
3263                         if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
3264                                 (fabs(Geta - GEold) > 3.3*PEPS2)
3265                         ) change = TRUE;
3266                         
3267                 }
3268
3269                 if (nump == 1) return;
3270                 
3271         } while (it != MAXITS && change);
3272
3273         return;
3274 }
3275
3276
3277 /******************************************************************************/
3278 /* exported from main                                                         */
3279 /******************************************************************************/
3280
3281 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx)
3282 {
3283         if (approx == APPROX) {
3284
3285                 *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */
3286                 *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */
3287                 *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */
3288
3289         } else /* approx == EXACT */ {
3290
3291                 *d1 = quartet_lklhd(a,b, c,d);  /* (a,b)-(c,d) */
3292                 *d2 = quartet_lklhd(a,c, b,d);  /* (a,c)-(b,d) */
3293                 *d3 = quartet_lklhd(a,d, b,c);  /* (a,d)-(b,c) */
3294
3295         }
3296 }
3297
3298 /***************************************************************/ 
3299
3300 void recon_tree()
3301 {       
3302         int i;
3303 #       if ! PARALLEL
3304                 int a, b, c;
3305                 uli nq;
3306                 double tc2, mintogo, minutes, hours;
3307 #       endif
3308
3309         /* allocate memory for taxon list of bad quartets */
3310         badtaxon = new_ulivector(Maxspc);
3311         for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
3312
3313         /* allocate variable used for randomizing input order */
3314         trueID = new_ivector(Maxspc);
3315
3316         /* allocate memory for quartets */
3317         quartetinfo = mallocquartets(Maxspc);
3318         
3319         /* prepare for consensus tree analysis */
3320         initconsensus();
3321                 
3322         if (!(readquart_optn) || (readquart_optn && savequart_optn)) {
3323           /* compute quartets */
3324           FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n");
3325           fflush(STDOUT);
3326           computeallquartets();
3327         }
3328
3329         if (savequart_optn) 
3330                 writeallquarts(Maxspc, ALLQUART, quartetinfo);
3331         if (readquart_optn) {
3332                 int xx1, xx2, xx3, xx4, count;
3333                 readallquarts (Maxspc, ALLQUART, quartetinfo);
3334                 if (show_optn) { /* list all unresolved quartets */
3335                         openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
3336                         fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
3337                 }
3338
3339                 /* initialize bad quartet memory */
3340                 for (count = 0; count < Maxspc; count++) badtaxon[count] = 0;
3341                 badqs = 0;
3342
3343                 for (xx4 = 3; xx4 < Maxspc; xx4++)
3344                   for (xx3 = 2; xx3 < xx4; xx3++)
3345                     for (xx2 = 1; xx2 < xx3; xx2++)
3346                       for (xx1 = 0; xx1 < xx2; xx1++) {
3347                         if (readquartet(xx1, xx2, xx3, xx4) == 7) {
3348                                 badqs++;
3349                                 badtaxon[xx1]++;
3350                                 badtaxon[xx2]++;
3351                                 badtaxon[xx3]++;
3352                                 badtaxon[xx4]++;
3353                                 if (show_optn) {
3354                                         fputid10(unresfp, xx1);
3355                                         fprintf(unresfp, "  ");
3356                                         fputid10(unresfp, xx2);
3357                                         fprintf(unresfp, "  ");
3358                                         fputid10(unresfp, xx3);
3359                                         fprintf(unresfp, "  ");
3360                                         fputid  (unresfp, xx4);
3361                                         fprintf(unresfp, "\n");
3362                                 }
3363                         }
3364                 } /* end for xx4; for xx3; for xx2; for xx1 */
3365                 if (show_optn)  /* list all unresolved quartets */
3366                         fclose(unresfp);
3367         } /* readquart_optn */
3368
3369 #       if PARALLEL
3370                 PP_SendAllQuarts(numquarts(Maxspc), quartetinfo);
3371 #       endif /* PARALLEL */
3372
3373         FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n");
3374         fflush(STDOUT);
3375         
3376         /* start timer - percentage of completed trees */
3377         time(&time0);
3378         time1 = time0;
3379         mflag = 0;
3380                         
3381         /* open file for chronological list of puzzling step trees */
3382         if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
3383                 openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)");
3384                                                         
3385 #       if PARALLEL
3386                 {
3387                 PP_SendDoPermutBlock(Numtrial);
3388                 }
3389 #       else
3390                 addtimes(GENERAL, &tarr);
3391                 for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) {
3392                         
3393                         /* randomize input order */
3394                         chooser(Maxspc, Maxspc, trueID);
3395                                 
3396                         /* initialize tree */
3397                         inittree();
3398
3399                         /* adding all other leafs */
3400                         for (i = 3; i < Maxspc; i++) { 
3401                                         
3402                                 /* clear all edgeinfos */
3403                                 resetedgeinfo();
3404                                 
3405                                 /* clear counter of quartets */
3406                                 nq = 0;
3407
3408                                 /*
3409                                  * core of quartet puzzling algorithm
3410                                  */
3411
3412                                 for (a = 0; a < nextleaf - 2; a++)
3413                                         for (b = a + 1; b < nextleaf - 1; b++)
3414                                                 for (c = b + 1; c < nextleaf; c++) {
3415
3416                                                         /*  check which two _leaves_ out of a, b, c
3417                                                             are closer related to each other than
3418                                                             to leaf i according to a least squares
3419                                                             fit of the continous Baysian weights to the
3420                                                             seven trivial "attractive regions". We assign 
3421                                                             a score of 1 to all edges between these two leaves
3422                                                             chooseA and chooseB */
3423
3424                                                         checkquartet(a, b, c, i);
3425                                                         incrementedgeinfo(chooseA, chooseB);
3426
3427                                                         nq++;
3428
3429                                                         /* generate message every 15 minutes */
3430                                                                 
3431                                                         /* check timer */
3432                                                         time(&time2);
3433                                                         if ( (time2 - time1) > 900) {
3434                                                                 /* every 900 seconds */
3435                                                                 /* percentage of completed trees */
3436                                                                 if (mflag == 0) {
3437                                                                         FPRINTF(STDOUTFILE "\n");
3438                                                                         mflag = 1;
3439                                                                 }
3440                                                                 tc2 = 100.0*Currtrial/Numtrial + 
3441                                                                         100.0*nq/Numquartets/Numtrial;
3442                                                                         mintogo = (100.0-tc2) *
3443                                                                         (double) (time2-time0)/60.0/tc2;
3444                                                                         hours = floor(mintogo/60.0);
3445                                                                         minutes = mintogo - 60.0*hours;
3446                                                                         FPRINTF(STDOUTFILE "%2.2f%%", tc2);
3447                                                                         FPRINTF(STDOUTFILE " completed (remaining");
3448                                                                         FPRINTF(STDOUTFILE " time: %.0f", hours);
3449                                                                         FPRINTF(STDOUTFILE " hours %.0f", minutes);
3450                                                                         FPRINTF(STDOUTFILE " minutes)\n");
3451                                                                         fflush(STDOUT);
3452                                                                         time1 = time2;
3453                                                                 }
3454                                                         }
3455
3456                                 /* find out which edge has the lowest edgeinfo */
3457                                 minimumedgeinfo();
3458
3459                                 /* add the next leaf on minedge */
3460                                 addnextleaf(minedge);
3461                         }
3462         
3463                         /* compute bipartitions of current tree */
3464                         computebiparts();
3465                         makenewsplitentries();
3466
3467                         {
3468                                 int *ctree, startnode;
3469                                 char *trstr;
3470                                 treelistitemtype *treeitem;
3471                                 ctree = initctree();
3472                                 copytree(ctree);
3473                                 startnode = sortctree(ctree);
3474                                 trstr=sprintfctree(ctree, psteptreestrlen);
3475
3476
3477                                 treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum);
3478
3479                                 if((listqptrees == PSTOUT_LIST) 
3480                                   || (listqptrees == PSTOUT_LISTORDER)) {
3481                                         /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */
3482                                         fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n", 
3483                                                 Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum);
3484                                 }
3485
3486 #                               ifdef VERBOSE1
3487                                         printf("%s\n", trstr);
3488                                         printfsortedpstrees(psteptreelist);
3489 #                               endif
3490                                 freectree(&ctree);
3491                         }
3492
3493
3494
3495                         /* free tree before building the next tree */
3496                         freetree();
3497                         
3498                         addtimes(PUZZLING, &tarr);
3499                 }
3500 #       endif /* PARALLEL */
3501                         
3502                         /* close file for list of puzzling step trees */
3503                         if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
3504                                 closefile(qptlist);
3505                         
3506                         if (mflag == 1) FPRINTF(STDOUTFILE "\n");
3507
3508                         /* garbage collection */
3509                         free(splitcomp);
3510                         free_ivector(trueID);
3511
3512 #                       if ! PARALLEL
3513                                 free_cmatrix(biparts);
3514 #                       endif /* PARALLEL */
3515
3516                         freequartets();
3517
3518                         /* compute majority rule consensus tree */
3519                         makeconsensus();
3520                         
3521                         /* write consensus tree to tmp file */
3522                         tmpfp = tmpfile();
3523                         writeconsensustree(tmpfp);
3524 } /* recon_tree */
3525
3526 /***************************************************************/ 
3527
3528 void map_lklhd()
3529 {       
3530         int i, a, a1, a2, b, b1, b2, c, c1, c2, d;
3531         uli nq;
3532         double logs[3], d1, d2, d3, temp;
3533         ivector qts, mlorder, gettwo;
3534                 /* reset variables */
3535                 ar1 = ar2 = ar3 = 0;
3536                 reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0;
3537                 reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u =
3538                         reg4d = reg5l = reg5r = reg6u = reg6d = 0;
3539                         
3540                 /* place for random quartet */
3541                         qts = new_ivector(4);
3542
3543                 /* initialize output file */
3544                 openfiletowrite(&trifp, TRIANGLE, "Postscript output");
3545                 initps(trifp);
3546                 FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n");
3547                 fflush(STDOUT);
3548                         
3549                 /* start timer */
3550                 starttimer();
3551                 nq = 0;
3552                 mflag = 0;
3553
3554                 addtimes(GENERAL, &tarr);
3555                 if (lmqts == 0) { /* all possible quartets */
3556                         
3557                         if (numclust == 4) { /* four-cluster analysis */
3558
3559                                 for (a = 0; a < clustA; a++)
3560                                         for (b = 0; b < clustB; b++)
3561                                                 for (c = 0; c < clustC; c++)
3562                                                         for (d = 0; d < clustD; d++) {
3563                                                                         
3564                                                                 nq++;
3565                                                                         
3566                                                                 /* check timer */
3567                                                                 checktimer(nq);
3568
3569                                                                 /* maximum likelihood values */
3570                                                                 /* approximate ML is sufficient */
3571                                                                 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX);
3572                                                                 
3573                                                                 /* draw point for LM analysis */
3574                                                                 makelmpoint(trifp, d1, d2, d3);
3575                                                                 addtimes(QUARTETS, &tarr);
3576
3577                                                         }                                       
3578                         }
3579                                 
3580                         if (numclust == 3) { /* three-cluster analysis */
3581
3582                                 gettwo = new_ivector(2);
3583
3584                                 for (a = 0; a < clustA; a++)
3585                                         for (b = 0; b < clustB; b++)
3586                                                 for (c1 = 0; c1 < clustC-1; c1++)
3587                                                         for (c2 = c1+1; c2 < clustC; c2++) {
3588
3589                                                                 nq++;
3590
3591                                                                 /* check timer */
3592                                                                 checktimer(nq);
3593                                                                 
3594                                                                 /* maximum likelihood values */
3595                                                                 /* approximate ML is sufficient */
3596                                                                 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX);
3597                                                                 
3598                                                                 /* randomize order of d2 and d3 */
3599                                                                 if (randominteger(2) == 1) {
3600                                                                         temp = d3;
3601                                                                         d3 = d2;
3602                                                                         d2 = temp;
3603                                                                 }
3604                                                 
3605                                                                 /* draw point for LM analysis */
3606                                                                 makelmpoint(trifp, d1, d2, d3);
3607                                                                 addtimes(QUARTETS, &tarr);
3608
3609                                 }                               
3610                                 free_ivector(gettwo);
3611                         }
3612                                 
3613                         if (numclust == 2) { /* two-cluster analysis */
3614                                         
3615                                 gettwo = new_ivector(2);
3616
3617                                 for (a1 = 0; a1 < clustA-1; a1++)
3618                                         for (a2 = a1+1; a2 < clustA; a2++)
3619                                                 for (b1 = 0; b1 < clustB-1; b1++)
3620                                                         for (b2 = b1+1; b2 < clustB; b2++) {
3621
3622                                                                 nq++;
3623
3624                                                                 /* check timer */
3625                                                                 checktimer(nq);
3626                                                                 
3627                                                                 /* maximum likelihood values */
3628                                                                 /* approximate ML is sufficient */
3629                                                                 compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX);
3630                                                                 
3631                                                                 /* randomize order of d2 and d3 */
3632                                                                 if (randominteger(2) == 1) {
3633                                                                         temp = d3;
3634                                                                         d3 = d2;
3635                                                                         d2 = temp;
3636                                                                 }
3637                                                 
3638                                                                 /* draw point for LM analysis */
3639                                                                 makelmpoint(trifp, d1, d2, d3);
3640                                                                 addtimes(QUARTETS, &tarr);
3641
3642                                 }
3643                                         
3644                                 free_ivector(gettwo);
3645                         }
3646                         
3647                         if (numclust == 1) { /* normal likelihood mapping (one cluster) */
3648                         
3649                                 mlorder = new_ivector(3);
3650
3651 #if 0
3652                                 for (i = 3; i < Maxspc; i++) 
3653                                         for (a = 0; a < i - 2; a++) 
3654                                                 for (b = a + 1; b < i - 1; b++) 
3655                                                         for (c = b + 1; c < i; c++)
3656    for (d = 3; d < Maxspc; d++) 
3657       for (c = 2; c < d; c++) 
3658          for (b = 1; b < c; b++) 
3659             for (a = 0; a < b; a++) 
3660 #endif
3661
3662                                 for (i = 3; i < Maxspc; i++) 
3663                                         for (c = 2; c < i; c++) 
3664                                                 for (b = 1; b < c; b++) 
3665                                                         for (a = 0; a < b; a++) {
3666                                                         
3667                                                                 nq++;
3668
3669                                                                 /* check timer */
3670                                                                 checktimer(nq);
3671
3672                                                                 /* maximum likelihood values */
3673                                                                 /* approximate ML is sufficient */
3674                                                                 compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX);
3675
3676                                                                 /* randomize order */
3677                                                                 chooser(3,3,mlorder);
3678                                                                 d1 = logs[mlorder[0]];
3679                                                                 d2 = logs[mlorder[1]];
3680                                                                 d3 = logs[mlorder[2]];
3681                                                                 
3682                                                                 /* draw point for LM analysis */
3683                                                                 makelmpoint(trifp, d1, d2, d3);
3684                                                                 addtimes(QUARTETS, &tarr);
3685                                 
3686                                 }
3687                                 free_ivector(mlorder);
3688                         }
3689                         
3690                 } else { /* randomly selected quartets */
3691                         
3692                         if (numclust == 4) { /* four-cluster analysis */
3693
3694                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3695                                 
3696                                         nq++;
3697
3698                                         /* check timer */
3699                                         checktimer(nq);
3700
3701                                         /* choose random quartet */     
3702                                         qts[0] = clusterA[ randominteger(clustA) ];
3703                                         qts[1] = clusterB[ randominteger(clustB) ];
3704                                         qts[2] = clusterC[ randominteger(clustC) ];
3705                                         qts[3] = clusterD[ randominteger(clustD) ];                     
3706
3707                                         /* maximum likelihood values */
3708                                         /* approximate ML is sufficient */
3709                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3710                                         
3711                                         /* draw point for LM analysis */
3712                                         makelmpoint(trifp, d1, d2, d3);
3713                                         addtimes(QUARTETS, &tarr);
3714
3715                                 }
3716                         }
3717                                 
3718                         if (numclust == 3) { /* three-cluster analysis */
3719
3720                                 gettwo = new_ivector(2);
3721
3722                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3723                                 
3724                                         nq++;
3725
3726                                         /* check timer */
3727                                         checktimer(nq);
3728
3729                                         /* choose random quartet */     
3730                                         qts[0] = clusterA[ randominteger(clustA) ];
3731                                         qts[1] = clusterB[ randominteger(clustB) ];
3732                                         chooser(clustC, 2, gettwo);
3733                                         qts[2] = clusterC[gettwo[0]];
3734                                         qts[3] = clusterC[gettwo[1]];           
3735
3736                                         /* maximum likelihood values */
3737                                         /* approximate ML is sufficient */
3738                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3739                                         
3740                                         /* order of d2 and d3 is already randomized! */
3741                                                 
3742                                         /* draw point for LM analysis */
3743                                         makelmpoint(trifp, d1, d2, d3);
3744                                         addtimes(QUARTETS, &tarr);
3745
3746                                 }
3747                                         
3748                                 free_ivector(gettwo);
3749                         }
3750                                 
3751                         if (numclust == 2) { /* two-cluster analysis */
3752
3753                                 gettwo = new_ivector(2);
3754
3755                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3756                                 
3757                                         nq++;
3758
3759                                         /* check timer */
3760                                         checktimer(nq);
3761
3762                                         /* choose random quartet */     
3763                                         chooser(clustA, 2, gettwo);
3764                                         qts[0] = clusterA[gettwo[0]];
3765                                         qts[1] = clusterA[gettwo[1]];           
3766                                         chooser(clustB, 2, gettwo);
3767                                         qts[2] = clusterB[gettwo[0]];
3768                                         qts[3] = clusterB[gettwo[1]];           
3769
3770                                         /* maximum likelihood values */
3771                                         /* approximate ML is sufficient */
3772                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3773                                         
3774                                         /* order of d2 and d3 is already randomized! */
3775                                                 
3776                                         /* draw point for LM analysis */
3777                                         makelmpoint(trifp, d1, d2, d3);
3778                                         addtimes(QUARTETS, &tarr);
3779
3780                                 }
3781                                 free_ivector(gettwo);
3782                         }
3783                                 
3784                         if (numclust == 1) { /* normal likelihood mapping (one cluster) */
3785                         
3786                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
3787                                 
3788                                         nq++;
3789
3790                                         /* check timer */
3791                                         checktimer(nq);
3792
3793                                         /* choose random quartet */     
3794                                         chooser(Maxspc, 4, qts);                                
3795
3796                                         /* maximum likelihood values */
3797                                         /* approximate ML is sufficient */
3798                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
3799                                         
3800                                         /* order of d1, d2, and d3 is already randomized! */
3801                                 
3802                                         /* draw point for LM analysis */
3803                                         makelmpoint(trifp, d1, d2, d3);
3804                                         addtimes(QUARTETS, &tarr);
3805
3806                                 }
3807                         }
3808                 }
3809
3810                 finishps(trifp);
3811                 closefile(trifp);
3812                 free_ivector(qts);
3813
3814 } /* map_lklhd */
3815
3816 /***************************************************************/ 
3817
3818 void setdefaults() {
3819
3820   strcpy(INFILE,     INFILEDEFAULT);
3821   strcpy(OUTFILE,    OUTFILEDEFAULT);
3822   strcpy(TREEFILE,   TREEFILEDEFAULT);
3823   strcpy(INTREE,     INTREEDEFAULT);
3824   strcpy(DISTANCES,  DISTANCESDEFAULT);
3825   strcpy(TRIANGLE,   TRIANGLEDEFAULT);
3826   strcpy(UNRESOLVED, UNRESOLVEDDEFAULT);
3827   strcpy(ALLQUART,   ALLQUARTDEFAULT);
3828   strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT);
3829   strcpy(OUTPTLIST,  OUTPTLISTDEFAULT);
3830   strcpy(OUTPTORDER, OUTPTORDERDEFAULT);
3831
3832   usebestq_optn    = FALSE;
3833   savequartlh_optn = FALSE;
3834   savequart_optn   = FALSE;
3835   readquart_optn   = FALSE;
3836
3837   randseed = -1;                  /* to set random random seed */
3838
3839 } /* setdefaults */
3840
3841 /***************************************************************/ 
3842
3843 void printversion()
3844 {
3845 #  if ! PARALLEL
3846    fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION);
3847 #else
3848    fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION);
3849 #  endif
3850    exit (0);
3851 }
3852 /***************************************************************/ 
3853
3854 void printusage(char *fname)
3855 {
3856    fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
3857 #  if PARALLEL
3858         PP_SendDone();
3859         MPI_Finalize();
3860 #  endif
3861    exit (1);
3862 }
3863
3864 /***************************************************************/ 
3865
3866 #ifdef HHH
3867 void printusagehhh(char *fname)
3868 {
3869    fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
3870    fprintf(stderr, "  -h           - print usage\n");
3871    fprintf(stderr, "  -wqf         - write quartet file to Infilename.allquart\n");
3872    fprintf(stderr, "  -rqf         - read quartet file from Infilename.allquart\n");
3873    fprintf(stderr, "  -wqlb        - write quart lhs to Infilename.allquartlh (binary)\n");
3874    fprintf(stderr, "  -wqla        - write quart lhs to Infilename.allquartlh (ASCII)\n");
3875    fprintf(stderr, "  -bestq       - use best quart, no basian weights\n");
3876    fprintf(stderr, "  -randseed<#> - use <#> as random number seed, for debug purposes only\n");
3877 #  if PARALLEL
3878         PP_SendDone();
3879         MPI_Finalize();
3880 #  endif
3881    exit (2);
3882 }
3883 #endif /* HHH */
3884
3885 /***************************************************************/ 
3886
3887
3888 void scancmdline(int *argc, char **argv[]) 
3889 {
3890    static short infileset     = 0;
3891    static short intreefileset = 0;
3892    short flagused;
3893    int n;
3894    int count, dummyint;
3895
3896    for (n = 1; n < *argc; n++) {
3897 #     ifdef VERBOSE1
3898          printf("argv[%d] = %s\n", n, (*argv)[n]);
3899 #     endif
3900  
3901       flagused = FALSE;
3902
3903 #     ifdef HHH
3904       dummyint = 0;
3905       count = sscanf((*argv)[n], "-wqlb%n", &dummyint);
3906       if (dummyint == 5) {
3907         savequartlh_optn = TRUE;
3908         saveqlhbin_optn  = TRUE;
3909         flagused         = TRUE;
3910       }
3911
3912       dummyint = 0;
3913       count = sscanf((*argv)[n], "-wqla%n", &dummyint);
3914       if (dummyint == 5) {
3915         savequartlh_optn = TRUE;
3916         saveqlhbin_optn  = FALSE;
3917         flagused         = TRUE;
3918       }
3919
3920       dummyint = 0;
3921       count = sscanf((*argv)[n], "-wqf%n", &dummyint);
3922       if (dummyint == 4) {
3923         savequart_optn = TRUE;
3924         flagused = TRUE;
3925       }
3926
3927       dummyint = 0;
3928       count = sscanf((*argv)[n],"-rqf%n", &dummyint);
3929       if (dummyint == 4) {
3930         readquart_optn = TRUE; 
3931         flagused = TRUE;
3932       }
3933
3934       dummyint = 0;
3935       count = sscanf((*argv)[n],"-bestq%n", &dummyint);
3936       if (dummyint == 6) {
3937         usebestq_optn = TRUE; 
3938         flagused = TRUE;
3939       }
3940
3941       dummyint = 0;
3942       count = sscanf((*argv)[n],"-hhh%n", &dummyint);
3943       if (dummyint==4) {
3944         printusagehhh((*argv)[0]); 
3945         flagused = TRUE;
3946       }
3947 #     endif /* HHH */
3948
3949       dummyint = 0;
3950       count = sscanf((*argv)[n],"-V%n", &dummyint);
3951       if (dummyint==2) {
3952         printversion((*argv)[0]); 
3953         flagused = TRUE;
3954       }
3955
3956       dummyint = 0;
3957       count = sscanf((*argv)[n],"-version%n", &dummyint);
3958       if (dummyint==8) {
3959         printversion((*argv)[0]); 
3960         flagused = TRUE;
3961       }
3962
3963       dummyint = 0;
3964       count = sscanf((*argv)[n],"--version%n", &dummyint);
3965       if (dummyint>=4) {
3966         printversion((*argv)[0]); 
3967         flagused = TRUE;
3968       }
3969
3970       dummyint = 0;
3971       count = sscanf((*argv)[n],"-h%n", &dummyint);
3972       if (dummyint==2) {
3973         printusage((*argv)[0]); 
3974         flagused = TRUE;
3975       }
3976
3977       count = sscanf((*argv)[n],"-randseed%d", &dummyint);
3978       if (count == 1) {
3979         randseed = dummyint; 
3980         flagused = TRUE;
3981       }
3982
3983 #if 0
3984       count = sscanf((*argv)[n],"-h%n", &dummyint);
3985       if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]); 
3986
3987       count = sscanf((*argv)[n],"-writequarts%n", &dummyint);
3988       if (count == 1) writequartstofile = 1;; 
3989
3990       count = sscanf((*argv)[n],"-ws%d", &dummyint);
3991       if (count == 1) windowsize = dummyint; 
3992 #endif
3993
3994       if ((*argv)[n][0] != '-') {
3995          if (infileset == 0) {
3996             strcpy(INFILE, (*argv)[n]);
3997             infileset++;
3998             sprintf(OUTFILE    ,"%s.%s", INFILE, OUTFILEEXT);
3999             sprintf(TREEFILE   ,"%s.%s", INFILE, TREEFILEEXT);
4000             sprintf(DISTANCES  ,"%s.%s", INFILE, DISTANCESEXT);
4001             sprintf(TRIANGLE   ,"%s.%s", INFILE, TRIANGLEEXT);
4002             sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT);
4003             sprintf(ALLQUART   ,"%s.%s", INFILE, ALLQUARTEXT);
4004             sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT);
4005             sprintf(OUTPTLIST  ,"%s.%s", INFILE, OUTPTLISTEXT);
4006             sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT);
4007             FPRINTF(STDOUTFILE "Input file: %s\n", INFILE);
4008             flagused = TRUE;
4009          } else {
4010             if (intreefileset == 0) {
4011                strcpy(INTREE, (*argv)[n]);
4012                intreefileset++;
4013                sprintf(OUTFILE    ,"%s.%s", INTREE, OUTFILEEXT);
4014                sprintf(TREEFILE   ,"%s.%s", INTREE, TREEFILEEXT);
4015                sprintf(DISTANCES  ,"%s.%s", INTREE, DISTANCESEXT);
4016                FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE);
4017                flagused = TRUE;
4018             }
4019          }
4020       }
4021       if (flagused == FALSE) {
4022          fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]);
4023       }
4024       flagused = FALSE;
4025    }
4026
4027 } /* scancmdline */
4028
4029
4030 /***************************************************************/ 
4031
4032 void inputandinit(int *argc, char **argv[]) {
4033
4034         int ci;
4035
4036         /* vectors used in QP and LM analysis */
4037         qweight = new_dvector(3);
4038         sqdiff = new_dvector(3);
4039         qworder = new_ivector(3);
4040         sqorder = new_ivector(3);
4041         
4042         /* Initialization and parsing of Commandline */
4043         setdefaults();
4044         scancmdline(argc, argv);
4045
4046         /* initialize random numbers generator */
4047         if (randseed >= 0)
4048            fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed);
4049         randseed = initrandom(randseed);
4050
4051         psteptreelist = NULL;
4052         psteptreesum = 0;
4053         bestratefound = 0;
4054
4055 #       ifndef ALPHA
4056                 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION);
4057 #       else
4058                 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA);
4059 #       endif
4060
4061
4062         /* get sequences */
4063         openfiletoread(&seqfp, INFILE, "sequence data");
4064         getsizesites(seqfp);
4065         FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc);
4066         getdataset(seqfp);
4067         closefile(seqfp);               
4068         data_optn = guessdatatype();
4069         
4070         /* translate characters into format used by ML engine */
4071         nuc_optn = TRUE; 
4072         SH_optn = FALSE;
4073         Seqchar = NULL;
4074         translatedataset();
4075         
4076         /* estimate base frequencies from data set */
4077         Freqtpm = NULL;
4078         Basecomp = NULL;
4079         estimatebasefreqs();
4080         
4081         /* guess model of substitution */
4082         guessmodel();
4083
4084         /* initialize guess variables */
4085         auto_datatype = AUTO_GUESS;
4086         if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS;
4087         else                        auto_aamodel = AUTO_DEFAULT;
4088         /* save guessed amino acid options */
4089         guessDayhf_optn    = Dayhf_optn;
4090         guessJtt_optn      = Jtt_optn;
4091         guessmtrev_optn    = mtrev_optn;
4092         guesscprev_optn    = cprev_optn;
4093         guessblosum62_optn = blosum62_optn;
4094         guessvtmv_optn     = vtmv_optn;
4095         guesswag_optn     = wag_optn;
4096         guessauto_aamodel  = auto_aamodel;
4097
4098
4099         /* check for user specified tree */
4100         if ((utfp = fopen(INTREE, "r")) != NULL) {
4101                 fclose(utfp);
4102                 puzzlemode = USERTREE;
4103         } else {
4104                 puzzlemode = QUARTPUZ;
4105         }
4106
4107         /* reserve memory for cluster LM analysis */
4108         clusterA = new_ivector(Maxspc);
4109         clusterB = new_ivector(Maxspc);
4110         clusterC = new_ivector(Maxspc);
4111         clusterD = new_ivector(Maxspc);
4112
4113         /* set options interactively */
4114         setoptions();
4115
4116         /* open usertree file right after start */
4117         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
4118                 openfiletoread(&utfp, INTREE, "user trees");
4119         }
4120
4121         /* start main timer */
4122         time(&Starttime);
4123         Startcpu=clock();
4124         addtimes(OPTIONS, &tarr);
4125
4126         /* symmetrize doublet frequencies if specified */
4127         symdoublets();
4128
4129         /* initialise ML */
4130         mlstart();
4131
4132         /* determine how many usertrees */
4133         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
4134                 numutrees = 0;          
4135                 do {
4136                         ci = fgetc(utfp);
4137                         if ((char) ci == ';') numutrees++;
4138                 } while (ci != EOF);
4139                 rewind(utfp);
4140                 if (numutrees < 1) {
4141                         FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n");
4142                         exit(1);
4143                 }               
4144         }
4145         
4146         /* check fraction of invariable sites */
4147         if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim)
4148                 /* fraction of invariable site was specified manually */
4149                 if (fracinv > MAXFI)
4150                         fracinv = MAXFI;
4151
4152         addtimes(GENERAL, &tarr);
4153         /* estimate parameters */
4154         if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) {
4155                 /* no tree present */
4156                 estimateparametersnotree();
4157         } else {
4158                 if (utree_optn) {
4159                         /* use 1st user tree */
4160                         readusertree(utfp);
4161                         rewind(utfp);
4162                         estimateparameterstree();
4163                 } else {
4164                         /* don't use first user tree */
4165                         estimateparametersnotree();
4166                 }
4167         }
4168         addtimes(PARAMEST, &tarr);
4169
4170         /* compute expected Ts/Tv ratio */
4171         if (data_optn == NUCLEOTIDE) computeexpectations();
4172
4173 } /* inputandinit */
4174
4175
4176
4177 /***************************************************************/ 
4178
4179 void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot)
4180 {
4181
4182         switch (pmode) {
4183                 case QUARTPUZ: /* read QP tree */
4184                         readusertree(intreefp);                 
4185                         FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n");
4186                         fflush(STDOUT);
4187                         usertree_lklhd();
4188                         findbestratecombination();
4189                         break;
4190                 case USERTREE: /* read user tree */
4191                         readusertree(intreefp);
4192                         FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree # %d\n", utreenum+1);
4193                         fflush(STDOUT);
4194                         usertree_lklhd();
4195                         if (maxutree > 1) {
4196                                 ulkl[utreenum] = Ctree->lklhd;
4197                                 allsitelkl(Ctree->condlkl, allsites[utreenum]);
4198                         }
4199                         if (utreenum==0) findbestratecombination();
4200                         break;
4201         }
4202
4203
4204         if (compclock) { /* clocklike branch length */
4205                 switch (pmode) {
4206                         case QUARTPUZ:
4207                                 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n");
4208                                 fflush(STDOUT);
4209                                 break;
4210                         case USERTREE:
4211                                 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1);
4212                                 fflush(STDOUT);
4213                                 break;
4214                 }
4215                                                                         
4216                 /* find best place for root */
4217                 rootsearch = 0;
4218
4219                 if (utreenum==0) locroot = *oldlocroot;
4220                 else             *oldlocroot = locroot;
4221
4222                 if (locroot < 0) {
4223                         locroot = findrootedge();
4224                         rootsearch = 1;
4225                 }
4226                 /* if user-specified edge for root does not exist use displayed outgroup */
4227                 if (!checkedge(locroot)) {
4228                         locroot = outgroup; 
4229                         rootsearch = 2;
4230                 }
4231                 /* compute likelihood */
4232                 clock_lklhd(locroot);
4233                 if (maxutree > 1) {
4234                         ulklc[utreenum] = Ctree->lklhdc;
4235                         allsitelkl(Ctree->condlkl, allsitesc[utreenum]);
4236                 }
4237
4238         }
4239
4240         if (clockmode == 0)
4241                 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd);
4242         else
4243                 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc);
4244
4245         /* write ML branch length tree to outree file */
4246         clockmode = 0; /* nonclocklike branch lengths */
4247         fputphylogeny(outtreefp);
4248                 
4249         /* clocklike branch lengths */
4250         if (compclock) {
4251                 clockmode = 1;
4252                 fputrooted(outtreefp, locroot);
4253         }
4254 } /* evaluatetree */
4255
4256 /***************************************************************/ 
4257
4258 void memcleanup() { 
4259         if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
4260                 free(splitfreqs);
4261                 free(splitpatterns);
4262                 free(splitsizes);
4263                 free_ivector(consconfid);
4264                 free_ivector(conssizes);
4265                 free_cmatrix(consbiparts);
4266                 free_ulivector(badtaxon);
4267         }
4268         free_cmatrix(Identif);
4269         free_dvector(Freqtpm);
4270         free_imatrix(Basecomp);
4271         free_ivector(clusterA);
4272         free_ivector(clusterB);
4273         free_ivector(clusterC);
4274         free_ivector(clusterD);
4275         free_dvector(qweight);
4276         free_dvector(sqdiff);
4277         free_ivector(qworder);
4278         free_ivector(sqorder);
4279         freetreelist(&psteptreelist, &psteptreenum, &psteptreesum);
4280 } /* memcleanup */
4281
4282 /***************************************************************/ 
4283
4284
4285 /******************************************************************************/
4286 /* main part                                                                  */
4287 /******************************************************************************/
4288
4289 int main(int argc, char *argv[])
4290 {       
4291         int i, oldlocroot=0;
4292         
4293         /* start main timer */
4294         time(&walltimestart);
4295         cputimestart = clock();
4296         inittimearr(&tarr);
4297
4298 #       if PARALLEL
4299                 PP_Init(&argc, &argv);
4300                 if (PP_IamSlave) {
4301                         slave_main(argc, argv);
4302                 } else {
4303 #       endif /* PARALLEL */
4304         
4305         inputandinit(&argc, &argv);
4306
4307     /* CZ 05/19/01 */
4308         /* FPRINTF(STDOUTFILE "Writing parameters to file %s\n", OUTFILE); */
4309         /* openfiletowrite(&ofp, OUTFILE, "general output"); */
4310         /* writeoutputfile(ofp,WRITEPARAMS); */ 
4311         /* fclose(ofp); */
4312
4313
4314         /* write distance matrix */
4315         FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES);
4316         openfiletowrite(&dfp, DISTANCES, "pairwise distances");
4317         putdistance(dfp);
4318         closefile(dfp);
4319
4320 #       if PARALLEL
4321                 PP_SendSizes(Maxspc, Maxsite, numcats, Numptrn, tpmradix, outgroup, fracconst, randseed);
4322                 PP_SendData(Seqpat,                       /* cmatrix */
4323                             Alias, Weight, constpat,      /* ivector */
4324                             Rates, Eval, Freqtpm,         /* dvector */
4325                             Evec, Ievc, iexp, Distanmat,  /* dmatrix */
4326                             ltprobr);                     /* dcube   */
4327 #       endif /* PARALLEL */
4328         psteptreestrlen = (Maxspc * (int)(1 + log10(Maxspc))) +
4329                           (Maxspc * 3);
4330
4331         switch (typ_optn) {
4332                 case TREERECON_OPTN: /* tree reconstruction */
4333
4334                         if (puzzlemode == QUARTPUZ) { /* quartet puzzling */
4335                                 recon_tree();
4336                         } /* quartet puzzling */
4337                         break;
4338         
4339                 case LIKMAPING_OPTN: /* likelihood mapping */
4340         
4341                         map_lklhd();
4342                         break;
4343         } /* switch typ_optn */
4344
4345
4346         free_cmatrix(Seqchar);
4347         free_cmatrix(seqchars);
4348
4349         /* reserve memory for tree statistics */
4350         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) {
4351                 ulkl = new_dvector(numutrees);
4352                 allsites = new_dmatrix(numutrees,Numptrn);
4353                 if (compclock) {
4354                         ulklc = new_dvector(numutrees);
4355                         allsitesc = new_dmatrix(numutrees,Numptrn);
4356                 }
4357         }
4358
4359         /* write puzzling step tree list */
4360         if ((listqptrees == PSTOUT_ORDER) || (listqptrees == PSTOUT_LISTORDER)) {
4361                 openfiletowrite(&qptorder, OUTPTORDER, "puzzling step trees (unique)");
4362
4363                 fprintfsortedpstrees(qptorder, psteptreelist, psteptreenum, psteptreesum, 1, 0.0);
4364                 closefile(qptorder);
4365         }
4366
4367         /* compute ML branch lengths for QP tree and for 1st user tree */
4368         switch(typ_optn) {
4369                 case TREERECON_OPTN:
4370
4371                         /* open outtree file */
4372                         openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4373
4374                         addtimes(GENERAL, &tarr);
4375
4376                         switch (puzzlemode) {
4377                                 case QUARTPUZ: /* read QP tree */
4378                                         rewind(tmpfp);
4379                                         openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4380                                         evaluatetree(tmpfp, tfp, puzzlemode, 0, 1, &oldlocroot);
4381                                         addtimes(TREEEVAL, &tarr);
4382                                         closefile(tmpfp);
4383                                         closefile(tfp);
4384         
4385                     /* CZ 05/19/01 */
4386                                         /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4387                                         /*writeoutputfile(ofp,WRITEREST);*/
4388                                         break;
4389                                 case USERTREE: /* read user tree */
4390                                         openfiletoappend(&ofp, OUTFILE, "general output");
4391         
4392                                         openfiletowrite(&tfp, TREEFILE, "output tree(s)");
4393                                         for (i = 0; i < numutrees; i++) {
4394                                                 evaluatetree(utfp, tfp, puzzlemode, i, numutrees, &oldlocroot);
4395                                                 if (i==0) writeoutputfile(ofp,WRITEREST);
4396                                                 writecutree(ofp, i+1);
4397                                                 addtimes(TREEEVAL, &tarr);
4398                                         }
4399                                         closefile(tfp);
4400                                         closefile(utfp);
4401                                         break;
4402                                 default:
4403                     /* CZ 05/19/01 */
4404                                         /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4405                                         /*writeoutputfile(ofp,WRITEREST);*/
4406                                         break;
4407                         } /* switch puzzlemode */
4408                         break;
4409                 default:
4410             /* CZ 05/19/01 */
4411                         /*openfiletoappend(&ofp, OUTFILE, "general output");*/
4412                         /*writeoutputfile(ofp,WRITEREST);*/
4413                         break;
4414         } /* switch typ_optn */
4415
4416         /* print tree statistics */
4417         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1)
4418                 printtreestats(ofp);
4419         
4420         /* free memory for tree statistics */
4421         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE && numutrees > 1) {
4422                 free_dvector(ulkl);
4423                 free_dmatrix(allsites);
4424                 if (compclock) {
4425                         free_dvector(ulklc);
4426                         free_dmatrix(allsitesc);
4427                 }
4428         }
4429         
4430 #       if PARALLEL
4431                 PP_SendDone();
4432 #       endif /* PARALLEL */
4433
4434         /* write CPU/Wallclock times and parallel statistics */
4435         time(&walltimestop);
4436         cputimestop = clock();
4437         addtimes(OVERALL, &tarr);
4438 #       ifdef TIMEDEBUG
4439                 printtimearr(&tarr);
4440 #       endif /* TIMEDEBUG */
4441         fullcpu          = tarr.fullcpu;
4442         fulltime         = tarr.fulltime;
4443
4444 #       if PARALLEL
4445                 writetimesstat(ofp);
4446 #       endif /* PARALLEL */
4447
4448         /* stop timer */
4449         time(&Stoptime);
4450         Stopcpu=clock();
4451     /* CZ 05/19/01 */
4452         /*timestamp(ofp);*/
4453         /*closefile(ofp);*/
4454
4455
4456      /* printbestratecombination(stderr); */
4457         mlfinish();
4458
4459         FPRINTF(STDOUTFILE "\nAll results written to disk:\n");
4460         FPRINTF(STDOUTFILE "       Puzzle report file:         %s\n", OUTFILE);
4461         FPRINTF(STDOUTFILE "       Likelihood distances:       %s\n", DISTANCES);
4462         
4463         if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST) 
4464                 FPRINTF(STDOUTFILE "       Phylip tree file:           %s\n", TREEFILE);
4465         if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) {
4466                 if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER)) 
4467                         FPRINTF(STDOUTFILE "       Unique puzzling step trees: %s\n", OUTPTORDER);      
4468                 if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER)) 
4469                         FPRINTF(STDOUTFILE "       Puzzling step tree list:    %s\n", OUTPTLIST);       
4470         }
4471         if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) 
4472                 FPRINTF(STDOUTFILE "       Unresolved quartets:        %s\n", UNRESOLVED);
4473         if (typ_optn == LIKMAPING_OPTN) 
4474                 FPRINTF(STDOUTFILE "       Likelihood mapping diagram: %s\n", TRIANGLE);
4475         FPRINTF(STDOUTFILE "\n");
4476
4477         /* runtime message */
4478         FPRINTF(STDOUTFILE 
4479                 "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n",
4480                         difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60.,
4481                         difftime(Stoptime, Starttime)/3600.);
4482         FPRINTF(STDOUTFILE 
4483                 "     including input %.0f seconds (= %.1f minutes = %.1f hours)\n",
4484                         fulltime, fulltime/60., fulltime/3600.);
4485 #ifdef TIMEDEBUG
4486         FPRINTF(STDOUTFILE 
4487                 "and %.0f seconds CPU time (= %.1f minutes = %.1f hours)\n\n",
4488                         fullcpu, fullcpu/60., fullcpu/3600.);
4489 #endif /* TIMEDEBUG */
4490
4491         /* free memory */
4492         memcleanup();
4493
4494 #       if PARALLEL
4495                 } /* !IamSlave */
4496                 PP_Finalize();
4497 #       endif /* PARALLEL */
4498
4499         return 0;
4500 }
4501
4502
4503 /* compare function for uli - sort largest numbers first */
4504 int ulicmp(const void *ap, const void *bp)
4505 {
4506         uli a, b;
4507         
4508         a = *((uli *) ap);
4509         b = *((uli *) bp);
4510
4511         if (a > b) return -1;
4512         else if (a < b) return 1;
4513         else return 0;
4514 }
4515
4516 /* compare function for int - sort smallest numbers first */
4517 int intcmp(const void *ap, const void *bp)
4518 {
4519         int a, b;
4520         
4521         a = *((int *) ap);
4522         b = *((int *) bp);
4523
4524         if (a < b) return -1;
4525         else if (a > b) return 1;
4526         else return 0;
4527 }