initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_dqo / src / puzzle1.c
1 /*
2  * puzzle1.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 #define EXTERN
17
18 #include "puzzle.h"
19 #include "gamma.h"
20
21 void num2quart(uli qnum, int *a, int *b, int *c, int *d)
22 {
23         double temp;
24         uli aa, bb, cc, dd;
25         uli lowval=0, highval=0;
26
27         aa=0; bb=1; cc=2; dd=3;
28
29         temp = (double)(24 * qnum);
30         temp = sqrt(temp);
31         temp = sqrt(temp);
32         /* temp = pow(temp, (double)(1/4)); */
33         dd = (uli) floor(temp) + 1;
34         if (dd < 3) dd = 3;
35         lowval =  (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
36         highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
37         if (lowval >= qnum)
38             while ((lowval > qnum)) {
39                 dd -= 1; lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
40             }
41         else {
42             while (highval <= qnum) {
43                 dd += 1; highval = (uli) (dd+1)*dd*(dd-1)*(dd-2)/24;
44             }
45             lowval = (uli) dd*(dd-1)*(dd-2)*(dd-3)/24;
46         }
47         qnum -= lowval;
48         if (qnum > 0) {
49             temp = (double)(6 * qnum);
50             temp = pow(temp, (double)(1/3));
51             cc = (uli) floor(temp);
52             if (cc < 2) cc= 2;
53             lowval =  (uli) cc*(cc-1)*(cc-2)/6;
54             highval = (uli) (cc+1)*cc*(cc-1)/6;
55             if (lowval >= qnum)
56                 while ((lowval > qnum)) {
57                    cc -= 1; lowval = (uli) cc*(cc-1)*(cc-2)/6;
58                 }
59             else {
60                 while (highval <= qnum) {
61                    cc += 1; highval = (uli) (cc+1)*cc*(cc-1)/6;
62                 }
63                 lowval = (uli) cc*(cc-1)*(cc-2)/6;
64             }
65             qnum -= lowval;
66             if (qnum > 0) {
67                 temp = (double)(2 * qnum);
68                 temp = sqrt(temp);
69                 bb = (uli) floor(temp);
70                 if (bb < 1) bb= 1;
71                 lowval =  (uli) bb*(bb-1)/2;
72                 highval = (uli) (bb+1)*bb/2;
73                 if (lowval >= qnum)
74                     while ((lowval > qnum)) {
75                         bb -= 1; lowval = (uli) bb*(bb-1)/2;
76                     }
77                 else {
78                     while (highval <= qnum) {
79                        bb += 1; highval = (uli) (bb+1)*bb/2;
80                     }
81                     lowval = (uli) bb*(bb-1)/2;
82                 }
83                 qnum -= lowval;
84                 if (qnum > 0) {
85                    aa = (uli) qnum;
86                    if (aa < 0) aa= 0;
87                 }
88             }
89         }
90         *d = (int)dd;
91         *c = (int)cc;
92         *b = (int)bb;
93         *a = (int)aa;
94 }  /* num2quart */
95
96 /******************/
97
98 uli numquarts(int maxspc)
99 {
100    uli tmp;
101    int a, b, c, d;
102
103    if (maxspc < 4)
104      return (uli)0;
105    else {
106       maxspc--;
107       a = maxspc-3;
108       b = maxspc-2;
109       c = maxspc-1;
110       d = maxspc;
111
112       tmp = (uli) 1 + a +
113             (uli) b * (b-1) / 2 +
114             (uli) c * (c-1) * (c-2) / 6 +
115             (uli) d * (d-1) * (d-2) * (d-3) / 24;
116       return (tmp);
117    }
118 }  /* numquarts */
119
120 /******************/
121
122 uli quart2num (int a, int b, int c, int d)
123 {
124       uli tmp;
125       if ((a>b) || (b>c) || (c>d)) {
126          fprintf(stderr, "Error PP5 not (%d <= %d <= %d <= %d) !!!\n", a, b, c,
127 d);
128          exit (1);
129       }
130       tmp = (uli) a +
131             (uli) b * (b-1) / 2 +
132             (uli) c * (c-1) * (c-2) / 6 +
133             (uli) d * (d-1) * (d-2) * (d-3) / 24;
134       return (tmp);
135 }  /* quart2num */
136
137 /******************/
138
139
140
141 /*  flag=0      old allquart binary  */
142 /*  flag=1      allquart binary  */
143 /*  flag=2      allquart ACSII   */
144 /*  flag=3      quartlh  binary  */
145 /*  flag=4      quartlh  ASCII   */
146
147 void writetpqfheader(int            nspec,
148                      FILE          *ofp,
149                      int            flag)
150 { int            currspec;
151
152   if (flag == 0) {
153      unsigned long  nquart;
154      unsigned long  blocklen;
155
156      nquart = numquarts(nspec);
157      /* compute number of bytes */
158      if (nquart % 2 == 0) { /* even number */
159         blocklen = (nquart)/2;
160      } else { /* odd number */
161         blocklen = (nquart + 1)/2;
162      }
163      /* FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename); */
164      fprintf(ofp, "TREE-PUZZLE\n%s\n\n", VERSION);
165      fprintf(ofp, "species: %d\n",       nspec);
166      fprintf(ofp, "quartets: %lu\n",     nquart);
167      fprintf(ofp, "bytes: %lu\n\n",      blocklen);
168
169
170      /* fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp); */
171   }
172
173   if (flag == 1) fprintf(ofp, "##TPQF-BB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
174   if (flag == 2) fprintf(ofp, "##TPQF-BA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
175   if (flag == 3) fprintf(ofp, "##TPQF-LB (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
176   if (flag == 4) fprintf(ofp, "##TPQF-LA (TREE-PUZZLE %s)\n%d\n", VERSION, nspec);
177
178   for (currspec=0; currspec<nspec; currspec++) {
179      fputid(ofp, currspec);
180      fprintf(ofp, "\n");
181   } /* for each species */
182   fprintf(ofp, "\n");
183
184 } /* writetpqfheader */
185
186
187
188 void writeallquarts(int            nspec,
189                     char          *filename,
190                     unsigned char *quartetinfo)
191 { unsigned long  nquart;
192   unsigned long  blocklen;
193   FILE          *ofp;
194
195   nquart = numquarts(nspec);
196
197   /* compute number of bytes */
198   if (nquart % 2 == 0) { /* even number */
199      blocklen = (nquart)/2;
200   } else { /* odd number */
201      blocklen = (nquart + 1)/2;
202   }
203
204   FPRINTF(STDOUTFILE "Writing quartet file: %s\n", filename);
205
206   openfiletowrite(&ofp, filename, "all quartets");
207
208   writetpqfheader(nspec, ofp, 0);
209
210   fwrite(&(quartetinfo[0]), sizeof(char), blocklen, ofp);
211   fclose(ofp);
212
213 } /* writeallquart */
214
215
216
217 void readallquarts(int            nspec,
218                    char          *filename,
219                    unsigned char *quartetinfo)
220 { unsigned long  nquart;
221   unsigned long  blocklen;
222   int            currspec;
223   unsigned long  dummynquart;
224   unsigned long  dummyblocklen;
225   int            dummynspec;
226   char           dummyversion[128];
227   char           dummyname[128];
228   FILE          *ifp;
229
230   nquart = numquarts(nspec);
231
232   /* compute number of bytes */
233   if (nquart % 2 == 0) { /* even number */
234      blocklen = (nquart)/2;
235   } else { /* odd number */
236      blocklen = (nquart + 1)/2;
237   }
238
239 /*  &(quartetinfo[0] */
240
241   openfiletoread(&ifp, filename, "all quartets");
242
243   fscanf(ifp, "TREE-PUZZLE\n");
244   fscanf(ifp, "%s\n\n", dummyversion);
245   
246   fscanf(ifp, "species: %d\n",   &dummynspec);
247   fscanf(ifp, "quartets: %lu\n", &dummynquart);
248   fscanf(ifp, "bytes: %lu\n\n",  &dummyblocklen);
249
250   if ((nspec != dummynspec) ||
251       (nquart != dummynquart) ||
252       (blocklen != dummyblocklen)) {
253      FPRINTF(STDOUTFILE "ERROR: Parameters in quartet file %s do not match!!!\n",
254                      filename);
255 #    if PARALLEL
256          PP_SendDone();
257          MPI_Finalize();
258 #    endif /* PARALLEL */
259      exit(1);
260   }
261
262   FPRINTF(STDOUTFILE "Reading quartet file: %s\n", filename);
263   FPRINTF(STDOUTFILE "   generated by: TREE-PUZZLE %s\n", dummyversion);
264   FPRINTF(STDOUTFILE "   current:      TREE-PUZZLE %s\n", VERSION);
265
266   FPRINTF(STDOUTFILE "   %d species, %lu quartets, %lu bytes\n", 
267                    dummynspec, dummynquart, dummyblocklen);
268
269   for (currspec=0; currspec<nspec; currspec++) {
270
271      fscanf(ifp, "%s\n", dummyname);
272      FPRINTF(STDOUTFILE "   %3d. %s (", currspec+1, dummyname);
273      fputid(STDOUT, currspec);
274      FPRINTF(STDOUTFILE ")\n");
275
276   } /* for each species */
277   FPRINTF(STDOUTFILE "\n");
278
279   fread(&(quartetinfo[0]), sizeof(char), blocklen, ifp);
280   fclose(ifp);
281
282 } /* writeallquart */
283
284
285
286
287 /******************************************************************************/
288 /* options - file I/O - output                                                */
289 /******************************************************************************/
290
291 /* compute TN parameters according to F84 Ts/Tv ratio */
292 void makeF84model()
293 {
294         double rho, piA, piC, piG, piT, piR, piY, ts, yr;
295         
296         piA = Freqtpm[0];
297         piC = Freqtpm[1];
298         piG = Freqtpm[2];
299         piT = Freqtpm[3];
300         piR = piA + piG;
301         piY = piC + piT;
302         if (piC*piT*piR + piA*piG*piY == 0.0) {         
303                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
304                 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
305                 tstvf84 = 0.0;
306                 return;
307         }
308         rho = (piR*piY*(piR*piY*tstvf84 - (piA*piG + piC*piT)))/
309                 (piC*piT*piR + piA*piG*piY);
310         
311         if(piR == 0.0 || piY == 0.0 || (piR + rho) == 0.0) {
312                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
313                 FPRINTF(STDOUTFILE "(bad combination of base frequencies)\n");
314                 tstvf84 = 0.0;
315                 return;
316         }
317         ts = 0.5 + 0.25*rho*(1.0/piR + 1.0/piY);
318         yr = (piY + rho)/piY * piR/(piR + rho);
319         if (ts < MINTS || ts > MAXTS) {
320                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
321                 FPRINTF(STDOUTFILE "(bad Ts/Tv parameter)\n");
322                 tstvf84 = 0.0;
323                 return;
324         }
325         if (yr < MINYR || yr > MAXYR) {
326                 FPRINTF(STDOUTFILE "\n\n\nF84 model not possible ");
327                 FPRINTF(STDOUTFILE "(bad Y/R transition parameter)\n");
328                 tstvf84 = 0.0;
329                 return;
330         }
331         TSparam = ts;
332         YRparam = yr;
333         optim_optn = FALSE;
334 }
335
336 /* compute number of quartets used in LM analysis */
337 void compnumqts()
338 {
339         if (lmqts == 0) {
340                 if (numclust == 4)
341                         Numquartets = (uli) clustA*clustB*clustC*clustD;
342                 if (numclust == 3)
343                         Numquartets = (uli) clustA*clustB*clustC*(clustC-1)/2;
344                 if (numclust == 2)
345                         Numquartets = (uli) clustA*(clustA-1)/2 * clustB*(clustB-1)/2;
346                 if (numclust == 1)      
347                         Numquartets = (uli) Maxspc*(Maxspc-1)*(Maxspc-2)*(Maxspc-3)/24;
348         } else {
349                 Numquartets = lmqts;
350         }
351 }
352
353 /* set options interactively */
354 void setoptions()
355 {       
356         int i, valid;
357         double sumfreq;
358         char ch;
359
360     puzzlemode = PAIRDIST; /*Only do pairwise dist. CZ, 05/16/01*/
361
362         /* defaults */
363         rhetmode = UNIFORMRATE;          /* assume rate homogeneity               */
364         numcats = 1;
365         Geta = 0.05;
366         grate_optim = FALSE;
367         fracinv = 0.0;
368         fracinv_optim = FALSE;
369
370         compclock = FALSE;           /* compute clocklike branch lengths      */
371         locroot = -1;                /* search for optimal place of root      */ 
372         qcalg_optn = FALSE;          /* don't use sampling of quartets        */
373         approxp_optn = TRUE;         /* approximate parameter estimates       */
374         listqptrees = PSTOUT_NONE;   /* list puzzling step trees              */
375         
376         /* approximate QP quartets? */
377         if (Maxspc <= 6) approxqp = FALSE;
378         else approxqp = TRUE;
379         
380         codon_optn = 0;        /* use all positions in a codon          */
381         
382         /* number of puzzling steps */
383         if (Maxspc <= 25) Numtrial = 1000;
384         else if (Maxspc <= 50) Numtrial = 10000;
385         else if (Maxspc <= 75) Numtrial = 25000;
386         else Numtrial = 50000;
387             
388         utree_optn = TRUE;     /* use first user tree for estimation    */
389         outgroup = 0;          /* use first taxon as outgroup           */
390         sym_optn = FALSE;      /* symmetrize doublet frequencies        */
391         tstvf84 = 0.0;         /* disable F84 model                     */
392         show_optn = FALSE;     /* show unresolved quartets              */
393         typ_optn = TREERECON_OPTN;          /* tree reconstruction                   */
394         numclust = 1;          /* one clusters in LM analysis           */
395         lmqts = 0;             /* all quartets in LM analysis           */
396         compnumqts();
397         if (Numquartets > 10000) {
398                 lmqts = 10000;     /* 10000 quartets in LM analysis         */
399                 compnumqts();
400         }
401         
402         do {
403                 FPRINTF(STDOUTFILE "\n\n\nGENERAL OPTIONS\n");
404                 FPRINTF(STDOUTFILE " b                     Type of analysis?  ");
405                 if (typ_optn == TREERECON_OPTN) FPRINTF(STDOUTFILE "Tree reconstruction\n");
406                 if (typ_optn == LIKMAPING_OPTN) FPRINTF(STDOUTFILE "Likelihood mapping\n");
407                 if (typ_optn == TREERECON_OPTN) {
408                         FPRINTF(STDOUTFILE " k                Tree search procedure?  ");
409                         if (puzzlemode == QUARTPUZ) FPRINTF(STDOUTFILE "Quartet puzzling\n");
410                         if (puzzlemode == USERTREE) FPRINTF(STDOUTFILE "User defined trees\n");
411                         if (puzzlemode == PAIRDIST) FPRINTF(STDOUTFILE "Pairwise distances only (no tree)\n");
412                         if (puzzlemode == QUARTPUZ) {
413                                 FPRINTF(STDOUTFILE " v       Approximate quartet likelihood?  %s\n",
414                                         (approxqp ? "Yes" : "No"));
415                                 FPRINTF(STDOUTFILE " u             List unresolved quartets?  %s\n",
416                                         (show_optn ? "Yes" : "No"));
417                                 FPRINTF(STDOUTFILE " n             Number of puzzling steps?  %lu\n",
418                                                 Numtrial);
419                                 FPRINTF(STDOUTFILE " j             List puzzling step trees?  ");
420                                 switch (listqptrees) {
421                                         case PSTOUT_NONE:      FPRINTF(STDOUTFILE "No\n"); break;
422                                         case PSTOUT_ORDER:     FPRINTF(STDOUTFILE "Unique topologies\n"); break;
423                                         case PSTOUT_LISTORDER: FPRINTF(STDOUTFILE "Unique topologies & Chronological list\n"); break;
424                                         case PSTOUT_LIST:      FPRINTF(STDOUTFILE "Chronological list only\n"); break;
425                                 }
426
427                                 FPRINTF(STDOUTFILE " o                  Display as outgroup?  ");
428                                 fputid(STDOUT, outgroup);
429                                 FPRINTF(STDOUTFILE "\n");
430                         }
431                         if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
432                                 FPRINTF(STDOUTFILE " z     Compute clocklike branch lengths?  ");
433                                 if (compclock) FPRINTF(STDOUTFILE "Yes\n");
434                                 else FPRINTF(STDOUTFILE "No\n");
435                         }
436                         if (compclock)
437                                 if (puzzlemode == QUARTPUZ || puzzlemode == USERTREE) {
438                                         FPRINTF(STDOUTFILE " l                     Location of root?  ");
439                                         if (locroot < 0) FPRINTF(STDOUTFILE "Best place (automatic search)\n");
440                                         else if (locroot < Maxspc) {
441                                                 FPRINTF(STDOUTFILE "Branch %d (", locroot + 1);
442                                                 fputid(STDOUT, locroot);
443                                                 FPRINTF(STDOUTFILE ")\n");
444                                         } else FPRINTF(STDOUTFILE "Branch %d (internal branch)\n", locroot + 1);
445                                 }
446                 }
447                 if (typ_optn == LIKMAPING_OPTN) {
448                           FPRINTF(STDOUTFILE " g          Group sequences in clusters?  ");
449                           if (numclust == 1) FPRINTF(STDOUTFILE "No\n");
450                           else FPRINTF(STDOUTFILE "Yes (%d clusters as specified)\n", numclust);
451                           FPRINTF(STDOUTFILE " n                   Number of quartets?  ");
452                           if (lmqts == 0) FPRINTF(STDOUTFILE "%lu (all possible)\n", Numquartets);
453                           else FPRINTF(STDOUTFILE "%lu (random choice)\n", lmqts);
454                 }
455                 FPRINTF(STDOUTFILE " e                  Parameter estimates?  ");
456                 if (approxp_optn) FPRINTF(STDOUTFILE "Approximate (faster)\n");
457                 else FPRINTF(STDOUTFILE "Exact (slow)\n");
458                 if (!(puzzlemode == USERTREE && typ_optn == TREERECON_OPTN)) {  
459                         FPRINTF(STDOUTFILE " x            Parameter estimation uses?  ");
460                         if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
461                         else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
462                         
463                 } else {
464                         FPRINTF(STDOUTFILE " x            Parameter estimation uses?  ");
465                         if (utree_optn)
466                                 FPRINTF(STDOUTFILE "1st input tree\n");
467                         else if (qcalg_optn) FPRINTF(STDOUTFILE "Quartet sampling + NJ tree\n");
468                         else FPRINTF(STDOUTFILE "Neighbor-joining tree\n");
469                 }
470                 FPRINTF(STDOUTFILE "SUBSTITUTION PROCESS\n");
471                 FPRINTF(STDOUTFILE " d          Type of sequence input data?  ");
472                 if (auto_datatype == AUTO_GUESS) FPRINTF(STDOUTFILE "Auto: ");
473                 if (data_optn == NUCLEOTIDE) FPRINTF(STDOUTFILE "Nucleotides\n");
474                 if (data_optn == AMINOACID) FPRINTF(STDOUTFILE "Amino acids\n");
475                 if (data_optn == BINARY) FPRINTF(STDOUTFILE "Binary states\n");
476                 if (data_optn == NUCLEOTIDE && (Maxseqc % 3) == 0  && !SH_optn) {
477                         FPRINTF(STDOUTFILE " h             Codon positions selected?  ");
478                         if (codon_optn == 0) FPRINTF(STDOUTFILE "Use all positions\n");
479                         if (codon_optn == 1) FPRINTF(STDOUTFILE "Use only 1st positions\n");
480                         if (codon_optn == 2) FPRINTF(STDOUTFILE "Use only 2nd positions\n");
481                         if (codon_optn == 3) FPRINTF(STDOUTFILE "Use only 3rd positions\n");
482                         if (codon_optn == 4) FPRINTF(STDOUTFILE "Use 1st and 2nd positions\n");
483                 }
484                 FPRINTF(STDOUTFILE " m                Model of substitution?  ");
485                 if (data_optn == NUCLEOTIDE) { /* nucleotides */
486                         if (nuc_optn) {
487                                 if(HKY_optn)
488                                         FPRINTF(STDOUTFILE "HKY (Hasegawa et al. 1985)\n");     
489                                 else {
490                                         FPRINTF(STDOUTFILE "TN (Tamura-Nei 1993)\n");
491                                         FPRINTF(STDOUTFILE " p      Constrain TN model to F84 model?  ");
492                                         if (tstvf84 == 0.0)
493                                                 FPRINTF(STDOUTFILE "No\n");
494                                         else FPRINTF(STDOUTFILE "Yes (Ts/Tv ratio = %.2f)\n", tstvf84);
495                                 }
496                                 FPRINTF(STDOUTFILE " t    Transition/transversion parameter?  ");
497                                 if (optim_optn)
498                                         FPRINTF(STDOUTFILE "Estimate from data set\n");
499                                 else
500                                         FPRINTF(STDOUTFILE "%.2f\n", TSparam);  
501                                 if (TN_optn) {
502                                         FPRINTF(STDOUTFILE " r             Y/R transition parameter?  ");
503                                         if (optim_optn)
504                                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
505                                         else
506                                                 FPRINTF(STDOUTFILE "%.2f\n", YRparam);          
507                                 }                       
508                         }
509                         if (SH_optn) {
510                                         FPRINTF(STDOUTFILE "SH (Schoeniger-von Haeseler 1994)\n");
511                                         FPRINTF(STDOUTFILE " t    Transition/transversion parameter?  ");
512                                         if (optim_optn)
513                                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
514                                         else
515                                                 FPRINTF(STDOUTFILE "%.2f\n", TSparam);
516                         }       
517                 }
518                 if (data_optn == NUCLEOTIDE && SH_optn) {
519                         FPRINTF(STDOUTFILE " h                  Doublets defined by?  ");
520                         if (SHcodon)
521                                 FPRINTF(STDOUTFILE "1st and 2nd codon positions\n");
522                         else
523                                 FPRINTF(STDOUTFILE "1st+2nd, 3rd+4th, etc. site\n");
524                 }
525                 if (data_optn == AMINOACID) { /* amino acids */
526                         switch (auto_aamodel) {
527                                 case AUTO_GUESS:
528                                         FPRINTF(STDOUTFILE "Auto: ");
529                                         break;
530                                 case AUTO_DEFAULT:
531                                         FPRINTF(STDOUTFILE "Def.: ");
532                                         break;
533                         }
534                         if (Dayhf_optn) FPRINTF(STDOUTFILE "Dayhoff (Dayhoff et al. 1978)\n");  
535                         if (Jtt_optn) FPRINTF(STDOUTFILE "JTT (Jones et al. 1992)\n");
536                         if (mtrev_optn) FPRINTF(STDOUTFILE "mtREV24 (Adachi-Hasegawa 1996)\n");
537                         if (cprev_optn) FPRINTF(STDOUTFILE "cpREV45 (Adachi et al. 2000)\n");
538                         if (blosum62_optn) FPRINTF(STDOUTFILE "BLOSUM62 (Henikoff-Henikoff 92)\n");
539                         if (vtmv_optn) FPRINTF(STDOUTFILE "VT (Mueller-Vingron 2000)\n");
540                         if (wag_optn) FPRINTF(STDOUTFILE "WAG (Whelan-Goldman 2000)\n");
541                 }
542                 if (data_optn == BINARY) { /* binary states */
543                         FPRINTF(STDOUTFILE "Two-state model (Felsenstein 1981)\n");
544                 }
545                 if (data_optn == AMINOACID)
546                         FPRINTF(STDOUTFILE " f               Amino acid frequencies?  ");
547                 else if (data_optn == NUCLEOTIDE && SH_optn)
548                         FPRINTF(STDOUTFILE " f                  Doublet frequencies?  ");
549                 else if (data_optn == NUCLEOTIDE && nuc_optn)
550                         FPRINTF(STDOUTFILE " f               Nucleotide frequencies?  ");
551                 else if (data_optn == BINARY)
552                         FPRINTF(STDOUTFILE " f             Binary state frequencies?  ");
553                 FPRINTF(STDOUTFILE "%s\n", (Frequ_optn ? "Estimate from data set" :
554                         "Use specified values"));
555                 if (data_optn == NUCLEOTIDE && SH_optn)
556                         FPRINTF(STDOUTFILE " s       Symmetrize doublet frequencies?  %s\n",
557                                 (sym_optn ? "Yes" : "No"));
558                                 
559                 FPRINTF(STDOUTFILE "RATE HETEROGENEITY\n");
560                 FPRINTF(STDOUTFILE " w          Model of rate heterogeneity?  ");
561                 if (rhetmode == UNIFORMRATE) FPRINTF(STDOUTFILE "Uniform rate\n");
562                 if (rhetmode == GAMMARATE  ) FPRINTF(STDOUTFILE "Gamma distributed rates\n");
563                 if (rhetmode == TWORATE    ) FPRINTF(STDOUTFILE "Two rates (1 invariable + 1 variable)\n");
564                 if (rhetmode == MIXEDRATE  ) FPRINTF(STDOUTFILE "Mixed (1 invariable + %d Gamma rates)\n", numcats);
565                 
566                 if (rhetmode == TWORATE || rhetmode == MIXEDRATE) {
567                         FPRINTF(STDOUTFILE " i         Fraction of invariable sites?  ");
568                         if (fracinv_optim) FPRINTF(STDOUTFILE "Estimate from data set");
569                         else FPRINTF(STDOUTFILE "%.2f", fracinv);
570                         if (fracinv == 0.0 && !fracinv_optim) FPRINTF(STDOUTFILE " (all sites variable)");
571                         FPRINTF(STDOUTFILE "\n");
572                 }
573                 if (rhetmode == GAMMARATE || rhetmode == MIXEDRATE) {
574                         FPRINTF(STDOUTFILE " a   Gamma distribution parameter alpha?  ");
575                         if (grate_optim)
576                                 FPRINTF(STDOUTFILE "Estimate from data set\n");
577                         else if (Geta > 0.5)
578                                 FPRINTF(STDOUTFILE "%.2f (strong rate heterogeneity)\n", (1.0-Geta)/Geta);
579                         else FPRINTF(STDOUTFILE "%.2f (weak rate heterogeneity)\n", (1.0-Geta)/Geta);
580                         FPRINTF(STDOUTFILE " c      Number of Gamma rate categories?  %d\n", numcats);
581                 }
582         
583                 FPRINTF(STDOUTFILE "\nQuit [q], confirm [y], or change [menu] settings: ");
584                 
585                 /* read one char */
586                 ch = getchar();
587                 if (ch != '\n') {
588                         do ;
589                         while (getchar() != '\n');
590                 }
591                 ch = (char) tolower((int) ch);
592                 
593                 /* letters in use: d m */
594                 /* letters not in use:  */
595
596                 switch (ch) {
597
598                         case '\n':      break;
599                         
600                         
601
602                         case 'd':       if (auto_datatype == AUTO_GUESS) {
603                                                 auto_datatype = AUTO_OFF;
604                                                 guessdata_optn = data_optn;
605                                                 data_optn = 0;
606                                         } else {
607                                                 data_optn = data_optn + 1;
608                                                 if (data_optn == 3) {
609                                                         auto_datatype = AUTO_GUESS;
610                                                         data_optn = guessdata_optn;
611                                                 }
612                                         }
613                                         /* translate characters into format used by ML engine */
614                                         translatedataset();
615                                         estimatebasefreqs();
616                                         break;
617                                                 
618                         
619         
620                         case 'm':       if (data_optn == NUCLEOTIDE) { /* nucleotide data */
621                                                         if(HKY_optn && nuc_optn) {
622                                                                 /* HKY -> TN */
623                                                                 tstvf84       = 0.0;
624                                                                 TSparam       = 2.0;
625                                                                 YRparam       = 0.9;
626                                                                 HKY_optn      = FALSE;
627                                                                 TN_optn       = TRUE;
628                                                                 optim_optn    = TRUE;
629                                                                 nuc_optn      = TRUE;
630                                                                 SH_optn       = FALSE;
631                                                                 break;
632                                                         }
633                                                         if(TN_optn && nuc_optn) {
634                                                                 if (Maxseqc % 2 == 0 || Maxseqc % 3 == 0) {
635                                                                         /* number of chars needs to be a multiple 2 or 3 */
636                                                                         /* TN -> SH */          
637                                                                         if (Maxseqc % 2 != 0 && Maxseqc % 3 == 0)
638                                                                                 SHcodon = TRUE;
639                                                                         else
640                                                                                 SHcodon = FALSE;                                                                
641                                                                         tstvf84       = 0.0;
642                                                                         TSparam       = 2.0;
643                                                                         YRparam       = 1.0;
644                                                                         HKY_optn      = TRUE;
645                                                                         TN_optn       = FALSE;
646                                                                         optim_optn    = TRUE;
647                                                                         nuc_optn      = FALSE;
648                                                                         SH_optn       = TRUE;
649                                                                         /* translate characters into format */
650                                                                         /* used by ML engine */
651                                                                         translatedataset();
652                                                                         estimatebasefreqs();
653                                                                 } else {
654                                                                         FPRINTF(STDOUTFILE "\n\n\nSH model not ");
655                                                                         FPRINTF(STDOUTFILE "available for the data set!\n");
656                                                                         /* TN -> HKY */
657                                                                         tstvf84       = 0.0;
658                                                                         TSparam       = 2.0;
659                                                                         YRparam       = 1.0;
660                                                                         HKY_optn      = TRUE;
661                                                                         TN_optn       = FALSE;
662                                                                         optim_optn    = TRUE;
663                                                                         nuc_optn      = TRUE;
664                                                                         SH_optn       = FALSE;
665                                                                 }
666                                                                 break;
667                                                         }
668                                                         if(SH_optn) {
669                                                                 /* SH -> HKY */
670                                                                 tstvf84       = 0.0;
671                                                                 TSparam       = 2.0;
672                                                                 YRparam       = 1.0;
673                                                                 HKY_optn      = TRUE;
674                                                                 TN_optn       = FALSE;
675                                                                 optim_optn    = TRUE;
676                                                                 nuc_optn      = TRUE;
677                                                                 SH_optn       = FALSE;
678                                                                 /* translate characters into format */
679                                                                 /* used by ML engine */
680                                                                 translatedataset();
681                                                                 estimatebasefreqs();
682                                                                 break;
683                                                         }
684                                                         break;
685                                                 }
686                                                 if (data_optn == AMINOACID) { /* amino acid data */
687                                                         if (auto_aamodel) {
688                                                                 /* AUTO -> Dayhoff */
689                                                                 Dayhf_optn    = TRUE;
690                                                                 Jtt_optn      = FALSE;
691                                                                 mtrev_optn    = FALSE;
692                                                                 cprev_optn    = FALSE;
693                                                                 blosum62_optn = FALSE;
694                                                                 vtmv_optn     = FALSE;
695                                                                 wag_optn      = FALSE;
696                                                                 auto_aamodel  = AUTO_OFF;
697                                                                 break;
698                                                         }
699                                                         if (Dayhf_optn) {
700                                                                 /* Dayhoff -> JTT */
701                                                                 Dayhf_optn    = FALSE;
702                                                                 Jtt_optn      = TRUE;
703                                                                 mtrev_optn    = FALSE;
704                                                                 cprev_optn    = FALSE;
705                                                                 blosum62_optn = FALSE;
706                                                                 vtmv_optn     = FALSE;
707                                                                 wag_optn      = FALSE;
708                                                                 auto_aamodel  = AUTO_OFF;
709                                                                 break;
710                                                         }
711                                                         if (Jtt_optn) {
712                                                                 /* JTT -> mtREV */
713                                                                 Dayhf_optn    = FALSE;
714                                                                 Jtt_optn      = FALSE;
715                                                                 mtrev_optn    = TRUE;
716                                                                 cprev_optn    = FALSE;
717                                                                 blosum62_optn = FALSE;
718                                                                 vtmv_optn     = FALSE;
719                                                                 wag_optn      = FALSE;
720                                                                 auto_aamodel  = AUTO_OFF;
721                                                                 break;
722                                                         }
723 #ifdef CPREV
724                                                         if (mtrev_optn) {
725                                                                 /* mtREV -> cpREV */
726                                                                 Dayhf_optn    = FALSE;
727                                                                 Jtt_optn      = FALSE;
728                                                                 mtrev_optn    = FALSE;
729                                                                 cprev_optn    = TRUE;
730                                                                 blosum62_optn = FALSE;
731                                                                 vtmv_optn     = FALSE;
732                                                                 wag_optn      = FALSE;
733                                                                 auto_aamodel  = AUTO_OFF;
734                                                                 break;
735                                                         }
736 #else /* ! CPREV */
737                                                         if (mtrev_optn) {
738                                                                 /* mtREV -> BLOSUM 62 */
739                                                                 Dayhf_optn    = FALSE;
740                                                                 Jtt_optn      = FALSE;
741                                                                 mtrev_optn    = FALSE;
742                                                                 cprev_optn    = FALSE;
743                                                                 blosum62_optn = TRUE;
744                                                                 vtmv_optn     = FALSE;
745                                                                 wag_optn      = FALSE;
746                                                                 auto_aamodel  = AUTO_OFF;
747                                                                 break;
748                                                         }
749 #endif /* ! CPREV */
750
751 #ifdef CPREV
752                                                         if (cprev_optn) {
753                                                                 /* cpREV -> BLOSUM 62 */
754                                                                 Dayhf_optn    = FALSE;
755                                                                 Jtt_optn      = FALSE;
756                                                                 mtrev_optn    = FALSE;
757                                                                 cprev_optn    = FALSE;
758                                                                 blosum62_optn = TRUE;
759                                                                 vtmv_optn     = FALSE;
760                                                                 wag_optn      = FALSE;
761                                                                 auto_aamodel  = AUTO_OFF;
762                                                                 break;
763                                                         }
764 #endif
765                                                         if (blosum62_optn) {
766                                                                 /* BLOSUM 62 -> VT model */
767                                                                 Dayhf_optn    = FALSE;
768                                                                 Jtt_optn      = FALSE;
769                                                                 mtrev_optn    = FALSE;
770                                                                 cprev_optn    = FALSE;
771                                                                 blosum62_optn = FALSE;
772                                                                 vtmv_optn     = TRUE;
773                                                                 wag_optn      = FALSE;
774                                                                 auto_aamodel  = AUTO_OFF;
775                                                                 break;
776                                                         }
777                                                         if (vtmv_optn) {
778                                                                 /* VT model -> WAG model */
779                                                                 Dayhf_optn    = FALSE;
780                                                                 Jtt_optn      = FALSE;
781                                                                 mtrev_optn    = FALSE;
782                                                                 cprev_optn    = FALSE;
783                                                                 blosum62_optn = FALSE;
784                                                                 vtmv_optn     = FALSE;
785                                                                 wag_optn      = TRUE;
786                                                                 auto_aamodel  = AUTO_OFF;
787                                                                 break;
788                                                         }
789                                                         if (wag_optn) {
790                                                                 /* WAG model -> AUTO */
791                                                                 Dayhf_optn    = guessDayhf_optn;
792                                                                 Jtt_optn      = guessJtt_optn;
793                                                                 mtrev_optn    = guessmtrev_optn;
794                                                                 cprev_optn    = guesscprev_optn;
795                                                                 blosum62_optn = guessblosum62_optn;
796                                                                 vtmv_optn     = guessvtmv_optn;
797                                                                 wag_optn      = guesswag_optn;
798                                                                 auto_aamodel  = guessauto_aamodel;
799                                                                 break;
800                                                         }
801                                                         break;
802                                                 }
803                                                 if (data_optn == BINARY) {
804                                                         FPRINTF(STDOUTFILE "\n\n\nNo other model available!\n");
805                                                 }
806                                                 break;
807                                                 
808                         
809
810                         case 'y':       break;
811
812                         default:        FPRINTF(STDOUTFILE "\n\n\nThis is not a possible option!\n");
813                                                 break;
814                 }
815         } while (ch != 'y');
816
817         FPRINTF(STDOUTFILE "\n\n\n");
818 }
819
820 /* open file for reading */
821 void openfiletoread(FILE **fp, char name[], char descr[])
822 {
823         int count = 0;
824         cvector str;
825
826         if ((*fp = fopen(name, "r")) == NULL) {
827                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
828                 str = mygets();
829                 while ((*fp = fopen(str, "r")) == NULL)
830                 {
831                         count++;
832                         if (count > 10)
833                         {
834                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
835                                 exit(1);
836                         }
837                         FPRINTF(STDOUTFILE "File '%s' not found, ", str);
838                         FPRINTF(STDOUTFILE "please enter alternative name: ");
839                         free_cvector(str);
840                         str = mygets();
841                 }
842                 free_cvector(str);
843                 FPRINTF(STDOUTFILE "\n");
844         }
845 } /* openfiletoread */
846
847
848 /* open file for writing */
849 void openfiletowrite(FILE **fp, char name[], char descr[])
850 {
851         int count = 0;
852         cvector str;
853
854         if ((*fp = fopen(name, "w")) == NULL) {
855                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
856                 str = mygets();
857                 while ((*fp = fopen(str, "w")) == NULL)
858                 {
859                         count++;
860                         if (count > 10)
861                         {
862                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
863                                 exit(1);
864                         }
865                         FPRINTF(STDOUTFILE "File '%s' not created, ", str);
866                         FPRINTF(STDOUTFILE "please enter other name: ");
867                         free_cvector(str);
868                         str = mygets();
869                 }
870                 free_cvector(str);
871                 FPRINTF(STDOUTFILE "\n");
872         }
873 } /* openfiletowrite */
874
875
876 /* open file for appending */
877 void openfiletoappend(FILE **fp, char name[], char descr[])
878 {
879         int count = 0;
880         cvector str;
881
882         if ((*fp = fopen(name, "a")) == NULL) {
883                 FPRINTF(STDOUTFILE "\n\n\nPlease enter a file name for the %s: ", descr);
884                 str = mygets();
885                 while ((*fp = fopen(str, "a")) == NULL)
886                 {
887                         count++;
888                         if (count > 10)
889                         {
890                                 FPRINTF(STDOUTFILE "\n\n\nToo many trials - quitting ...\n");
891                                 exit(1);
892                         }
893                         FPRINTF(STDOUTFILE "File '%s' not created, ", str);
894                         FPRINTF(STDOUTFILE "please enter other name: ");
895                         free_cvector(str);
896                         str = mygets();
897                 }
898                 free_cvector(str);
899                 FPRINTF(STDOUTFILE "\n");
900         }
901 } /* openfiletowrite */
902
903
904 /* close file */
905 void closefile(FILE *fp)
906 {       
907         fclose(fp);
908 } /* closefile */
909
910 /* symmetrize doublet frequencies */
911 void symdoublets()
912 {
913         int i, imean;
914         double mean;
915         
916         if (data_optn == NUCLEOTIDE && SH_optn && sym_optn) {
917                 /* ML frequencies */
918                 mean = (Freqtpm[1] + Freqtpm[4])/2.0; /* AC CA */
919                 Freqtpm[1] = mean;
920                 Freqtpm[4] = mean;
921                 mean = (Freqtpm[2] + Freqtpm[8])/2.0; /* AG GA */
922                 Freqtpm[2] = mean;
923                 Freqtpm[8] = mean;
924                 mean = (Freqtpm[3] + Freqtpm[12])/2.0; /* AT TA */
925                 Freqtpm[3] = mean;
926                 Freqtpm[12] = mean;
927                 mean = (Freqtpm[6] + Freqtpm[9])/2.0; /* CG GC */
928                 Freqtpm[6] = mean;
929                 Freqtpm[9] = mean;
930                 mean = (Freqtpm[7] + Freqtpm[13])/2.0; /* CT TC */
931                 Freqtpm[7] = mean;
932                 Freqtpm[13] = mean;
933                 mean = (Freqtpm[11] + Freqtpm[14])/2.0; /* GT TG */
934                 Freqtpm[11] = mean;
935                 Freqtpm[14] = mean;
936                 
937                 /* base composition of each taxon */
938                 for (i = 0; i < Maxspc; i++) {
939                         imean = (Basecomp[i][1] + Basecomp[i][4])/2; /* AC CA */
940                         Basecomp[i][1] = imean;
941                         Basecomp[i][4] = imean;
942                         imean = (Basecomp[i][2] + Basecomp[i][8])/2; /* AG GA */
943                         Basecomp[i][2] = imean;
944                         Basecomp[i][8] = imean;
945                         imean = (Basecomp[i][3] + Basecomp[i][12])/2; /* AT TA */
946                         Basecomp[i][3] = imean;
947                         Basecomp[i][12] = imean;
948                         imean = (Basecomp[i][6] + Basecomp[i][9])/2; /* CG GC */
949                         Basecomp[i][6] = imean;
950                         Basecomp[i][9] = imean;
951                         imean = (Basecomp[i][7] + Basecomp[i][13])/2; /* CT TC */
952                         Basecomp[i][7] = imean;
953                         Basecomp[i][13] = imean;
954                         imean = (Basecomp[i][11] + Basecomp[i][14])/2; /* GT TG */
955                         Basecomp[i][11] = imean;
956                         Basecomp[i][14] = imean;
957                 }
958         }
959 }
960
961 /* show Ts/Tv ratio and Ts Y/R ratio */
962 void computeexpectations()
963 {
964         /* CZ */
965 }
966
967 /* write ML distance matrix to file. Modified CZ 05/29/01 */
968 void putdistance(FILE *fp)
969 {
970         /*int i;*/
971     int i, j;
972         
973     for (i = 0; i < Maxspc - 1; i++) {
974                 /*fprintf(fp, "%.5f  ", Distanmat[i]/100.0);*/
975         for ( j = 0; j < 26; j++ ) {
976             fputc( Identif[i][j], fp ); /*CZ*/
977         }
978         fprintf(fp, "%.5f\n", Distanmat[i]/100.0);
979         }
980         fprintf(fp, "\n");
981         
982 }
983
984
985
986
987 /* first lines of EPSF likelihood mapping file */
988 void initps(FILE *ofp)
989 {
990         /* CZ */
991 }
992
993 /* plot one point of likelihood mapping analysis */
994 void plotlmpoint(FILE *ofp, double w1, double w2)
995 {
996         /* CZ */
997 }
998
999 /* last lines of EPSF likelihood mapping file */
1000 void finishps(FILE *ofp)
1001 {
1002         /* CZ */
1003 }
1004
1005 /* computes LM point from the three log-likelihood values,
1006    plots the point, and does some statistics */
1007 void makelmpoint(FILE *fp, double b1, double b2, double b3)
1008 {
1009         double w1, w2, w3, temp;
1010         unsigned char qpbranching;
1011         double temp1, temp2, temp3, onethird;
1012         unsigned char discreteweight[3], treebits[3];
1013         
1014         onethird = 1.0/3.0;
1015         treebits[0] = (unsigned char) 1;
1016         treebits[1] = (unsigned char) 2;
1017         treebits[2] = (unsigned char) 4;
1018
1019         /* sort in descending order */
1020         qweight[0] = b1;
1021         qweight[1] = b2;
1022         qweight[2] = b3;
1023         sort3doubles(qweight, qworder);
1024
1025         /* compute Bayesian weights */
1026         qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
1027         qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
1028         qweight[qworder[0]] = 1.0;
1029         temp = qweight[0] + qweight[1] + qweight[2];
1030         qweight[0] = qweight[0]/temp;
1031         qweight[1] = qweight[1]/temp;
1032         qweight[2] = qweight[2]/temp;
1033
1034         /* plot one point in likelihood mapping triangle */
1035         w1 = qweight[0];
1036         w2 = qweight[1];
1037         w3 = qweight[2];
1038         plotlmpoint(fp, w1, w2);
1039         
1040         /* check areas 1,2,3 */ 
1041         if (treebits[qworder[0]] == 1) ar1++;
1042         else if (treebits[qworder[0]] == 2) ar2++;
1043         else ar3++;                             
1044
1045         /* check out regions 1,2,3,4,5,6,7 */
1046
1047         /* 100 distribution */
1048         temp1 = 1.0 - qweight[qworder[0]];
1049         sqdiff[0] = temp1*temp1 +
1050                 qweight[qworder[1]]*qweight[qworder[1]] +
1051                 qweight[qworder[2]]*qweight[qworder[2]];
1052         discreteweight[0] = treebits[qworder[0]];
1053
1054         /* 110 distribution */
1055         temp1 = 0.5 - qweight[qworder[0]];
1056         temp2 = 0.5 - qweight[qworder[1]];
1057         sqdiff[1] = temp1*temp1 + temp2*temp2 +
1058                 qweight[qworder[2]]*qweight[qworder[2]];
1059         discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
1060
1061         /* 111 distribution */
1062         temp1 = onethird - qweight[qworder[0]];
1063         temp2 = onethird - qweight[qworder[1]];
1064         temp3 = onethird - qweight[qworder[2]];
1065         sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
1066         discreteweight[2] = (unsigned char) 7;
1067
1068         /* sort in descending order */
1069         sort3doubles(sqdiff, sqorder);
1070                         
1071         qpbranching = (unsigned char) discreteweight[sqorder[2]];
1072                                                         
1073         if (qpbranching == 1) {
1074                 reg1++;
1075                 if (w2 < w3) reg1l++;
1076                 else reg1r++;
1077         }
1078         if (qpbranching == 2) {
1079                 reg2++;
1080                 if (w1 < w3) reg2d++;
1081                 else reg2u++;
1082         }
1083         if (qpbranching == 4) {
1084                 reg3++;
1085                 if (w1 < w2) reg3d++;
1086                 else reg3u++;
1087         }
1088         if (qpbranching == 3) {
1089                 reg4++;
1090                 if (w1 < w2) reg4d++;
1091                 else reg4u++;
1092         }
1093         if (qpbranching == 6) {
1094                 reg5++;
1095                 if (w2 < w3) reg5l++;
1096                 else reg5r++;
1097         }
1098         if (qpbranching == 5) {
1099                 reg6++;
1100                 if (w1 < w3) reg6d++;
1101                 else reg6u++;
1102         }
1103         if (qpbranching == 7) reg7++;
1104 }
1105
1106 /* print tree statistics */
1107 void printtreestats(FILE *ofp)
1108 {
1109         int i, j, besttree;
1110         double bestlkl, difflkl, difflklps, temp, sum;
1111         
1112         /* find best tree */
1113         besttree = 0;
1114         bestlkl = ulkl[0];
1115         for (i = 1; i < numutrees; i++)
1116                 if (ulkl[i] > bestlkl) {
1117                         besttree = i;
1118                         bestlkl = ulkl[i];
1119                 }
1120         
1121         fprintf(ofp, "\n\nCOMPARISON OF USER TREES (NO CLOCK)\n\n");
1122         fprintf(ofp, "Tree   log L   difference     S.E.   Significantly worse\n");
1123         fprintf(ofp, "--------------------------------------------------------\n");
1124         for (i = 0; i < numutrees; i++) {
1125                 difflkl = ulkl[besttree]-ulkl[i];
1126                 fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulkl[i], difflkl);
1127                 if (i == besttree) {
1128                         fprintf(ofp, " <----------------- best tree");
1129                 } else {
1130                         /* compute variance of Log L differences over sites */
1131                         difflklps = difflkl/(double)Maxsite;
1132                         sum = 0.0;
1133                         for (j = 0; j < Numptrn; j++) {
1134                                 temp = allsites[besttree][j] - allsites[i][j] - difflklps;
1135                                 sum += temp*temp*Weight[j];
1136                         }
1137                         sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1138                         fprintf(ofp, "%11.2f         ", sum);
1139                         if (difflkl > 1.96*sum)
1140                                 fprintf(ofp, "yes");
1141                         else
1142                                 fprintf(ofp, "no");
1143                 }
1144                 fprintf(ofp, "\n");
1145         }
1146         fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1147         
1148         if (compclock) {
1149         
1150                 /* find best tree */
1151                 besttree = 0;
1152                 bestlkl = ulklc[0];
1153                 for (i = 1; i < numutrees; i++)
1154                         if (ulklc[i] > bestlkl) {
1155                                 besttree = i;
1156                                 bestlkl = ulklc[i];
1157                         }
1158         
1159                 fprintf(ofp, "\n\nCOMPARISON OF USER TREES (WITH CLOCK)\n\n");
1160                 fprintf(ofp, "Tree   log L   difference     S.E.   Significantly worse\n");
1161                 fprintf(ofp, "--------------------------------------------------------\n");
1162                 for (i = 0; i < numutrees; i++) {
1163                         difflkl = ulklc[besttree]-ulklc[i];
1164                         fprintf(ofp, "%2d %10.2f %8.2f ", i+1, ulklc[i], difflkl);
1165                         if (i == besttree) {
1166                                 fprintf(ofp, " <----------------- best tree");
1167                         } else {
1168                                 /* compute variance of Log L differences over sites */
1169                                 difflklps = difflkl/(double)Maxsite;
1170                                 sum = 0.0;
1171                                 for (j = 0; j < Numptrn; j++) {
1172                                         temp = allsitesc[besttree][j] - allsitesc[i][j] - difflklps;
1173                                         sum += temp*temp*Weight[j];
1174                                 }
1175                                 sum = sqrt(fabs(sum/(Maxsite-1.0)*Maxsite));
1176                                 fprintf(ofp, "%11.2f         ", sum);
1177                                 if (difflkl > 1.96*sum)
1178                                         fprintf(ofp, "yes");
1179                                 else
1180                                         fprintf(ofp, "no");
1181                         }
1182                         fprintf(ofp, "\n");
1183                 }
1184                 fprintf(ofp, "\nThis test (5%% significance) follows Kishino and Hasegawa (1989).\n");
1185         }
1186 }
1187
1188 /* time stamp */
1189 void timestamp(FILE* ofp)
1190 {
1191         double timespan;
1192         double cpuspan;
1193         timespan = difftime(Stoptime, Starttime);
1194         cpuspan  = ((double) (Stopcpu - Startcpu) / CLOCKS_PER_SEC);
1195         fprintf(ofp, "\n\nTIME STAMP\n\n");
1196         fprintf(ofp, "Date and time: %s", asctime(localtime(&Starttime)) );
1197         fprintf(ofp, "Runtime (excl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1198                 timespan, timespan/60., timespan/3600.);
1199         fprintf(ofp, "Runtime (incl. input) : %.0f seconds (= %.1f minutes = %.1f hours)\n",
1200                 fulltime, fulltime/60., fulltime/3600.);
1201 #ifdef TIMEDEBUG
1202         fprintf(ofp, "CPU time (incl. input): %.0f seconds (= %.1f minutes = %.1f hours)\n\n",
1203                 fullcpu, fullcpu/60., fullcpu/3600.);
1204 #endif /* TIMEDEBUG */
1205
1206 }
1207
1208 /* extern int bestrfound; */
1209
1210 /* write output file */
1211 void writeoutputfile(FILE *ofp, int part)
1212 {
1213         /* CZ */
1214 }
1215
1216
1217 /******************************************************************************/
1218 /* timer routines                                                             */
1219 /******************************************************************************/
1220
1221 /* start timer */
1222 void starttimer()
1223 {
1224         time(&time0);
1225         time1 = time0;
1226 }
1227
1228 /* check remaining time and print message if necessary */
1229 void checktimer(uli numqts)
1230 {
1231         double tc2, mintogo, minutes, hours;
1232
1233         time(&time2);
1234         if ( (time2 - time1) > 900) { /* generate message every 15 minutes */
1235                 /* every 900 seconds */
1236                 /* percentage of completed quartets */
1237                 if (mflag == 0) {
1238                         mflag = 1;
1239                         FPRINTF(STDOUTFILE "\n");
1240                 }
1241                 tc2 = 100.*numqts/Numquartets;
1242                 mintogo = (100.0-tc2) *
1243                         (double) (time2-time0)/60.0/tc2;
1244                 hours = floor(mintogo/60.0);
1245                 minutes = mintogo - 60.0*hours;
1246                 FPRINTF(STDOUTFILE "%.2f%%", tc2);
1247                 FPRINTF(STDOUTFILE " completed (remaining");
1248                 FPRINTF(STDOUTFILE " time: %.0f", hours);
1249                 FPRINTF(STDOUTFILE " hours %.0f", minutes);
1250                 FPRINTF(STDOUTFILE " minutes)\n");
1251                 fflush(STDOUT);
1252                 time1 = time2;
1253         }
1254
1255 }
1256
1257 /* check remaining time and print message if necessary */
1258 void checktimer2(uli numqts, uli all, int flag)
1259 {
1260         double tc2, mintogo, minutes, hours;
1261
1262         static time_t tt1;
1263         static time_t tt2;
1264
1265         if (flag == 1) {
1266                 time(&tt1);
1267                 time(&tt2);
1268         } else {
1269                 time(&tt2);
1270                 if ( (tt2 - tt1) > 900) { /* generate message every 15 minutes */
1271                         /* every 900 seconds */
1272                         /* percentage of completed quartets */
1273                         if (mflag == 0) {
1274                                 mflag = 1;
1275                                 FPRINTF(STDOUTFILE "\n");
1276                         }
1277                         tc2 = 100.*numqts/Numquartets;
1278                         mintogo = (100.0-tc2) *
1279                                 (double) (tt2-time0)/60.0/tc2;
1280                         hours = floor(mintogo/60.0);
1281                         minutes = mintogo - 60.0*hours;
1282                         FPRINTF(STDOUTFILE "%.2f%%", tc2);
1283                         FPRINTF(STDOUTFILE " completed (remaining");
1284                         FPRINTF(STDOUTFILE " time: %.0f", hours);
1285                         FPRINTF(STDOUTFILE " hours %.0f", minutes);
1286                         FPRINTF(STDOUTFILE " minutes)\n");
1287                         fflush(STDOUT);
1288                         tt1 = tt2;
1289                 }
1290         }
1291 }
1292
1293 void resetqblocktime(timearray_t *ta)
1294 {
1295         ta->quartcpu += ta->quartblockcpu;
1296         ta->quartblockcpu = 0.0;
1297         ta->quarttime += ta->quartblocktime;
1298         ta->quartblocktime = 0.0;
1299 } /* resetqblocktime */
1300
1301
1302 void resetpblocktime(timearray_t *ta)
1303 {
1304         ta->puzzcpu += ta->puzzblockcpu;
1305         ta->puzzblockcpu = 0.0;
1306         ta->puzztime += ta->puzzblocktime;
1307         ta->puzzblocktime = 0.0;
1308 } /* resetpblocktime */
1309
1310
1311 #ifdef TIMEDEBUG
1312 void printtimearr(timearray_t *ta)
1313 {
1314 #       if ! PARALLEL
1315           int PP_Myid;
1316           PP_Myid = -1;
1317 #       endif
1318         printf("(%2d) MMCPU: %11ld  /  %11ld \n", PP_Myid, ta->maxcpu, ta->mincpu);
1319         printf("(%2d) CTick: %11.6f [tks]  /  %11.6f [s] \n", PP_Myid, ta->mincputick, ta->mincputicktime);
1320
1321         printf("(%2d) MMTIM: %11ld  /  %11ld \n", PP_Myid, ta->maxtime, ta->mintime);
1322
1323         printf("(%2d) Mxblk: %11.6e  /  %11.6e \n", PP_Myid, ta->maxcpublock, ta->maxtimeblock);
1324         printf("(%2d) Mnblk: %11.6e  /  %11.6e \n", PP_Myid, ta->mincpublock, ta->mintimeblock);
1325
1326         printf("(%2d) Gnrl: %11.6e  /  %11.6e \n", PP_Myid, ta->generalcpu, ta->generaltime);
1327         printf("(%2d) Optn: %11.6e  /  %11.6e \n", PP_Myid, ta->optionscpu, ta->optionstime);
1328         printf("(%2d) Estm: %11.6e  /  %11.6e \n", PP_Myid, ta->paramestcpu, ta->paramesttime);
1329         printf("(%2d) Qurt: %11.6e  /  %11.6e \n", PP_Myid, ta->quartcpu, ta->quarttime);
1330         printf("(%2d) QBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->quartblockcpu, ta->quartblocktime);
1331         printf("(%2d) QMax: %11.6e  /  %11.6e \n", PP_Myid, ta->quartmaxcpu, ta->quartmaxtime);
1332         printf("(%2d) QMin: %11.6e  /  %11.6e \n", PP_Myid, ta->quartmincpu, ta->quartmintime);
1333
1334         printf("(%2d) Puzz: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzcpu, ta->puzztime);
1335         printf("(%2d) PBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzblockcpu, ta->puzzblocktime);
1336         printf("(%2d) PMax: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzmaxcpu, ta->puzzmaxtime);
1337         printf("(%2d) PMin: %11.6e  /  %11.6e \n", PP_Myid, ta->puzzmincpu, ta->puzzmintime);
1338
1339         printf("(%2d) Tree: %11.6e  /  %11.6e \n", PP_Myid, ta->treecpu, ta->treetime);
1340         printf("(%2d) TBlk: %11.6e  /  %11.6e \n", PP_Myid, ta->treeblockcpu, ta->treeblocktime);
1341         printf("(%2d) TMax: %11.6e  /  %11.6e \n", PP_Myid, ta->treemaxcpu, ta->treemaxtime);
1342         printf("(%2d) TMin: %11.6e  /  %11.6e \n", PP_Myid, ta->treemincpu, ta->treemintime);
1343
1344         printf("(%2d) C/T : %11.6e / %11.6e \n", PP_Myid, 
1345                 (ta->generalcpu + ta->optionscpu + ta->paramestcpu + ta->quartblockcpu + ta->puzzblockcpu + ta->treeblockcpu),
1346                 (ta->generaltime + ta->optionstime + ta->paramesttime + ta->quartblocktime + ta->puzzblocktime + ta->treeblocktime));
1347         printf("(%2d)  CPU: %11.6e /  Time: %11.6e \n", PP_Myid, ta->cpu, ta->time);
1348         printf("(%2d) aCPU: %11.6e / aTime: %11.6e \n", PP_Myid, ta->fullcpu, ta->fulltime);
1349
1350 } /* printtimearr */
1351 #endif /* TIMEDEBUG */
1352
1353 char *jtype [7];
1354
1355 void inittimearr(timearray_t *ta)
1356 {
1357         clock_t c0, c1, c2; 
1358
1359         jtype[OVERALL]  = "OVERALL";
1360         jtype[GENERAL]  = "GENERAL";
1361         jtype[OPTIONS]  = "OPTIONS";
1362         jtype[PARAMEST] = "PARAMeter ESTimation";
1363         jtype[QUARTETS] = "QUARTETS";
1364         jtype[PUZZLING] = "PUZZLING steps";
1365         jtype[TREEEVAL] = "TREE EVALuation";
1366         ta->currentjob = GENERAL;
1367
1368         c1 = clock();
1369         c2 = clock();
1370         while (c1 == c2)
1371            c2 = clock(); 
1372         ta->mincputick = (double)(c2 - c1);
1373         ta->mincputicktime = ((double)(c2 - c1))/CLOCKS_PER_SEC;
1374
1375         ta->tempcpu = clock();
1376         ta->tempcpustart = ta->tempcpu;
1377         ta->tempfullcpu  = ta->tempcpu;
1378         time(&(ta->temptime));
1379         ta->temptimestart = ta->temptime;
1380         ta->tempfulltime  = ta->temptime;
1381
1382         c0=0; c1=0; c2=(clock_t)((2 * c1) + 1);;
1383         while (c1 < c2) {
1384            c0 = c1;
1385            c1 = c2;
1386            c2 = (clock_t)((2 * c1) + 1);
1387         }
1388         if (c1 == c2) ta->maxcpu=c0;
1389         if (c1  > c2) ta->maxcpu=c1;
1390
1391         c0=0; c1=0; c2=(clock_t)((2 * c1) - 1);
1392         while (c1 > c2) {
1393            c0 = c1;
1394            c1 = c2;
1395            c2 = (clock_t)((2 * c1) - 1);
1396         }
1397         if (c1 == c2) ta->mincpu=c0;
1398         if (c1  < c2) ta->mincpu=c1;
1399
1400
1401
1402         ta->maxtime        = 0;
1403         ta->mintime        = 0;
1404
1405         ta->maxcpublock    = 0;
1406         ta->mincpublock    = DBL_MAX;
1407         ta->maxtimeblock   = 0;
1408         ta->mintimeblock   = DBL_MAX;
1409
1410         ta->cpu            = 0.0;
1411         ta->time           = 0.0;
1412
1413         ta->fullcpu        = 0.0;
1414         ta->fulltime       = 0.0;
1415
1416         ta->generalcpu     = 0.0;
1417         ta->optionscpu     = 0.0;
1418         ta->paramestcpu    = 0.0;
1419         ta->quartcpu       = 0.0;
1420         ta->quartblockcpu  = 0.0;
1421         ta->quartmaxcpu    = 0.0;
1422         ta->quartmincpu    = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1423         ta->puzzcpu        = 0.0;
1424         ta->puzzblockcpu   = 0.0;
1425         ta->puzzmaxcpu     = 0.0;
1426         ta->puzzmincpu     = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1427         ta->treecpu        = 0.0;
1428         ta->treeblockcpu   = 0.0;
1429         ta->treemaxcpu     = 0.0;
1430         ta->treemincpu     = ((double) ta->maxcpu)/CLOCKS_PER_SEC;
1431
1432         ta->generaltime    = 0.0;
1433         ta->optionstime    = 0.0;
1434         ta->paramesttime   = 0.0;
1435         ta->quarttime      = 0.0;
1436         ta->quartblocktime = 0.0;
1437         ta->quartmaxtime   = 0.0;
1438         ta->quartmintime   = DBL_MAX;
1439         ta->puzztime       = 0.0;
1440         ta->puzzblocktime  = 0.0;
1441         ta->puzzmaxtime    = 0.0;
1442         ta->puzzmintime    = DBL_MAX;
1443         ta->treetime       = 0.0;
1444         ta->treeblocktime  = 0.0;
1445         ta->treemaxtime    = 0.0;
1446         ta->treemintime    = DBL_MAX;
1447 } /* inittimearr */
1448
1449
1450 /***************/
1451
1452 void addup(int jobtype, clock_t c1, clock_t c2, time_t t1, time_t t2, timearray_t *ta)
1453 {
1454    double  c,
1455            t;
1456
1457    if (t2 != t1) t = difftime(t2, t1); 
1458    else          t = 0.0;
1459
1460    if (c2 < c1)
1461       c = ((double)(c2 - ta->mincpu))/CLOCKS_PER_SEC +
1462           ((double)(ta->maxcpu - c1))/CLOCKS_PER_SEC;
1463    else
1464       c = ((double)(c2 - c1))/CLOCKS_PER_SEC;
1465
1466    if (jobtype != OVERALL) {
1467
1468       if (ta->mincpublock  > c) ta->mincpublock = c;
1469       if (ta->maxcpublock  < c) ta->maxcpublock = c;
1470       if (ta->mintimeblock > t) ta->mintimeblock = t;
1471       if (ta->maxtimeblock < t) ta->maxtimeblock = t;
1472
1473       switch (jobtype) {
1474          case GENERAL: ta->generalcpu  += c;
1475                        ta->generaltime += t;
1476                        break;
1477          case OPTIONS: ta->optionscpu  += c;
1478                        ta->optionstime += t;
1479                        break;
1480          case PARAMEST: ta->paramestcpu  += c;
1481                         ta->paramesttime += t;
1482                         break;
1483          case QUARTETS: ta->quartblockcpu  += c;
1484                         ta->quartblocktime += t;
1485                         if (ta->quartmincpu  > c) ta->quartmincpu = c;
1486                         if (ta->quartmaxcpu  < c) ta->quartmaxcpu = c;
1487                         if (ta->quartmintime > t) ta->quartmintime = t;
1488                         if (ta->quartmaxtime < t) ta->quartmaxtime = t;
1489                         break;
1490          case PUZZLING: ta->puzzblockcpu  += c;
1491                         ta->puzzblocktime += t;
1492                         if (ta->puzzmincpu  > c) ta->puzzmincpu = c;
1493                         if (ta->puzzmaxcpu  < c) ta->puzzmaxcpu = c;
1494                         if (ta->puzzmintime > t) ta->puzzmintime = t;
1495                         if (ta->puzzmaxtime < t) ta->puzzmaxtime = t;
1496                         break;
1497          case TREEEVAL: ta->treeblockcpu  += c;
1498                         ta->treeblocktime += t;
1499                         if (ta->treemincpu  > c) ta->treemincpu = c;
1500                         if (ta->treemaxcpu  < c) ta->treemaxcpu = c;
1501                         if (ta->treemintime > t) ta->treemintime = t;
1502                         if (ta->treemaxtime < t) ta->treemaxtime = t;
1503                         break;
1504       }
1505       ta->cpu  += c;
1506       ta->time += t;
1507    
1508    } else {
1509          ta->fullcpu  += c;
1510          ta->fulltime += t;
1511    }
1512
1513 #     ifdef TIMEDEBUG
1514          {
1515 #        if ! PARALLEL
1516             int PP_Myid =  -1;
1517 #        endif /* !PARALLEL */
1518          printf("(%2d) CPU: +%10.6f / Time: +%10.6f (%s)\n", PP_Myid, c, t, jtype[jobtype]);
1519          printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->cpu, ta->time, jtype[jobtype]);
1520          printf("(%2d) CPU: %11.6f / Time: %11.6f (%s)\n", PP_Myid, ta->fullcpu, ta->fulltime, jtype[jobtype]);
1521          }
1522 #     endif /* TIMEDEBUG */
1523 } /* addup */
1524
1525
1526 /***************/
1527
1528
1529 void addtimes(int jobtype, timearray_t *ta)
1530 {
1531    clock_t tempc;
1532    time_t  tempt;
1533
1534    time(&tempt);
1535    tempc = clock();
1536
1537    if ((tempc < ta->tempfullcpu) || (jobtype == OVERALL)) {   /* CPU counter overflow for overall time */
1538         addup(OVERALL, ta->tempfullcpu, tempc, ta->tempfulltime, tempt, ta);
1539         ta->tempfullcpu  = tempc;
1540         ta->tempfulltime = tempt;
1541         if (jobtype == OVERALL) {
1542            addup(ta->currentjob, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
1543            ta->tempcpustart = ta->tempcpu;
1544            ta->tempcpu      = tempc;
1545            ta->temptimestart = ta->temptime;
1546            ta->temptime      = tempt;
1547         }
1548    }
1549
1550    if((jobtype != ta->currentjob) && (jobtype != OVERALL)) {   /* change of job type */
1551         addup(ta->currentjob, ta->tempcpustart, ta->tempcpu, ta->temptimestart, ta->temptime, ta);
1552         ta->tempcpustart  = ta->tempcpu;
1553         ta->tempcpu       = tempc;
1554         ta->temptimestart = ta->temptime;
1555         ta->temptime      = tempt;
1556         ta->currentjob    = jobtype;
1557    }
1558
1559    if (tempc < ta->tempcpustart) {   /* CPU counter overflow */
1560         addup(jobtype, ta->tempcpustart, tempc, ta->temptimestart, tempt, ta);
1561         ta->tempcpustart = ta->tempcpu;
1562         ta->tempcpu      = tempc;
1563         ta->temptimestart = ta->temptime;
1564         ta->temptime      = tempt;
1565    }
1566
1567 } /* addtimes */
1568
1569
1570
1571 /******************************************************************************/
1572
1573 /* estimate parameters of substitution process and rate heterogeneity - no tree
1574    n-taxon tree is not needed because of quartet method or NJ tree topology */
1575 void estimateparametersnotree()
1576 {
1577         int it, nump, change;
1578         double TSold, YRold, FIold, GEold;
1579
1580         it = 0;
1581         nump = 0;
1582
1583         /* count number of parameters */
1584         if (data_optn == NUCLEOTIDE && optim_optn) nump++;
1585         if (fracinv_optim || grate_optim) nump++;
1586
1587         do { /* repeat until nothing changes any more */
1588                 it++;
1589                 change = FALSE;
1590
1591                 /* optimize substitution parameters */
1592                 if (data_optn == NUCLEOTIDE && optim_optn) {
1593                 
1594                         TSold = TSparam;
1595                         YRold = YRparam;
1596                         
1597
1598                         /*
1599                          * optimize
1600                          */
1601                          
1602                         FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
1603                         fflush(STDOUT);
1604
1605                         if (qcalg_optn) { /* quartet sampling */
1606                                 optimseqevolparamsq();
1607                         } else { /* NJ tree */
1608                                 tmpfp = tmpfile();
1609                                 njtree(tmpfp);                          
1610                                 rewind(tmpfp);
1611                                 readusertree(tmpfp);
1612                                 closefile(tmpfp);
1613                                 optimseqevolparamst();                          
1614                         }
1615                         
1616                         computedistan(); /* update ML distances */
1617                 
1618                         /* same tolerance as 1D minimization */
1619                         if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
1620                                 (fabs(YRparam - YRold) > 3.3*PEPS1)
1621                         ) change = TRUE;
1622
1623                 }
1624
1625                 /* optimize rate heterogeneity variables */
1626                 if (fracinv_optim || grate_optim) {
1627
1628                         FIold = fracinv;
1629                         GEold = Geta;
1630
1631
1632                         /* 
1633                          * optimize 
1634                          */
1635
1636                         FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
1637                         fflush(STDOUT);
1638                         /* compute NJ tree */
1639                         tmpfp = tmpfile();
1640                         njtree(tmpfp);
1641                         /* use NJ tree topology to estimate parameters */
1642                         rewind(tmpfp);
1643                         readusertree(tmpfp);
1644                         closefile(tmpfp);
1645
1646                         optimrateparams();                              
1647                         computedistan(); /* update ML distances */
1648
1649
1650                         /* same tolerance as 1D minimization */
1651                         if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
1652                                 (fabs(Geta - GEold) > 3.3*PEPS2)
1653                         ) change = TRUE;
1654                         
1655                 }
1656
1657                 if (nump == 1) return;
1658                 
1659         } while (it != MAXITS && change);
1660
1661         return;
1662 }
1663
1664
1665 /* estimate parameters of substitution process and rate heterogeneity - tree
1666    same as above but here the n-taxon tree is already in memory */
1667 void estimateparameterstree()
1668 {
1669         int it, nump, change;
1670         double TSold, YRold, FIold, GEold;
1671
1672         it = 0;
1673         nump = 0;
1674
1675         /* count number of parameters */
1676         if (data_optn == NUCLEOTIDE && optim_optn) nump++;
1677         if (fracinv_optim || grate_optim) nump++;
1678
1679         do { /* repeat until nothing changes any more */
1680                 it++;
1681                 change = FALSE;
1682
1683                 /* optimize substitution process parameters */
1684                 if (data_optn == NUCLEOTIDE && optim_optn) {
1685                 
1686                         TSold = TSparam;
1687                         YRold = YRparam;
1688                         
1689
1690                         /*
1691                          * optimize
1692                          */
1693                          
1694                         FPRINTF(STDOUTFILE "Optimizing missing substitution process parameters\n");
1695                         fflush(STDOUT);
1696                         optimseqevolparamst();
1697                         computedistan(); /* update ML distances */
1698
1699                 
1700                         /* same tolerance as 1D minimization */
1701                         if ((fabs(TSparam - TSold) > 3.3*PEPS1) ||
1702                                 (fabs(YRparam - YRold) > 3.3*PEPS1)
1703                         ) change = TRUE;
1704
1705                 }
1706
1707                 /* optimize rate heterogeneity variables */
1708                 if (fracinv_optim || grate_optim) {
1709
1710                         FIold = fracinv;
1711                         GEold = Geta;
1712
1713
1714                         /* 
1715                          * optimize 
1716                          */
1717
1718                         FPRINTF(STDOUTFILE "Optimizing missing rate heterogeneity parameters\n");
1719                         fflush(STDOUT);
1720                         optimrateparams();                              
1721                         computedistan(); /* update ML distances */
1722
1723
1724                         /* same tolerance as 1D minimization */
1725                         if ((fabs(fracinv - FIold) > 3.3*PEPS2) ||
1726                                 (fabs(Geta - GEold) > 3.3*PEPS2)
1727                         ) change = TRUE;
1728                         
1729                 }
1730
1731                 if (nump == 1) return;
1732                 
1733         } while (it != MAXITS && change);
1734
1735         return;
1736 }
1737
1738
1739 /******************************************************************************/
1740 /* exported from main                                                         */
1741 /******************************************************************************/
1742
1743 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx)
1744 {
1745         if (approx == APPROX) {
1746
1747                 *d1 = quartet_alklhd(a,b, c,d); /* (a,b)-(c,d) */
1748                 *d2 = quartet_alklhd(a,c, b,d); /* (a,c)-(b,d) */
1749                 *d3 = quartet_alklhd(a,d, b,c); /* (a,d)-(b,c) */
1750
1751         } else /* approx == EXACT */ {
1752
1753                 *d1 = quartet_lklhd(a,b, c,d);  /* (a,b)-(c,d) */
1754                 *d2 = quartet_lklhd(a,c, b,d);  /* (a,c)-(b,d) */
1755                 *d3 = quartet_lklhd(a,d, b,c);  /* (a,d)-(b,c) */
1756
1757         }
1758 }
1759
1760 /***************************************************************/ 
1761
1762 void recon_tree()
1763 {       
1764         int i;
1765 #       if ! PARALLEL
1766                 int a, b, c;
1767                 uli nq;
1768                 double tc2, mintogo, minutes, hours;
1769 #       endif
1770
1771         /* allocate memory for taxon list of bad quartets */
1772         badtaxon = new_ulivector(Maxspc);
1773         for (i = 0; i < Maxspc; i++) badtaxon[i] = 0;
1774
1775         /* allocate variable used for randomizing input order */
1776         trueID = new_ivector(Maxspc);
1777
1778         /* allocate memory for quartets */
1779         quartetinfo = mallocquartets(Maxspc);
1780         
1781         /* prepare for consensus tree analysis */
1782         initconsensus();
1783                 
1784         if (!(readquart_optn) || (readquart_optn && savequart_optn)) {
1785           /* compute quartets */
1786           FPRINTF(STDOUTFILE "Computing quartet maximum likelihood trees\n");
1787           fflush(STDOUT);
1788           computeallquartets();
1789         }
1790
1791         if (savequart_optn) 
1792                 writeallquarts(Maxspc, ALLQUART, quartetinfo);
1793         if (readquart_optn) {
1794                 int xx1, xx2, xx3, xx4, count;
1795                 readallquarts (Maxspc, ALLQUART, quartetinfo);
1796                 if (show_optn) { /* list all unresolved quartets */
1797                         openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
1798                         fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
1799                 }
1800
1801                 /* initialize bad quartet memory */
1802                 for (count = 0; count < Maxspc; count++) badtaxon[count] = 0;
1803                 badqs = 0;
1804
1805                 for (xx4 = 3; xx4 < Maxspc; xx4++)
1806                   for (xx3 = 2; xx3 < xx4; xx3++)
1807                     for (xx2 = 1; xx2 < xx3; xx2++)
1808                       for (xx1 = 0; xx1 < xx2; xx1++) {
1809                         if (readquartet(xx1, xx2, xx3, xx4) == 7) {
1810                                 badqs++;
1811                                 badtaxon[xx1]++;
1812                                 badtaxon[xx2]++;
1813                                 badtaxon[xx3]++;
1814                                 badtaxon[xx4]++;
1815                                 if (show_optn) {
1816                                         fputid10(unresfp, xx1);
1817                                         fprintf(unresfp, "  ");
1818                                         fputid10(unresfp, xx2);
1819                                         fprintf(unresfp, "  ");
1820                                         fputid10(unresfp, xx3);
1821                                         fprintf(unresfp, "  ");
1822                                         fputid  (unresfp, xx4);
1823                                         fprintf(unresfp, "\n");
1824                                 }
1825                         }
1826                 } /* end for xx4; for xx3; for xx2; for xx1 */
1827                 if (show_optn)  /* list all unresolved quartets */
1828                         fclose(unresfp);
1829         } /* readquart_optn */
1830
1831 #       if PARALLEL
1832                 PP_SendAllQuarts(numquarts(Maxspc), quartetinfo);
1833 #       endif /* PARALLEL */
1834
1835         FPRINTF(STDOUTFILE "Computing quartet puzzling tree\n");
1836         fflush(STDOUT);
1837         
1838         /* start timer - percentage of completed trees */
1839         time(&time0);
1840         time1 = time0;
1841         mflag = 0;
1842                         
1843         /* open file for chronological list of puzzling step trees */
1844         if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
1845                 openfiletowrite(&qptlist, OUTPTLIST, "puzzling step trees (chonological)");
1846                                                         
1847 #       if PARALLEL
1848                 {
1849                 PP_SendDoPermutBlock(Numtrial);
1850                 }
1851 #       else
1852                 addtimes(GENERAL, &tarr);
1853                 for (Currtrial = 0; Currtrial < Numtrial; Currtrial++) {
1854                         
1855                         /* randomize input order */
1856                         chooser(Maxspc, Maxspc, trueID);
1857                                 
1858                         /* initialize tree */
1859                         inittree();
1860
1861                         /* adding all other leafs */
1862                         for (i = 3; i < Maxspc; i++) { 
1863                                         
1864                                 /* clear all edgeinfos */
1865                                 resetedgeinfo();
1866                                 
1867                                 /* clear counter of quartets */
1868                                 nq = 0;
1869
1870                                 /*
1871                                  * core of quartet puzzling algorithm
1872                                  */
1873
1874                                 for (a = 0; a < nextleaf - 2; a++)
1875                                         for (b = a + 1; b < nextleaf - 1; b++)
1876                                                 for (c = b + 1; c < nextleaf; c++) {
1877
1878                                                         /*  check which two _leaves_ out of a, b, c
1879                                                             are closer related to each other than
1880                                                             to leaf i according to a least squares
1881                                                             fit of the continous Baysian weights to the
1882                                                             seven trivial "attractive regions". We assign 
1883                                                             a score of 1 to all edges between these two leaves
1884                                                             chooseA and chooseB */
1885
1886                                                         checkquartet(a, b, c, i);
1887                                                         incrementedgeinfo(chooseA, chooseB);
1888
1889                                                         nq++;
1890
1891                                                         /* generate message every 15 minutes */
1892                                                                 
1893                                                         /* check timer */
1894                                                         time(&time2);
1895                                                         if ( (time2 - time1) > 900) {
1896                                                                 /* every 900 seconds */
1897                                                                 /* percentage of completed trees */
1898                                                                 if (mflag == 0) {
1899                                                                         FPRINTF(STDOUTFILE "\n");
1900                                                                         mflag = 1;
1901                                                                 }
1902                                                                 tc2 = 100.0*Currtrial/Numtrial + 
1903                                                                         100.0*nq/Numquartets/Numtrial;
1904                                                                         mintogo = (100.0-tc2) *
1905                                                                         (double) (time2-time0)/60.0/tc2;
1906                                                                         hours = floor(mintogo/60.0);
1907                                                                         minutes = mintogo - 60.0*hours;
1908                                                                         FPRINTF(STDOUTFILE "%2.2f%%", tc2);
1909                                                                         FPRINTF(STDOUTFILE " completed (remaining");
1910                                                                         FPRINTF(STDOUTFILE " time: %.0f", hours);
1911                                                                         FPRINTF(STDOUTFILE " hours %.0f", minutes);
1912                                                                         FPRINTF(STDOUTFILE " minutes)\n");
1913                                                                         fflush(STDOUT);
1914                                                                         time1 = time2;
1915                                                                 }
1916                                                         }
1917
1918                                 /* find out which edge has the lowest edgeinfo */
1919                                 minimumedgeinfo();
1920
1921                                 /* add the next leaf on minedge */
1922                                 addnextleaf(minedge);
1923                         }
1924         
1925                         /* compute bipartitions of current tree */
1926                         computebiparts();
1927                         makenewsplitentries();
1928
1929                         {
1930                                 int *ctree, startnode;
1931                                 char *trstr;
1932                                 treelistitemtype *treeitem;
1933                                 ctree = initctree();
1934                                 copytree(ctree);
1935                                 startnode = sortctree(ctree);
1936                                 trstr=sprintfctree(ctree, psteptreestrlen);
1937
1938
1939                                 treeitem = addtree2list(&trstr, 1, &psteptreelist, &psteptreenum, &psteptreesum);
1940
1941                                 if((listqptrees == PSTOUT_LIST) 
1942                                   || (listqptrees == PSTOUT_LISTORDER)) {
1943                                         /* print: order no/# topol per this id/tree id/sum of unique topologies/sum of trees so far */
1944                                         fprintf(qptlist, "%ld.\t1\t%d\t%d\t%d\t%d\n", 
1945                                                 Currtrial + 1, (*treeitem).count, (*treeitem).id, psteptreenum, psteptreesum);
1946                                 }
1947
1948 #                               ifdef VERBOSE1
1949                                         printf("%s\n", trstr);
1950                                         printfsortedpstrees(psteptreelist);
1951 #                               endif
1952                                 freectree(&ctree);
1953                         }
1954
1955
1956
1957                         /* free tree before building the next tree */
1958                         freetree();
1959                         
1960                         addtimes(PUZZLING, &tarr);
1961                 }
1962 #       endif /* PARALLEL */
1963                         
1964                         /* close file for list of puzzling step trees */
1965                         if((listqptrees == PSTOUT_LIST) || (listqptrees == PSTOUT_LISTORDER))
1966                                 closefile(qptlist);
1967                         
1968                         if (mflag == 1) FPRINTF(STDOUTFILE "\n");
1969
1970                         /* garbage collection */
1971                         free(splitcomp);
1972                         free_ivector(trueID);
1973
1974 #                       if ! PARALLEL
1975                                 free_cmatrix(biparts);
1976 #                       endif /* PARALLEL */
1977
1978                         freequartets();
1979
1980                         /* compute majority rule consensus tree */
1981                         makeconsensus();
1982                         
1983                         /* write consensus tree to tmp file */
1984                         tmpfp = tmpfile();
1985                         writeconsensustree(tmpfp);
1986 } /* recon_tree */
1987
1988 /***************************************************************/ 
1989
1990 void map_lklhd()
1991 {       
1992         int i, a, a1, a2, b, b1, b2, c, c1, c2, d;
1993         uli nq;
1994         double logs[3], d1, d2, d3, temp;
1995         ivector qts, mlorder, gettwo;
1996                 /* reset variables */
1997                 ar1 = ar2 = ar3 = 0;
1998                 reg1 = reg2 = reg3 = reg4 = reg5 = reg6 = reg7 = 0;
1999                 reg1l = reg1r = reg2u = reg2d = reg3u = reg3d = reg4u =
2000                         reg4d = reg5l = reg5r = reg6u = reg6d = 0;
2001                         
2002                 /* place for random quartet */
2003                         qts = new_ivector(4);
2004
2005                 /* initialize output file */
2006                 openfiletowrite(&trifp, TRIANGLE, "Postscript output");
2007                 initps(trifp);
2008                 FPRINTF(STDOUTFILE "Performing likelihood mapping analysis\n");
2009                 fflush(STDOUT);
2010                         
2011                 /* start timer */
2012                 starttimer();
2013                 nq = 0;
2014                 mflag = 0;
2015
2016                 addtimes(GENERAL, &tarr);
2017                 if (lmqts == 0) { /* all possible quartets */
2018                         
2019                         if (numclust == 4) { /* four-cluster analysis */
2020
2021                                 for (a = 0; a < clustA; a++)
2022                                         for (b = 0; b < clustB; b++)
2023                                                 for (c = 0; c < clustC; c++)
2024                                                         for (d = 0; d < clustD; d++) {
2025                                                                         
2026                                                                 nq++;
2027                                                                         
2028                                                                 /* check timer */
2029                                                                 checktimer(nq);
2030
2031                                                                 /* maximum likelihood values */
2032                                                                 /* approximate ML is sufficient */
2033                                                                 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c],clusterD[d],&d1,&d2,&d3, APPROX);
2034                                                                 
2035                                                                 /* draw point for LM analysis */
2036                                                                 makelmpoint(trifp, d1, d2, d3);
2037                                                                 addtimes(QUARTETS, &tarr);
2038
2039                                                         }                                       
2040                         }
2041                                 
2042                         if (numclust == 3) { /* three-cluster analysis */
2043
2044                                 gettwo = new_ivector(2);
2045
2046                                 for (a = 0; a < clustA; a++)
2047                                         for (b = 0; b < clustB; b++)
2048                                                 for (c1 = 0; c1 < clustC-1; c1++)
2049                                                         for (c2 = c1+1; c2 < clustC; c2++) {
2050
2051                                                                 nq++;
2052
2053                                                                 /* check timer */
2054                                                                 checktimer(nq);
2055                                                                 
2056                                                                 /* maximum likelihood values */
2057                                                                 /* approximate ML is sufficient */
2058                                                                 compute_quartlklhds(clusterA[a],clusterB[b],clusterC[c1],clusterC[c2],&d1,&d2,&d3, APPROX);
2059                                                                 
2060                                                                 /* randomize order of d2 and d3 */
2061                                                                 if (randominteger(2) == 1) {
2062                                                                         temp = d3;
2063                                                                         d3 = d2;
2064                                                                         d2 = temp;
2065                                                                 }
2066                                                 
2067                                                                 /* draw point for LM analysis */
2068                                                                 makelmpoint(trifp, d1, d2, d3);
2069                                                                 addtimes(QUARTETS, &tarr);
2070
2071                                 }                               
2072                                 free_ivector(gettwo);
2073                         }
2074                                 
2075                         if (numclust == 2) { /* two-cluster analysis */
2076                                         
2077                                 gettwo = new_ivector(2);
2078
2079                                 for (a1 = 0; a1 < clustA-1; a1++)
2080                                         for (a2 = a1+1; a2 < clustA; a2++)
2081                                                 for (b1 = 0; b1 < clustB-1; b1++)
2082                                                         for (b2 = b1+1; b2 < clustB; b2++) {
2083
2084                                                                 nq++;
2085
2086                                                                 /* check timer */
2087                                                                 checktimer(nq);
2088                                                                 
2089                                                                 /* maximum likelihood values */
2090                                                                 /* approximate ML is sufficient */
2091                                                                 compute_quartlklhds(clusterA[a1],clusterA[a2],clusterB[b1],clusterB[b2],&d1,&d2,&d3, APPROX);
2092                                                                 
2093                                                                 /* randomize order of d2 and d3 */
2094                                                                 if (randominteger(2) == 1) {
2095                                                                         temp = d3;
2096                                                                         d3 = d2;
2097                                                                         d2 = temp;
2098                                                                 }
2099                                                 
2100                                                                 /* draw point for LM analysis */
2101                                                                 makelmpoint(trifp, d1, d2, d3);
2102                                                                 addtimes(QUARTETS, &tarr);
2103
2104                                 }
2105                                         
2106                                 free_ivector(gettwo);
2107                         }
2108                         
2109                         if (numclust == 1) { /* normal likelihood mapping (one cluster) */
2110                         
2111                                 mlorder = new_ivector(3);
2112
2113 #if 0
2114                                 for (i = 3; i < Maxspc; i++) 
2115                                         for (a = 0; a < i - 2; a++) 
2116                                                 for (b = a + 1; b < i - 1; b++) 
2117                                                         for (c = b + 1; c < i; c++)
2118    for (d = 3; d < Maxspc; d++) 
2119       for (c = 2; c < d; c++) 
2120          for (b = 1; b < c; b++) 
2121             for (a = 0; a < b; a++) 
2122 #endif
2123
2124                                 for (i = 3; i < Maxspc; i++) 
2125                                         for (c = 2; c < i; c++) 
2126                                                 for (b = 1; b < c; b++) 
2127                                                         for (a = 0; a < b; a++) {
2128                                                         
2129                                                                 nq++;
2130
2131                                                                 /* check timer */
2132                                                                 checktimer(nq);
2133
2134                                                                 /* maximum likelihood values */
2135                                                                 /* approximate ML is sufficient */
2136                                                                 compute_quartlklhds(a,b,c,i,&logs[0],&logs[1],&logs[2], APPROX);
2137
2138                                                                 /* randomize order */
2139                                                                 chooser(3,3,mlorder);
2140                                                                 d1 = logs[mlorder[0]];
2141                                                                 d2 = logs[mlorder[1]];
2142                                                                 d3 = logs[mlorder[2]];
2143                                                                 
2144                                                                 /* draw point for LM analysis */
2145                                                                 makelmpoint(trifp, d1, d2, d3);
2146                                                                 addtimes(QUARTETS, &tarr);
2147                                 
2148                                 }
2149                                 free_ivector(mlorder);
2150                         }
2151                         
2152                 } else { /* randomly selected quartets */
2153                         
2154                         if (numclust == 4) { /* four-cluster analysis */
2155
2156                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2157                                 
2158                                         nq++;
2159
2160                                         /* check timer */
2161                                         checktimer(nq);
2162
2163                                         /* choose random quartet */     
2164                                         qts[0] = clusterA[ randominteger(clustA) ];
2165                                         qts[1] = clusterB[ randominteger(clustB) ];
2166                                         qts[2] = clusterC[ randominteger(clustC) ];
2167                                         qts[3] = clusterD[ randominteger(clustD) ];                     
2168
2169                                         /* maximum likelihood values */
2170                                         /* approximate ML is sufficient */
2171                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2172                                         
2173                                         /* draw point for LM analysis */
2174                                         makelmpoint(trifp, d1, d2, d3);
2175                                         addtimes(QUARTETS, &tarr);
2176
2177                                 }
2178                         }
2179                                 
2180                         if (numclust == 3) { /* three-cluster analysis */
2181
2182                                 gettwo = new_ivector(2);
2183
2184                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2185                                 
2186                                         nq++;
2187
2188                                         /* check timer */
2189                                         checktimer(nq);
2190
2191                                         /* choose random quartet */     
2192                                         qts[0] = clusterA[ randominteger(clustA) ];
2193                                         qts[1] = clusterB[ randominteger(clustB) ];
2194                                         chooser(clustC, 2, gettwo);
2195                                         qts[2] = clusterC[gettwo[0]];
2196                                         qts[3] = clusterC[gettwo[1]];           
2197
2198                                         /* maximum likelihood values */
2199                                         /* approximate ML is sufficient */
2200                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2201                                         
2202                                         /* order of d2 and d3 is already randomized! */
2203                                                 
2204                                         /* draw point for LM analysis */
2205                                         makelmpoint(trifp, d1, d2, d3);
2206                                         addtimes(QUARTETS, &tarr);
2207
2208                                 }
2209                                         
2210                                 free_ivector(gettwo);
2211                         }
2212                                 
2213                         if (numclust == 2) { /* two-cluster analysis */
2214
2215                                 gettwo = new_ivector(2);
2216
2217                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2218                                 
2219                                         nq++;
2220
2221                                         /* check timer */
2222                                         checktimer(nq);
2223
2224                                         /* choose random quartet */     
2225                                         chooser(clustA, 2, gettwo);
2226                                         qts[0] = clusterA[gettwo[0]];
2227                                         qts[1] = clusterA[gettwo[1]];           
2228                                         chooser(clustB, 2, gettwo);
2229                                         qts[2] = clusterB[gettwo[0]];
2230                                         qts[3] = clusterB[gettwo[1]];           
2231
2232                                         /* maximum likelihood values */
2233                                         /* approximate ML is sufficient */
2234                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2235                                         
2236                                         /* order of d2 and d3 is already randomized! */
2237                                                 
2238                                         /* draw point for LM analysis */
2239                                         makelmpoint(trifp, d1, d2, d3);
2240                                         addtimes(QUARTETS, &tarr);
2241
2242                                 }
2243                                 free_ivector(gettwo);
2244                         }
2245                                 
2246                         if (numclust == 1) { /* normal likelihood mapping (one cluster) */
2247                         
2248                                 for (lmqts = 0; lmqts < Numquartets; lmqts++) {
2249                                 
2250                                         nq++;
2251
2252                                         /* check timer */
2253                                         checktimer(nq);
2254
2255                                         /* choose random quartet */     
2256                                         chooser(Maxspc, 4, qts);                                
2257
2258                                         /* maximum likelihood values */
2259                                         /* approximate ML is sufficient */
2260                                         compute_quartlklhds(qts[0],qts[1],qts[2],qts[3],&d1,&d2,&d3, APPROX);
2261                                         
2262                                         /* order of d1, d2, and d3 is already randomized! */
2263                                 
2264                                         /* draw point for LM analysis */
2265                                         makelmpoint(trifp, d1, d2, d3);
2266                                         addtimes(QUARTETS, &tarr);
2267
2268                                 }
2269                         }
2270                 }
2271
2272                 finishps(trifp);
2273                 closefile(trifp);
2274                 free_ivector(qts);
2275
2276 } /* map_lklhd */
2277
2278 /***************************************************************/ 
2279
2280 void setdefaults() {
2281
2282   strcpy(INFILE,     INFILEDEFAULT);
2283   strcpy(OUTFILE,    OUTFILEDEFAULT);
2284   strcpy(TREEFILE,   TREEFILEDEFAULT);
2285   strcpy(INTREE,     INTREEDEFAULT);
2286   strcpy(DISTANCES,  DISTANCESDEFAULT);
2287   strcpy(TRIANGLE,   TRIANGLEDEFAULT);
2288   strcpy(UNRESOLVED, UNRESOLVEDDEFAULT);
2289   strcpy(ALLQUART,   ALLQUARTDEFAULT);
2290   strcpy(ALLQUARTLH, ALLQUARTLHDEFAULT);
2291   strcpy(OUTPTLIST,  OUTPTLISTDEFAULT);
2292   strcpy(OUTPTORDER, OUTPTORDERDEFAULT);
2293
2294   usebestq_optn    = FALSE;
2295   savequartlh_optn = FALSE;
2296   savequart_optn   = FALSE;
2297   readquart_optn   = FALSE;
2298
2299   randseed = -1;                  /* to set random random seed */
2300
2301 } /* setdefaults */
2302
2303 /***************************************************************/ 
2304
2305 void printversion()
2306 {
2307 #  if ! PARALLEL
2308    fprintf(stderr, "puzzle (%s) %s\n", PACKAGE, VERSION);
2309 #else
2310    fprintf(stderr, "ppuzzle (%s) %s\n", PACKAGE, VERSION);
2311 #  endif
2312    exit (0);
2313 }
2314 /***************************************************************/ 
2315
2316 void printusage(char *fname)
2317 {
2318    fprintf(stderr, "\n\nUsage: %s [-h] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
2319 #  if PARALLEL
2320         PP_SendDone();
2321         MPI_Finalize();
2322 #  endif
2323    exit (1);
2324 }
2325
2326 /***************************************************************/ 
2327
2328 #ifdef HHH
2329 void printusagehhh(char *fname)
2330 {
2331    fprintf(stderr, "\n\nUsage: %s [options] [ Infilename [ UserTreeFilename ] ]\n\n", fname);
2332    fprintf(stderr, "  -h           - print usage\n");
2333    fprintf(stderr, "  -wqf         - write quartet file to Infilename.allquart\n");
2334    fprintf(stderr, "  -rqf         - read quartet file from Infilename.allquart\n");
2335    fprintf(stderr, "  -wqlb        - write quart lhs to Infilename.allquartlh (binary)\n");
2336    fprintf(stderr, "  -wqla        - write quart lhs to Infilename.allquartlh (ASCII)\n");
2337    fprintf(stderr, "  -bestq       - use best quart, no basian weights\n");
2338    fprintf(stderr, "  -randseed<#> - use <#> as random number seed, for debug purposes only\n");
2339 #  if PARALLEL
2340         PP_SendDone();
2341         MPI_Finalize();
2342 #  endif
2343    exit (2);
2344 }
2345 #endif /* HHH */
2346
2347 /***************************************************************/ 
2348
2349
2350 void scancmdline(int *argc, char **argv[]) 
2351 {
2352    static short infileset     = 0;
2353    static short intreefileset = 0;
2354    short flagused;
2355    int n;
2356    int count, dummyint;
2357
2358    for (n = 1; n < *argc; n++) {
2359 #     ifdef VERBOSE1
2360          printf("argv[%d] = %s\n", n, (*argv)[n]);
2361 #     endif
2362  
2363       flagused = FALSE;
2364
2365 #     ifdef HHH
2366       dummyint = 0;
2367       count = sscanf((*argv)[n], "-wqlb%n", &dummyint);
2368       if (dummyint == 5) {
2369         savequartlh_optn = TRUE;
2370         saveqlhbin_optn  = TRUE;
2371         flagused         = TRUE;
2372       }
2373
2374       dummyint = 0;
2375       count = sscanf((*argv)[n], "-wqla%n", &dummyint);
2376       if (dummyint == 5) {
2377         savequartlh_optn = TRUE;
2378         saveqlhbin_optn  = FALSE;
2379         flagused         = TRUE;
2380       }
2381
2382       dummyint = 0;
2383       count = sscanf((*argv)[n], "-wqf%n", &dummyint);
2384       if (dummyint == 4) {
2385         savequart_optn = TRUE;
2386         flagused = TRUE;
2387       }
2388
2389       dummyint = 0;
2390       count = sscanf((*argv)[n],"-rqf%n", &dummyint);
2391       if (dummyint == 4) {
2392         readquart_optn = TRUE; 
2393         flagused = TRUE;
2394       }
2395
2396       dummyint = 0;
2397       count = sscanf((*argv)[n],"-bestq%n", &dummyint);
2398       if (dummyint == 6) {
2399         usebestq_optn = TRUE; 
2400         flagused = TRUE;
2401       }
2402
2403       dummyint = 0;
2404       count = sscanf((*argv)[n],"-hhh%n", &dummyint);
2405       if (dummyint==4) {
2406         printusagehhh((*argv)[0]); 
2407         flagused = TRUE;
2408       }
2409 #     endif /* HHH */
2410
2411       dummyint = 0;
2412       count = sscanf((*argv)[n],"-V%n", &dummyint);
2413       if (dummyint==2) {
2414         printversion((*argv)[0]); 
2415         flagused = TRUE;
2416       }
2417
2418       dummyint = 0;
2419       count = sscanf((*argv)[n],"-version%n", &dummyint);
2420       if (dummyint==8) {
2421         printversion((*argv)[0]); 
2422         flagused = TRUE;
2423       }
2424
2425       dummyint = 0;
2426       count = sscanf((*argv)[n],"--version%n", &dummyint);
2427       if (dummyint>=4) {
2428         printversion((*argv)[0]); 
2429         flagused = TRUE;
2430       }
2431
2432       dummyint = 0;
2433       count = sscanf((*argv)[n],"-h%n", &dummyint);
2434       if (dummyint==2) {
2435         printusage((*argv)[0]); 
2436         flagused = TRUE;
2437       }
2438
2439       count = sscanf((*argv)[n],"-randseed%d", &dummyint);
2440       if (count == 1) {
2441         randseed = dummyint; 
2442         flagused = TRUE;
2443       }
2444
2445 #if 0
2446       count = sscanf((*argv)[n],"-h%n", &dummyint);
2447       if ((count == 1) && (dummyint>=2)) printusage((*argv)[0]); 
2448
2449       count = sscanf((*argv)[n],"-writequarts%n", &dummyint);
2450       if (count == 1) writequartstofile = 1;; 
2451
2452       count = sscanf((*argv)[n],"-ws%d", &dummyint);
2453       if (count == 1) windowsize = dummyint; 
2454 #endif
2455
2456       if ((*argv)[n][0] != '-') {
2457          if (infileset == 0) {
2458             strcpy(INFILE, (*argv)[n]);
2459             infileset++;
2460             sprintf(OUTFILE    ,"%s.%s", INFILE, OUTFILEEXT);
2461             sprintf(TREEFILE   ,"%s.%s", INFILE, TREEFILEEXT);
2462             sprintf(DISTANCES  ,"%s.%s", INFILE, DISTANCESEXT);
2463             sprintf(TRIANGLE   ,"%s.%s", INFILE, TRIANGLEEXT);
2464             sprintf(UNRESOLVED ,"%s.%s", INFILE, UNRESOLVEDEXT);
2465             sprintf(ALLQUART   ,"%s.%s", INFILE, ALLQUARTEXT);
2466             sprintf(ALLQUARTLH ,"%s.%s", INFILE, ALLQUARTLHEXT);
2467             sprintf(OUTPTLIST  ,"%s.%s", INFILE, OUTPTLISTEXT);
2468             sprintf(OUTPTORDER ,"%s.%s", INFILE, OUTPTORDEREXT);
2469             FPRINTF(STDOUTFILE "Input file: %s\n", INFILE);
2470             flagused = TRUE;
2471          } else {
2472             if (intreefileset == 0) {
2473                strcpy(INTREE, (*argv)[n]);
2474                intreefileset++;
2475                sprintf(OUTFILE    ,"%s.%s", INTREE, OUTFILEEXT);
2476                sprintf(TREEFILE   ,"%s.%s", INTREE, TREEFILEEXT);
2477                sprintf(DISTANCES  ,"%s.%s", INTREE, DISTANCESEXT);
2478                FPRINTF(STDOUTFILE "Usertree file: %s\n", INTREE);
2479                flagused = TRUE;
2480             }
2481          }
2482       }
2483       if (flagused == FALSE) {
2484          fprintf(stderr, "WARNING: commandline parameter %d not recognized (\"%s\")\n", n, (*argv)[n]);
2485       }
2486       flagused = FALSE;
2487    }
2488
2489 } /* scancmdline */
2490
2491
2492 /***************************************************************/ 
2493
2494 void inputandinit(int *argc, char **argv[]) {
2495
2496         int ci;
2497
2498         /* vectors used in QP and LM analysis */
2499         qweight = new_dvector(3);
2500         sqdiff = new_dvector(3);
2501         qworder = new_ivector(3);
2502         sqorder = new_ivector(3);
2503         
2504         /* Initialization and parsing of Commandline */
2505         setdefaults();
2506         scancmdline(argc, argv);
2507
2508         /* initialize random numbers generator */
2509         if (randseed >= 0)
2510            fprintf(stderr, "WARNING: random seed set to %d for debugging!\n", randseed);
2511         randseed = initrandom(randseed);
2512
2513         psteptreelist = NULL;
2514         psteptreesum = 0;
2515         bestratefound = 0;
2516
2517 #       ifndef ALPHA
2518                 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s!\n\n\n", VERSION);
2519 #       else
2520                 FPRINTF(STDOUTFILE "\n\n\nWELCOME TO TREE-PUZZLE %s%s!\n\n\n", VERSION, ALPHA);
2521 #       endif
2522
2523
2524         /* get sequences */
2525         openfiletoread(&seqfp, INFILE, "sequence data");
2526         getsizesites(seqfp);
2527         FPRINTF(STDOUTFILE "\nInput data set contains %d sequences of length %d\n", Maxspc, Maxseqc);
2528         getdataset(seqfp);
2529         closefile(seqfp);               
2530         data_optn = guessdatatype();
2531         
2532         /* translate characters into format used by ML engine */
2533         nuc_optn = TRUE; 
2534         SH_optn = FALSE;
2535         Seqchar = NULL;
2536         translatedataset();
2537         
2538         /* estimate base frequencies from data set */
2539         Freqtpm = NULL;
2540         Basecomp = NULL;
2541         estimatebasefreqs();
2542         
2543         /* guess model of substitution */
2544         guessmodel();
2545
2546         /* initialize guess variables */
2547         auto_datatype = AUTO_GUESS;
2548         if (data_optn == AMINOACID) auto_aamodel = AUTO_GUESS;
2549         else                        auto_aamodel = AUTO_DEFAULT;
2550         /* save guessed amino acid options */
2551         guessDayhf_optn    = Dayhf_optn;
2552         guessJtt_optn      = Jtt_optn;
2553         guessmtrev_optn    = mtrev_optn;
2554         guesscprev_optn    = cprev_optn;
2555         guessblosum62_optn = blosum62_optn;
2556         guessvtmv_optn     = vtmv_optn;
2557         guesswag_optn     = wag_optn;
2558         guessauto_aamodel  = auto_aamodel;
2559
2560
2561         /* check for user specified tree */
2562         if ((utfp = fopen(INTREE, "r")) != NULL) {
2563                 fclose(utfp);
2564                 puzzlemode = USERTREE;
2565         } else {
2566                 puzzlemode = QUARTPUZ;
2567         }
2568
2569         /* reserve memory for cluster LM analysis */
2570         clusterA = new_ivector(Maxspc);
2571         clusterB = new_ivector(Maxspc);
2572         clusterC = new_ivector(Maxspc);
2573         clusterD = new_ivector(Maxspc);
2574
2575         /* set options interactively */
2576         setoptions();
2577
2578         /* open usertree file right after start */
2579         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
2580                 openfiletoread(&utfp, INTREE, "user trees");
2581         }
2582
2583         /* start main timer */
2584         time(&Starttime);
2585         Startcpu=clock();
2586         addtimes(OPTIONS, &tarr);
2587
2588         /* symmetrize doublet frequencies if specified */
2589         symdoublets();
2590
2591         /* initialise ML */
2592         mlstart();
2593
2594         /* determine how many usertrees */
2595         if (typ_optn == TREERECON_OPTN && puzzlemode == USERTREE) {
2596                 numutrees = 0;          
2597                 do {
2598                         ci = fgetc(utfp);
2599                         if ((char) ci == ';') numutrees++;
2600                 } while (ci != EOF);
2601                 rewind(utfp);
2602                 if (numutrees < 1) {
2603                         FPRINTF(STDOUTFILE "Unable to proceed (no tree in input tree file)\n\n\n");
2604                         exit(1);
2605                 }               
2606         }
2607         
2608         /* check fraction of invariable sites */
2609         if ((rhetmode == TWORATE || rhetmode == MIXEDRATE) && !fracinv_optim)
2610                 /* fraction of invariable site was specified manually */
2611                 if (fracinv > MAXFI)
2612                         fracinv = MAXFI;
2613
2614         addtimes(GENERAL, &tarr);
2615         /* estimate parameters */
2616         if (!(typ_optn == TREERECON_OPTN && puzzlemode == USERTREE)) {
2617                 /* no tree present */
2618                 estimateparametersnotree();
2619         } else {
2620                 if (utree_optn) {
2621                         /* use 1st user tree */
2622                         readusertree(utfp);
2623                         rewind(utfp);
2624                         estimateparameterstree();
2625                 } else {
2626                         /* don't use first user tree */
2627                         estimateparametersnotree();
2628                 }
2629         }
2630         addtimes(PARAMEST, &tarr);
2631
2632         /* compute expected Ts/Tv ratio */
2633         if (data_optn == NUCLEOTIDE) computeexpectations();
2634
2635 } /* inputandinit */
2636
2637
2638
2639 /***************************************************************/ 
2640
2641 void evaluatetree(FILE *intreefp, FILE *outtreefp, int pmode, int utreenum, int maxutree, int *oldlocroot)
2642 {
2643
2644         switch (pmode) {
2645                 case QUARTPUZ: /* read QP tree */
2646                         readusertree(intreefp);                 
2647                         FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock)\n");
2648                         fflush(STDOUT);
2649                         usertree_lklhd();
2650                         findbestratecombination();
2651                         break;
2652                 case USERTREE: /* read user tree */
2653                         readusertree(intreefp);
2654                         FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (without clock) for tree # %d\n", utreenum+1);
2655                         fflush(STDOUT);
2656                         usertree_lklhd();
2657                         if (maxutree > 1) {
2658                                 ulkl[utreenum] = Ctree->lklhd;
2659                                 allsitelkl(Ctree->condlkl, allsites[utreenum]);
2660                         }
2661                         if (utreenum==0) findbestratecombination();
2662                         break;
2663         }
2664
2665
2666         if (compclock) { /* clocklike branch length */
2667                 switch (pmode) {
2668                         case QUARTPUZ:
2669                                 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock)\n");
2670                                 fflush(STDOUT);
2671                                 break;
2672                         case USERTREE:
2673                                 FPRINTF(STDOUTFILE "Computing maximum likelihood branch lengths (with clock) for tree # %d\n", utreenum+1);
2674                                 fflush(STDOUT);
2675                                 break;
2676                 }
2677                                                                         
2678                 /* find best place for root */
2679                 rootsearch = 0;
2680
2681                 if (utreenum==0) locroot = *oldlocroot;
2682                 else             *oldlocroot = locroot;
2683
2684                 if (locroot < 0) {
2685                         locroot = findrootedge();
2686                         rootsearch = 1;
2687                 }
2688                 /* if user-specified edge for root does not exist use displayed outgroup */
2689                 if (!checkedge(locroot)) {
2690                         locroot = outgroup; 
2691                         rootsearch = 2;
2692                 }
2693                 /* compute likelihood */
2694                 clock_lklhd(locroot);
2695                 if (maxutree > 1) {
2696                         ulklc[utreenum] = Ctree->lklhdc;
2697                         allsitelkl(Ctree->condlkl, allsitesc[utreenum]);
2698                 }
2699
2700         }
2701
2702         if (clockmode == 0)
2703                 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhd);
2704         else
2705                 fprintf(outtreefp, "[ lh=%.6f ]", Ctree->lklhdc);
2706
2707         /* write ML branch length tree to outree file */
2708         clockmode = 0; /* nonclocklike branch lengths */
2709         fputphylogeny(outtreefp);
2710                 
2711         /* clocklike branch lengths */
2712         if (compclock) {
2713                 clockmode = 1;
2714                 fputrooted(outtreefp, locroot);
2715         }
2716 } /* evaluatetree */
2717
2718 /***************************************************************/ 
2719
2720 void memcleanup() { 
2721         if (puzzlemode == QUARTPUZ && typ_optn == TREERECON_OPTN) {
2722                 free(splitfreqs);
2723                 free(splitpatterns);
2724                 free(splitsizes);
2725                 free_ivector(consconfid);
2726                 free_ivector(conssizes);
2727                 free_cmatrix(consbiparts);
2728                 free_ulivector(badtaxon);
2729         }
2730         free_cmatrix(Identif);
2731         free_dvector(Freqtpm);
2732         free_imatrix(Basecomp);
2733         free_ivector(clusterA);
2734         free_ivector(clusterB);
2735         free_ivector(clusterC);
2736         free_ivector(clusterD);
2737         free_dvector(qweight);
2738         free_dvector(sqdiff);
2739         free_ivector(qworder);
2740         free_ivector(sqorder);
2741         freetreelist(&psteptreelist, &psteptreenum, &psteptreesum);
2742 } /* memcleanup */
2743
2744 /***************************************************************/ 
2745
2746
2747 /******************************************************************************/
2748 /* main part                                                                  */
2749 /******************************************************************************/
2750
2751 int main(int argc, char *argv[])
2752 {       
2753         int i, oldlocroot=0;
2754         
2755         /* start main timer */
2756         time(&walltimestart);
2757         cputimestart = clock();
2758         inittimearr(&tarr);
2759
2760
2761         
2762         inputandinit(&argc, &argv);
2763
2764         
2765
2766         /* write distance matrix */
2767         FPRINTF(STDOUTFILE "Writing pairwise distances to file %s\n", DISTANCES);
2768         openfiletowrite(&dfp, DISTANCES, "pairwise distances");
2769         putdistance(dfp);
2770         closefile(dfp);
2771
2772
2773
2774         free_cmatrix(Seqchar);
2775         free_cmatrix(seqchars);
2776
2777         
2778
2779
2780         /* write CPU/Wallclock times and parallel statistics */
2781         time(&walltimestop);
2782         cputimestop = clock();
2783         addtimes(OVERALL, &tarr);
2784
2785         fullcpu          = tarr.fullcpu;
2786         fulltime         = tarr.fulltime;
2787
2788
2789
2790         /* stop timer */
2791         
2792     time(&Stoptime);
2793         Stopcpu=clock();
2794     /*
2795         timestamp(ofp);
2796         closefile(ofp);
2797     CZ 05/16/01*/
2798
2799
2800      /* printbestratecombination(stderr); */
2801         mlfinish();
2802
2803         FPRINTF(STDOUTFILE "\nAll results written to disk:\n");
2804         /*FPRINTF(STDOUTFILE "       Puzzle report file:         %s\n", OUTFILE);*/
2805         FPRINTF(STDOUTFILE "       Likelihood distances:       %s\n", DISTANCES);
2806         
2807         if (typ_optn == TREERECON_OPTN && puzzlemode != PAIRDIST) 
2808                 FPRINTF(STDOUTFILE "       Phylip tree file:           %s\n", TREEFILE);
2809         if (typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) {
2810                 if ((listqptrees == PSTOUT_ORDER) ||(listqptrees == PSTOUT_LISTORDER)) 
2811                         FPRINTF(STDOUTFILE "       Unique puzzling step trees: %s\n", OUTPTORDER);      
2812                 if ((listqptrees == PSTOUT_LIST) ||(listqptrees == PSTOUT_LISTORDER)) 
2813                         FPRINTF(STDOUTFILE "       Puzzling step tree list:    %s\n", OUTPTLIST);       
2814         }
2815         if (show_optn && typ_optn == TREERECON_OPTN && puzzlemode == QUARTPUZ) 
2816                 FPRINTF(STDOUTFILE "       Unresolved quartets:        %s\n", UNRESOLVED);
2817         if (typ_optn == LIKMAPING_OPTN) 
2818                 FPRINTF(STDOUTFILE "       Likelihood mapping diagram: %s\n", TRIANGLE);
2819         FPRINTF(STDOUTFILE "\n");
2820
2821         /* runtime message */
2822         FPRINTF(STDOUTFILE 
2823                 "The computation took %.0f seconds (= %.1f minutes = %.1f hours)\n",
2824                         difftime(Stoptime, Starttime), difftime(Stoptime, Starttime)/60.,
2825                         difftime(Stoptime, Starttime)/3600.);
2826         FPRINTF(STDOUTFILE 
2827                 "     including input %.0f seconds (= %.1f minutes = %.1f hours)\n",
2828                         fulltime, fulltime/60., fulltime/3600.);
2829
2830
2831         /* free memory */
2832         memcleanup();
2833
2834
2835
2836         return 0;
2837 }
2838
2839
2840 /* compare function for uli - sort largest numbers first */
2841 int ulicmp(const void *ap, const void *bp)
2842 {
2843         uli a, b;
2844         
2845         a = *((uli *) ap);
2846         b = *((uli *) bp);
2847
2848         if (a > b) return -1;
2849         else if (a < b) return 1;
2850         else return 0;
2851 }
2852
2853 /* compare function for int - sort smallest numbers first */
2854 int intcmp(const void *ap, const void *bp)
2855 {
2856         int a, b;
2857         
2858         a = *((int *) ap);
2859         b = *((int *) bp);
2860
2861         if (a < b) return -1;
2862         else if (a > b) return 1;
2863         else return 0;
2864 }