initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_mod / src / puzzle2.c
1 /*
2  * puzzle2.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 /* Modified by Christian Zmasek to:
17    - Allow 8000 seqs (for pairwise dist. calc.).
18    - Names of 26 chars.
19    
20
21    !WARNING: Use ONLY together with FORESTER/RIO!
22    !For all other puposes download the excellent original!
23    
24    last modification: 05/19/01
25
26
27    void getsizesites(FILE *ifp):
28
29    257 -> 8000
30
31
32
33    void readid(FILE *infp, int t):
34
35    for (i = 0; i < 10; i++) {                   -> for (i = 0; i < 26; i++) {
36
37    for (i = 9; i > -1; i--) {                   -> for (i = 25; i > -1; i--) {
38
39    for (j = 0; (j < 10) && (flag == TRUE); j++) -> for (j = 0; (j < 26) && (flag == TRUE); j++)
40
41
42
43    void initid(int t):
44
45    Identif = new_cmatrix(t, 10);                -> Identif = new_cmatrix(t, 26);
46
47    for (j = 0; j < 10; j++)                     -> for (j = 0; j < 26; j++)
48
49
50
51    fputid10(FILE *ofp, int t):
52  
53    for (i = 0; i < 10; i++)                     -> for (i = 0; i < 26; i++)
54
55
56
57    int fputid(FILE *ofp, int t):
58
59    while (Identif[t][i] != ' ' && i < 10) {     -> while (Identif[t][i] != ' ' && i < 26) {
60
61
62
63
64 */
65
66 #define EXTERN extern
67
68 #include "puzzle.h"
69 #include <string.h>
70
71 #if PARALLEL
72 #       include "sched.h"
73 #endif /* PARALLEL */
74
75
76 /******************************************************************************/
77 /* sequences                                                                  */
78 /******************************************************************************/
79
80 /* read ten characters of current line as identifier */
81 void readid(FILE *infp, int t)
82 {
83         int i, j, flag, ci;
84
85         for (i = 0; i < 26; i++) { /* CZ 05/19/01 */
86                 ci = fgetc(infp);
87                 if (ci == EOF || !isprint(ci)) {
88                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
89                         exit(1);
90                 }
91                 Identif[t][i] = (char) ci;
92         }       
93         /* convert leading blanks in taxon name to underscores */
94         flag = FALSE;
95         for (i = 25; i > -1; i--) { /* CZ 05/19/01 */
96                 if (flag == FALSE) {
97                         if (Identif[t][i] != ' ') flag = TRUE; 
98                 } else {
99                         if (Identif[t][i] == ' ') Identif[t][i] = '_';
100                 }
101         }
102         /* check whether this name is already used */
103         for (i = 0; i < t; i++) { /* compare with all other taxa */
104                 flag = TRUE; /* assume identity */
105                 for (j = 0; (j < 26) && (flag == TRUE); j++) /* CZ 05/19/01 */
106                         if (Identif[t][j] != Identif[i][j])
107                                 flag = FALSE;
108                 if (flag) {
109                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence name '");
110                         fputid(STDOUT, t);
111                         FPRINTF(STDOUTFILE "')\n\n\n");
112                         exit(1);
113                 }
114         }
115 }
116
117 /* read next allowed character */
118 char readnextcharacter(FILE *ifp, int notu, int nsite)
119 {
120         char c;
121
122         /* ignore blanks and control characters except newline */
123         do {
124                 if (fscanf(ifp, "%c", &c) != 1) {
125                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
126                         fputid(STDOUT, notu);
127                         FPRINTF(STDOUTFILE "')\n\n\n");
128                         exit(1);
129                 }
130         } while (c == ' ' || (iscntrl((int) c) && c != '\n'));
131         return c;
132 }
133
134 /* skip rest of the line */
135 void skiprestofline(FILE* ifp, int notu, int nsite)
136 {
137         int ci;
138
139         /* read chars until the first newline */
140         do{
141                 ci = fgetc(ifp);
142                 if (ci == EOF) {
143                         FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
144                         fputid(STDOUT, notu);
145                         FPRINTF(STDOUTFILE "')\n\n\n");
146                         exit(1);
147                 }
148         } while ((char) ci != '\n');
149 }
150
151 /* skip control characters and blanks */
152 void skipcntrl(FILE *ifp, int notu, int nsite)
153 {
154         int ci;
155
156         /* read over all control characters and blanks */
157         do {
158                 ci = fgetc(ifp);
159                 if (ci == EOF) {
160                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
161                         fputid(STDOUT, notu);
162                         FPRINTF(STDOUTFILE "')\n\n\n");
163                         exit(1);
164                 }
165         } while (iscntrl(ci) || (char) ci == ' ');
166         /* go one character back */
167         if (ungetc(ci, ifp) == EOF) {
168                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
169                 fputid(STDOUT, notu);
170                 FPRINTF(STDOUTFILE "')\n\n\n");
171                 exit(1);
172         }
173 }
174
175 /* read sequences of one data set */
176 void getseqs(FILE *ifp)
177 {
178         int notu, nsite, endofline, linelength, i;
179         char c;
180         
181         seqchars = new_cmatrix(Maxspc, Maxseqc);
182         /* read all characters */
183         nsite = 0; /* next site to be read */
184         while (nsite < Maxseqc) {
185                 /* read first taxon */
186                 notu = 0;
187                 /* go to next true line */
188                 skiprestofline(ifp, notu, nsite);
189                 skipcntrl(ifp, notu, nsite);
190                 if (nsite == 0) readid(ifp, notu);
191                 endofline = FALSE;
192                 linelength = 0;         
193                 do {
194                         c = readnextcharacter(ifp, notu, nsite + linelength);
195                         if (c == '\n') endofline = TRUE;
196                         else if (c == '.') {
197                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position ");
198                                 FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1);
199                                 exit(1);
200                         } else if (nsite + linelength < Maxseqc) {
201                                 /* change to upper case */
202                                 seqchars[notu][nsite + linelength] = (char) toupper((int) c);
203                                 linelength++;
204                         } else {
205                                 endofline = TRUE;
206                                 skiprestofline(ifp, notu, nsite + linelength);
207                         }
208                 } while (!endofline);   
209                 if (linelength == 0) {
210                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
211                         fputid(STDOUT, notu);
212                         FPRINTF(STDOUTFILE "')\n\n\n");
213                         exit(1);
214                 }
215                 /* read other taxa */
216                 for (notu = 1; notu < Maxspc; notu++) {
217                         /* go to next true line */
218                         if (notu != 1) skiprestofline(ifp, notu, nsite);
219                         skipcntrl(ifp, notu, nsite);
220                         if (nsite == 0) readid(ifp, notu);
221                         for (i = nsite; i < nsite + linelength; i++) {
222                                 c = readnextcharacter(ifp, notu, i);
223                                 if (c == '\n') { /* too short */
224                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
225                                         fputid(STDOUT, notu);
226                                         FPRINTF(STDOUTFILE "')\n\n\n");
227                                         exit(1);
228                                 } else if (c == '.') {
229                                         seqchars[notu][i] = seqchars[0][i];
230                                 } else {
231                                         /* change to upper case */
232                                         seqchars[notu][i] = (char) toupper((int) c);
233                                 }
234                         }
235                 }
236                 nsite = nsite + linelength;
237         }
238 }
239
240 /* initialize identifer array */
241 void initid(int t)
242 {
243         int i, j;
244
245         Identif = new_cmatrix(t, 26); /* CZ 05/19/01 */
246         for (i = 0; i < t; i++)
247                 for (j = 0; j < 26; j++) /* CZ 05/19/01 */
248                         Identif[i][j] = ' ';
249 }
250
251 /* print identifier of specified taxon in full 10 char length */
252 void fputid10(FILE *ofp, int t)
253 {       
254         int i;
255
256         for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /* CZ 05/19/01 */
257 }
258
259 /* print identifier of specified taxon up to first space */
260 int fputid(FILE *ofp, int t)
261 {       
262         int i;
263         
264         i = 0;
265         while (Identif[t][i] != ' ' && i < 26) { /* CZ 05/19/01 */
266                 fputc(Identif[t][i], ofp);
267                 i++;
268         }
269         return i;
270 }
271
272 /* read first line of sequence data set */
273 void getsizesites(FILE *ifp)
274 {
275         if (fscanf(ifp, "%d", &Maxspc) != 1) {
276                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
277                         exit(1);
278         }
279         if (fscanf(ifp, "%d", &Maxseqc) != 1) {
280                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
281                         exit(1);
282         }
283         
284         if (Maxspc < 4) {
285                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
286                 exit(1);
287         }
288         if (Maxspc > 8000) { /* CZ 05/19/01 */
289                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n");
290                 exit(1);
291         }
292         if (Maxseqc < 1) {
293                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
294                 exit(1);
295         }
296         Maxbrnch = 2*Maxspc - 3;
297 }
298
299 /* read one data set - PHYLIP interleaved */
300 void getdataset(FILE *ifp)
301 {
302         initid(Maxspc);
303         getseqs(ifp);
304 }
305
306 /* guess data type */
307 int guessdatatype()
308 {
309         uli numnucs, numchars, numbins;
310         int notu, nsite;
311         char c;
312         
313         /* count A, C, G, T, U, N */
314         numnucs = 0;
315         numchars = 0;
316         numbins = 0;
317         for (notu = 0; notu < Maxspc; notu++)
318                 for (nsite = 0; nsite < Maxseqc; nsite++) {
319                         c = seqchars[notu][nsite];
320                         if (c == 'A' || c == 'C' || c == 'G' ||
321                             c == 'T' || c == 'U' || c == 'N') numnucs++;
322                         if (c != '-' && c != '?') numchars++;
323                         if (c == '0' || c == '1') numbins++;
324                 }
325         if (numchars == 0) numchars = 1;
326         /* more than 85 % frequency means nucleotide data */
327         if ((double) numnucs / (double) numchars > 0.85) return 0;
328         else if ((double) numbins / (double) numchars > 0.2) return 2; 
329         else return 1;
330 }
331
332 /* translate characters into format used by ML engine */
333 void translatedataset()
334 {       
335         int notu, sn, co;
336         char c;
337         cvector code;
338         
339
340         /* determine Maxsite - number of ML sites per taxon */
341         if (data_optn == 0 && SH_optn) {
342                 if (SHcodon)
343                         Maxsite = Maxseqc / 3;
344                 else
345                         Maxsite = Maxseqc / 2; /* assume doublets */
346                 
347         } else
348                 Maxsite = Maxseqc;
349         if (data_optn == 0 && (Maxsite % 3) == 0  && !SH_optn) {        
350                 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
351                         Maxsite = Maxsite / 3; /* only one of the three codon positions */
352                 if (codon_optn == 4)
353                         Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */
354         }
355         
356         /* reserve memory */
357         if (Seqchar != NULL) free_cmatrix(Seqchar);
358         Seqchar = new_cmatrix(Maxspc, Maxsite);
359
360         /* code length */
361         if (data_optn == 0 && SH_optn)
362                 code = new_cvector(2);
363         else
364                 code = new_cvector(1);
365         
366         /* decode characters */
367         if (data_optn == 0 && SH_optn) { /* SH doublets */
368                 
369                 for (notu = 0; notu < Maxspc; notu++) {
370                         for (sn = 0; sn < Maxsite; sn++) {
371                                         for (co = 0; co < 2; co++) {
372                                                 if (SHcodon)
373                                                         c = seqchars[notu][sn*3 + co];
374                                                 else
375                                                         c = seqchars[notu][sn*2 + co];
376                                                 code[co] = c;
377                                         }
378                                 Seqchar[notu][sn] = code2int(code);
379                         }
380                 }
381                 
382         } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */
383
384                 for (notu = 0; notu < Maxspc; notu++) {
385                         for (sn = 0; sn < Maxsite; sn++) {
386                                 code[0] = seqchars[notu][sn];
387                                 Seqchar[notu][sn] = code2int(code);
388                         }
389                 }
390
391         } else { /* codons */
392                 
393                 for (notu = 0; notu < Maxspc; notu++) {
394                         for (sn = 0; sn < Maxsite; sn++) {
395                                 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
396                                         code[0] = seqchars[notu][sn*3+codon_optn-1];
397                                 else if (codon_optn == 4) {
398                                         if ((sn % 2) == 0)
399                                                 code[0] = seqchars[notu][(sn/2)*3];
400                                         else
401                                                 code[0] = seqchars[notu][((sn-1)/2)*3+1];
402                                 } else
403                                         code[0] = seqchars[notu][sn];
404                                 Seqchar[notu][sn] = code2int(code);
405                         }
406                 }
407         
408         }
409         free_cvector(code);
410 }
411
412 /* estimate mean base frequencies from translated data set */
413 void estimatebasefreqs()
414 {
415         int tpmradix, i, j;
416         uli all, *gene;
417         
418         tpmradix = gettpmradix();
419         
420         if (Freqtpm != NULL) free_dvector(Freqtpm);
421         Freqtpm = new_dvector(tpmradix);
422         
423         if (Basecomp != NULL) free_imatrix(Basecomp);
424         Basecomp = new_imatrix(Maxspc, tpmradix);
425         
426         gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli)));
427         if (gene == NULL) maerror("gene in estimatebasefreqs");
428         
429         for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
430         for (i = 0; i < Maxspc; i++)
431                 for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
432         for (i = 0; i < Maxspc; i++)
433                 for (j = 0; j < Maxsite; j++) {
434                         gene[(int) Seqchar[i][j]]++;
435                         if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
436                         }
437
438         all = Maxspc * Maxsite - gene[tpmradix];
439         if (all != 0) { /* normal case */
440                 for (i = 0; i < tpmradix; i++)
441                         Freqtpm[i] = (double) gene[i] / (double) all;
442         } else { /* pathological case with no unique character in data set */
443                 for (i = 0; i < tpmradix; i++)
444                         Freqtpm[i] = 1.0 / (double) tpmradix;
445         }
446         
447         free(gene);
448         
449         Frequ_optn = TRUE;
450 }
451
452 /* guess model of substitution */
453 void guessmodel()
454 {
455         double c1, c2, c3, c4, c5, c6;
456         dvector f;
457         dmatrix a;
458         int i;
459
460         Dayhf_optn = FALSE;
461         Jtt_optn = TRUE;
462         mtrev_optn = FALSE;
463         cprev_optn = FALSE;
464         blosum62_optn = FALSE;
465         vtmv_optn = FALSE;
466         wag_optn = FALSE;
467         TSparam = 2.0;
468         YRparam = 1.0;
469         optim_optn = TRUE;
470         HKY_optn = TRUE;
471         TN_optn = FALSE;
472         
473         if (data_optn == 1) { /* amino acids */
474                 
475                 /* chi2 fit to amino acid frequencies */
476                 
477                 f = new_dvector(20);
478                 a = new_dmatrix(20,20);
479                 /* chi2 distance Dayhoff */
480                 dyhfdata(a, f);
481                 c1 = 0;
482                 for (i = 0; i < 20; i++)
483                         c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
484                 /* chi2 distance JTT */
485                 jttdata(a, f);
486                 c2 = 0;
487                 for (i = 0; i < 20; i++)
488                         c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
489                 /* chi2 distance mtREV */
490                 mtrevdata(a, f);
491                 c3 = 0;
492                 for (i = 0; i < 20; i++)
493                         c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
494                 /* chi2 distance VT */
495                 vtmvdata(a, f);
496                 c4 = 0;
497                 for (i = 0; i < 20; i++)
498                         c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
499                 /* chi2 distance WAG */
500                 wagdata(a, f);
501                 c5 = 0;
502                 for (i = 0; i < 20; i++)
503                         c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
504                 /* chi2 distance cpREV */
505                 cprev45data(a, f);
506                 c6 = 0;
507                 for (i = 0; i < 20; i++)
508                         c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
509
510                 free_dvector(f);
511                 free_dmatrix(a);
512
513 #ifndef CPREV
514                 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
515                         /* c1 -> Dayhoff */
516                         Dayhf_optn = TRUE;
517                         Jtt_optn = FALSE;
518                         mtrev_optn = FALSE;
519                         cprev_optn = FALSE;
520                         vtmv_optn = FALSE;
521                         wag_optn = FALSE;
522                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
523                 } else {
524                         if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
525                                 /* c2 -> JTT */
526                                 Dayhf_optn = FALSE;
527                                 Jtt_optn = TRUE;
528                                 mtrev_optn = FALSE;
529                                 cprev_optn = FALSE;
530                                 vtmv_optn = FALSE;
531                                 wag_optn = FALSE;
532                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
533                         } else {
534                                 if ((c3 < c4) && (c3 < c5)) {
535                                         /* c3 -> mtREV */
536                                         Dayhf_optn = FALSE;
537                                         Jtt_optn = FALSE;
538                                         mtrev_optn = TRUE;
539                                         cprev_optn = FALSE;
540                                         vtmv_optn = FALSE;
541                                         wag_optn = FALSE;
542                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
543                                 } else {
544                                         if ((c4 < c5)) {
545                                                 /* c4 -> VT */
546                                                 Dayhf_optn = FALSE;
547                                                 Jtt_optn = FALSE;
548                                                 mtrev_optn = FALSE;
549                                                 cprev_optn = FALSE;
550                                                 vtmv_optn = TRUE;
551                                                 wag_optn = FALSE;
552                                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
553                                         } else {
554                                                         /* c5 -> WAG */
555                                                         Dayhf_optn = FALSE;
556                                                         Jtt_optn = FALSE;
557                                                         mtrev_optn = FALSE;
558                                                         cprev_optn = FALSE;
559                                                         vtmv_optn = FALSE;
560                                                         wag_optn = TRUE;
561                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
562                                         } /* if c4 else c5 */
563                                 } /* if c3 else c4 */
564                         } /* if c2 */
565                 } /* if c1 */
566
567 #else /* CPREV */
568
569                 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
570                         /* c1 -> Dayhoff */
571                         Dayhf_optn = TRUE;
572                         Jtt_optn = FALSE;
573                         mtrev_optn = FALSE;
574                         cprev_optn = FALSE;
575                         vtmv_optn = FALSE;
576                         wag_optn = FALSE;
577                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
578                 } else {
579                         if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
580                                 /* c2 -> JTT */
581                                 Dayhf_optn = FALSE;
582                                 Jtt_optn = TRUE;
583                                 mtrev_optn = FALSE;
584                                 cprev_optn = FALSE;
585                                 vtmv_optn = FALSE;
586                                 wag_optn = FALSE;
587                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
588                         } else {
589                                 if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
590                                         /* c3 -> mtREV */
591                                         Dayhf_optn = FALSE;
592                                         Jtt_optn = FALSE;
593                                         mtrev_optn = TRUE;
594                                         cprev_optn = FALSE;
595                                         vtmv_optn = FALSE;
596                                         wag_optn = FALSE;
597                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
598                                 } else {
599                                         if ((c4 < c5) && (c4 < c6)) {
600                                                 /* c4 -> VT */
601                                                 Dayhf_optn = FALSE;
602                                                 Jtt_optn = FALSE;
603                                                 mtrev_optn = FALSE;
604                                                 cprev_optn = FALSE;
605                                                 vtmv_optn = TRUE;
606                                                 wag_optn = FALSE;
607                                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
608                                         } else {
609                                                 if (c5 < c6) {
610                                                         /* c5 -> WAG */
611                                                         Dayhf_optn = FALSE;
612                                                         Jtt_optn = FALSE;
613                                                         mtrev_optn = FALSE;
614                                                         cprev_optn = FALSE;
615                                                         vtmv_optn = FALSE;
616                                                         wag_optn = TRUE;
617                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
618                                                 } else {
619                                                         /* if (c6) */
620                                                         /* c6 -> cpREV */
621                                                         Dayhf_optn = FALSE;
622                                                         Jtt_optn = FALSE;
623                                                         mtrev_optn = FALSE;
624                                                         cprev_optn = TRUE;
625                                                         vtmv_optn = FALSE;
626                                                         wag_optn = FALSE;
627                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n");
628                                                 } /* if c5 else c6 */
629                                         } /* if c4 else c5 */
630                                 } /* if c3 else c4 */
631                         } /* if c2 */
632                 } /* if c1 */
633 #endif /* CPREV */
634
635         } else if (data_optn == 0) {
636                 FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n");
637         } else {
638                 FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n");
639         }
640 } /* guessmodel */
641
642
643 /******************************************************************************/
644 /* functions for representing and building puzzling step trees                */
645 /******************************************************************************/
646
647 /* initialize tree with the following starting configuration
648
649                  2
650           0  +------- C(=2)
651   A(=0) -----+
652              +------- B(=1)
653                  1
654                                 */
655 void inittree()
656 {
657         int i;
658
659         /* allocate the memory for the whole tree */
660         
661         /* allocate memory for vector with all the edges of the tree */
662         edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) );
663         if (edge == NULL) maerror("edge in inittree");
664
665         /* allocate memory for vector with edge numbers of leaves */ 
666         edgeofleaf = (int *) calloc(Maxspc, sizeof(int) );
667         if (edgeofleaf == NULL) maerror("edgeofleaf in inittree");
668         
669         /* allocate memory for all the edges the edge map */
670         for (i = 0; i < Maxbrnch; i++) {
671                 edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) );
672                 if (edge[i].edgemap == NULL) maerror("edgemap in inittree");
673         }
674                 
675         /* number all edges */
676         for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i;
677         
678         /* initialize tree */
679         
680         nextedge = 3;
681         nextleaf = 3;
682         
683         /* edge maps */
684         (edge[0].edgemap)[0] = 0; /* you are on the right edge */
685         (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */
686         (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */
687         (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */
688         (edge[1].edgemap)[1] = 0; /* you are on the right edge */
689         (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */
690         (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */
691         (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */
692         (edge[2].edgemap)[2] = 0; /* you are on the right edge */
693         
694         /* interconnection */
695         edge[0].up = NULL;
696         edge[0].downleft = &edge[1];
697         edge[0].downright = &edge[2];
698         edge[1].up = &edge[0];
699         edge[1].downleft = NULL;
700         edge[1].downright = NULL;
701         edge[2].up = &edge[0];
702         edge[2].downleft = NULL;
703         edge[2].downright = NULL;
704         
705         /* edges of leaves */
706         edgeofleaf[0] = 0;
707         edgeofleaf[1] = 1;
708         edgeofleaf[2] = 2;
709 } /* inittree */
710
711 /* add next leaf on the specified edge */
712 void addnextleaf(int dockedge)
713 {       
714         int i;
715
716         if (dockedge >= nextedge) {
717                 /* Trying to add leaf nextleaf to nonexisting edge dockedge */
718                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n");
719                 exit(1);
720         }
721         
722         if (nextleaf >= Maxspc) {
723                 /* Trying to add leaf nextleaf to a tree with Maxspc leaves */
724                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n");
725                 exit(1);
726         }
727                 
728         /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */
729         if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge;
730         
731         /* adding nextedge to the tree */
732         edge[nextedge].up = edge[dockedge].up;
733         edge[nextedge].downleft = &edge[dockedge];
734         edge[nextedge].downright = &edge[nextedge+1];
735         edge[dockedge].up = &edge[nextedge];
736
737         if (edge[nextedge].up != NULL) {
738                 if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) 
739                         (edge[nextedge].up)->downleft  = &edge[nextedge];
740                 else   
741                         (edge[nextedge].up)->downright = &edge[nextedge];
742         }
743
744         /* adding nextedge + 1 to the tree */
745         edge[nextedge+1].up = &edge[nextedge];
746         edge[nextedge+1].downleft = NULL;
747         edge[nextedge+1].downright = NULL;
748         edgeofleaf[nextleaf] = nextedge+1;
749         
750         /* the two new edges get info about the old edges */
751         /* nextedge */
752         for (i = 0; i < nextedge; i++) {
753                 switch ( (edge[dockedge].edgemap)[i] ) {
754                         
755                         /* down right changes to down left */
756                         case 5:         (edge[nextedge].edgemap)[i] = 4;
757                                                 break;
758                                                 
759                         /* null changes to down left */
760                         case 0:         (edge[nextedge].edgemap)[i] = 4;
761                                                 break;
762                         
763                         default:        (edge[nextedge].edgemap)[i] =
764                                                         (edge[dockedge].edgemap)[i];
765                                                 break;
766                 }
767         }
768
769         /* nextedge + 1 */
770         for (i = 0; i < nextedge; i++) {
771                 switch ( (edge[dockedge].edgemap)[i] ) {
772                         
773                         /* up/down left changes to up */
774                         case 2:         (edge[nextedge+1].edgemap)[i] = 1;
775                                                 break;
776                         
777                         /* up/down right changes to up */               
778                         case 3:         (edge[nextedge+1].edgemap)[i] = 1;
779                                                 break;
780                                                 
781                         /* down left changes to up/down left */                 
782                         case 4:         (edge[nextedge+1].edgemap)[i] = 2;
783                                                 break;
784
785                         /* down right changes to up/down left */        
786                         case 5:         (edge[nextedge+1].edgemap)[i] = 2;
787                                                 break;
788                         
789                         /* null changes to up/down left */
790                         case 0:         (edge[nextedge+1].edgemap)[i] = 2;
791                                                 break;
792                         
793                         /* up stays up */
794                         default:        (edge[nextedge+1].edgemap)[i] =
795                                                         (edge[dockedge].edgemap)[i];
796                                                 break;
797                 }
798         }
799
800         /* dockedge */
801         for (i = 0; i < nextedge; i++) {
802                 switch ( (edge[dockedge].edgemap)[i] ) {
803                         
804                         /* up/down right changes to up */
805                         case 3:         (edge[dockedge].edgemap)[i] = 1;
806                                                 break;
807                                                 
808                         /* up/down left changes to up */
809                         case 2:         (edge[dockedge].edgemap)[i] = 1;
810                                                 break;
811                         
812                         default:        break;
813                 }
814         }
815
816         /* all edgemaps are updated for the two new edges */
817         /* nextedge */
818         (edge[nextedge].edgemap)[nextedge] = 0;
819         (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */
820         
821         /* nextedge + 1 */
822         (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */
823         (edge[nextedge+1].edgemap)[nextedge+1] = 0;
824         
825         /* all other edges */
826         for (i = 0; i < nextedge; i++) {
827                 (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge];
828                 (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge];
829         }
830         
831         /* an extra for dockedge */
832         (edge[dockedge].edgemap)[nextedge] = 1; /* up */
833         (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */
834
835         nextleaf++;
836         nextedge = nextedge + 2;
837 } /* addnextleaf */
838
839
840 /* free memory (to be called after inittree) */
841 void freetree()
842 {
843         int i;
844
845         for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap);
846         free(edge);
847         free(edgeofleaf);       
848 } /* freetree */
849
850 /* writes OTU sitting on edge ed */
851 void writeOTU(FILE *outfp, int ed)
852 {       
853         int i;
854
855         /* test whether we are on a leaf */
856         if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
857                 for (i = 1; i < nextleaf; i++) {
858                         if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
859                                 column += fputid(outfp, trueID[i]);
860                                 return;
861                         }
862                 }
863         }
864
865         /* we are NOT on a leaf */
866         fprintf(outfp, "(");
867         column++;
868         writeOTU(outfp, edge[ed].downleft->numedge);
869         fprintf(outfp, ",");
870         column++;
871         column++;
872         if (column > 55) {
873                 column = 2;
874                 fprintf(outfp, "\n  ");
875         }
876         writeOTU(outfp, edge[ed].downright->numedge);   
877         fprintf(outfp, ")");
878         column++;
879 } /* writeOTU */
880
881 /* write tree */
882 void writetree(FILE *outfp)
883 {       
884         column = 1;
885         fprintf(outfp, "(");
886         column += fputid(outfp, trueID[0]) + 3;
887         fprintf(outfp, ",");
888         writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge);
889         column++;
890         column++;
891         fprintf(outfp, ",");
892         writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge);        
893         fprintf(outfp, ");\n"); 
894 } /* writetree */
895
896
897 /* clear all edgeinfos */
898 void resetedgeinfo()
899 {
900         int i;
901         
902         for (i = 0; i < nextedge; i++)
903                 edge[i].edgeinfo = 0;
904 } /* resetedgeinfo */
905
906 /* increment all edgeinfo between leaf A and B */
907 void incrementedgeinfo(int A, int B)
908 {       
909         int curredge, finaledge, nextstep;
910         
911         if (A == B) return;
912         
913         finaledge = edgeofleaf[B];
914         
915         curredge = edgeofleaf[A];
916         edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
917         
918         while (curredge != finaledge) {
919                 nextstep = (edge[curredge].edgemap)[finaledge];
920                 switch (nextstep) {
921
922                         /* up */
923                         case 1: curredge = (edge[curredge].up)->numedge;
924                                         break;
925                                 
926                         /* up/down left */
927                         case 2: curredge = ((edge[curredge].up)->downleft)->numedge;
928                                         break;
929
930                         /* up/down right */
931                         case 3: curredge = ((edge[curredge].up)->downright)->numedge;
932                                         break;
933                                 
934                         /* down left */
935                         case 4: curredge = (edge[curredge].downleft)->numedge;
936                                         break;
937                                 
938                         /* down right */
939                         case 5: curredge = (edge[curredge].downright)->numedge;
940                                         break;
941                                 
942                 }
943                 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
944         }       
945 } /* incrementedgeinfo */
946
947 /* checks which edge has the lowest edgeinfo
948    if there are several edges with the same lowest edgeinfo,
949    one of them will be selected randomly */
950 void minimumedgeinfo()
951 {
952         int i, k, howmany, randomnum;
953
954         howmany = 1;
955         minedge = 0;
956         mininfo = edge[0].edgeinfo;
957         for (i = 1; i < nextedge; i++)
958                 if (edge[i].edgeinfo <= mininfo) {
959                         if (edge[i].edgeinfo == mininfo) {
960                                 howmany++;
961                         } else {
962                                 minedge = i;
963                                 mininfo = edge[i].edgeinfo;
964                                 howmany = 1;
965                         }
966                 }
967         
968         if (howmany > 1) { /* draw random edge */
969                 randomnum = randominteger(howmany) + 1; /* 1 to howmany */
970                 i = -1;
971                 for (k = 0; k < randomnum; k++) {
972                         do {
973                                 i++;
974                         } while (edge[i].edgeinfo != mininfo);
975                         minedge = i;
976                 }
977         }
978 } /* minimumedgeinfo */
979
980
981
982
983 /*******************************************/
984 /* tree sorting                            */
985 /*******************************************/
986
987 /* compute address of the 4 int (sort key) in the 4 int node */
988 int ct_sortkeyaddr(int addr)
989 {
990   int a, res;
991   a = addr % 4;
992   res = addr - a + 3;
993   return res;
994 }
995
996
997 /**********/
998
999 /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */
1000 int ct_nextedgeaddr(int addr)
1001 {
1002   int a, res;
1003   a = addr % 4;
1004   if ( a == 2 ) { res = addr - 2; }
1005   else          { res = addr + 1; }
1006   return res;
1007 }
1008
1009
1010 /**********/
1011
1012 /* compute address of 1st edge of a 4 int node from node number */
1013 int ct_1stedge(int node)
1014 {
1015   int res;
1016   res = 4 * node;
1017   return res;
1018 }
1019
1020
1021 /**********/
1022
1023 /* compute address of 2nd edge of a 4 int node from node number */
1024 int ct_2ndedge(int node)
1025 {
1026   int res;
1027   res = 4 * node +1;
1028   return res;
1029 }
1030
1031
1032 /**********/
1033
1034 /* compute address of 3rd edge of a 4 int node from node number */
1035 int ct_3rdedge(int node)
1036 {
1037   int res;
1038   res = 4 * node +2;
1039   return res;
1040 }
1041
1042
1043 /**********/
1044
1045 /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */
1046 int ct_isleaf(int node, int *ctree)
1047 {
1048   return (ctree[ct_3rdedge(node)] < 0);
1049 }
1050
1051
1052 /**********/
1053
1054 /* compute node number of 4 int node from an edge addr. */
1055 int ct_addr2node(int addr)
1056 {
1057   int a, res;
1058   a = addr % 4;
1059   res = (int) ((addr - a) / 4);
1060   return res;
1061 }
1062
1063
1064 /**********/
1065
1066 /* print graph pointers for checking */
1067 void printctree(int *ctree)
1068 {
1069         int n;
1070         for (n=0; n < 2*Maxspc; n++) {
1071                 printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n,
1072                 (int) ctree[ct_1stedge(n)]/4,
1073                 (int) ctree[ct_1stedge(n)]%4,
1074                 (int) ctree[ct_2ndedge(n)]/4,
1075                 (int) ctree[ct_2ndedge(n)]%4,
1076                 (int) ctree[ct_3rdedge(n)]/4,
1077                 (int) ctree[ct_3rdedge(n)]%4,
1078                 ctree[ct_3rdedge(n)+1]);
1079         }
1080         printf("\n");
1081 } /* printctree */
1082
1083
1084 /**********/
1085
1086 /* allocate memory for ctree 3 ints pointer plus 1 check byte */
1087 int *initctree()
1088 {
1089   int *snodes;
1090   int n;
1091
1092   snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int));
1093   if (snodes == NULL) maerror("snodes in copytree");
1094
1095   for (n=0; n<(4 * 2 * Maxspc); n++) {
1096       snodes[n]=-1;
1097   }
1098   return snodes;
1099 }
1100
1101
1102 /**********/
1103
1104 /* free memory of a tree for sorting */
1105 void freectree(int **snodes)
1106 {
1107         free(*snodes);
1108         *snodes = NULL;
1109 }
1110
1111
1112 /**********/
1113
1114 /* copy subtree recursively */
1115 void copyOTU(int *ctree,                  /* tree array struct            */
1116              int *ct_nextnode,            /* next free node               */
1117              int ct_curredge,             /* currende edge to add subtree */
1118              int *ct_nextleaf,            /* next free leaf (0-maxspc)    */
1119              int ed)                      /* edge in puzzling step tree   */
1120 {
1121         int i, nextcurredge;
1122
1123         /* test whether we are on a leaf */
1124         if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
1125                 for (i = 1; i < nextleaf; i++) {
1126                         if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
1127                                 nextcurredge          = ct_1stedge(*ct_nextleaf);
1128                                 ctree[ct_curredge]    = nextcurredge;
1129                                 ctree[nextcurredge]   = ct_curredge;
1130                                 ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i];
1131                                 (*ct_nextleaf)++;
1132                                 return;
1133                         }
1134                 }
1135         }
1136
1137         /* we are NOT on a leaf */
1138         nextcurredge        = ct_1stedge(*ct_nextnode);
1139         ctree[ct_curredge]     = nextcurredge;
1140         ctree[nextcurredge] = ct_curredge;
1141         (*ct_nextnode)++;
1142         nextcurredge = ct_nextedgeaddr(nextcurredge);
1143         copyOTU(ctree, ct_nextnode, nextcurredge, 
1144                 ct_nextleaf, edge[ed].downleft->numedge);
1145
1146         nextcurredge = ct_nextedgeaddr(nextcurredge);
1147         copyOTU(ctree, ct_nextnode, nextcurredge, 
1148                 ct_nextleaf, edge[ed].downright->numedge);
1149 }
1150
1151
1152 /**********/
1153
1154 /* copy treestructure to sorting structure */
1155 void copytree(int *ctree)
1156 {
1157         int ct_curredge;
1158         int ct_nextleaf;
1159         int ct_nextnode;
1160
1161         ct_nextnode = Maxspc;
1162         ct_curredge = ct_1stedge(ct_nextnode);
1163         ct_nextleaf = 1;
1164
1165         ctree[ct_1stedge(0)] = ct_curredge;
1166         ctree[ct_curredge]   = ct_1stedge(0);
1167         ctree[ct_sortkeyaddr(0)] = trueID[0];
1168
1169         ct_nextnode++;
1170         
1171         ct_curredge = ct_nextedgeaddr(ct_curredge);
1172         copyOTU(ctree, &ct_nextnode, ct_curredge, 
1173                 &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge);
1174
1175         ct_curredge = ct_nextedgeaddr(ct_curredge);
1176         copyOTU(ctree, &ct_nextnode, ct_curredge, 
1177                 &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge);
1178 }
1179
1180
1181 /**********/
1182
1183 /* sort subtree from edge recursively by indices */
1184 int sortOTU(int edge, int *ctree)
1185 {
1186         int key1, key2;
1187         int edge1, edge2;
1188         int tempedge;
1189
1190         if (ctree[ct_2ndedge((int) (edge / 4))] < 0)
1191                 return ctree[ct_sortkeyaddr(edge)];
1192
1193         edge1 = ctree[ct_nextedgeaddr(edge)];
1194         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1195
1196         /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */
1197         /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", 
1198            (int)(edge/4), edge%4, (int)(edge1/4), edge1%4, 
1199            (int)(edge2/4), edge2%4); */
1200
1201         key1  = sortOTU(edge1, ctree); 
1202         key2  = sortOTU(edge2, ctree); 
1203         
1204         if (key2 < key1) {
1205                 tempedge            = ctree[ctree[edge1]];
1206                 ctree[ctree[edge1]] = ctree[ctree[edge2]];
1207                 ctree[ctree[edge2]] = tempedge;
1208                 tempedge            = ctree[edge1];
1209                 ctree[edge1]        = ctree[edge2];
1210                 ctree[edge2]        = tempedge;
1211                 ctree[ct_sortkeyaddr(edge)] = key2;
1212                 
1213         } else {
1214           ctree[ct_sortkeyaddr(edge)] = key1;
1215         }
1216         return ctree[ct_sortkeyaddr(edge)];
1217 }
1218
1219
1220 /**********/
1221
1222 /* sort ctree recursively by indices */
1223 int sortctree(int *ctree)
1224 {
1225         int n, startnode=-1;
1226         for(n=0; n<Maxspc; n++) {
1227                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1228                         startnode = n;
1229         }
1230         sortOTU(ctree[startnode * 4], ctree);
1231         return startnode;
1232 }
1233
1234
1235 /**********/
1236
1237 /* print recursively subtree of edge of sorted tree ctree */
1238 void printfsortOTU(int edge, int *ctree)
1239 {
1240         int edge1, edge2;
1241
1242         if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1243                 printf("%d", ctree[ct_sortkeyaddr(edge)]);
1244                 return;
1245         }
1246
1247         edge1 = ctree[ct_nextedgeaddr(edge)];
1248         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1249
1250         printf("(");
1251         printfsortOTU(edge1, ctree); 
1252         printf(",");
1253         printfsortOTU(edge2, ctree); 
1254         printf(")");
1255
1256 }
1257
1258
1259 /**********/
1260
1261 /* print recursively sorted tree ctree */
1262 int printfsortctree(int *ctree)
1263 {
1264         int n, startnode=-1;
1265         for(n=0; n<Maxspc; n++) {
1266                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1267                         startnode = n;
1268         }
1269         printf ("(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1270         printfsortOTU(ctree[startnode * 4], ctree);
1271         printf (");\n");
1272         return startnode;
1273 }
1274
1275
1276 /**********/
1277
1278 /* print recursively subtree of edge of sorted tree ctree to string */
1279 void sprintfOTU(char *str, int *len, int edge, int *ctree)
1280 {
1281         int edge1, edge2;
1282
1283         if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1284                 *len+=sprintf(&(str[*len]), "%d", ctree[ct_sortkeyaddr(edge)]);
1285                 return;
1286         }
1287
1288         edge1 = ctree[ct_nextedgeaddr(edge)];
1289         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1290
1291         sprintf(&(str[*len]), "(");
1292         (*len)++;
1293         sprintfOTU(str, len, edge1, ctree); 
1294         sprintf(&(str[*len]), ",");
1295         (*len)++;
1296         sprintfOTU(str, len, edge2, ctree); 
1297         sprintf(&(str[*len]), ")");
1298         (*len)++;
1299 }
1300
1301 /**********/
1302
1303 /* print recursively sorted tree ctree to string */
1304 char *sprintfctree(int *ctree, int strglen)
1305 {
1306         char *treestr,
1307              *tmpptr;
1308         int n,
1309             len=0,
1310             startnode=-1;
1311         treestr = (char *) malloc(strglen * sizeof(char));
1312         tmpptr  = treestr;
1313         for(n=0; n<Maxspc; n++) {
1314                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1315                         startnode = n;
1316         }
1317         len+=sprintf (&(tmpptr[len]), "(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1318         sprintfOTU(tmpptr, &len, ctree[startnode * 4], ctree);
1319         len+=sprintf (&(tmpptr[len]), ");");
1320         return treestr;
1321 }
1322
1323
1324 /**********/
1325
1326
1327 /***********************************************/
1328 /* establish and handle a list of sorted trees */
1329 /***********************************************/
1330
1331 int itemcount;
1332
1333 /* initialize structure */
1334 treelistitemtype *inittreelist(int *treenum)
1335 {
1336         *treenum = 0;
1337         return    NULL;
1338 }
1339
1340
1341 /**********/
1342
1343 /* malloc new tree list item */
1344 treelistitemtype *gettreelistitem()
1345 {
1346         treelistitemtype *tmpptr;
1347         tmpptr = (treelistitemtype *)malloc(sizeof(treelistitemtype));
1348         if (tmpptr == NULL) maerror("item of intermediate tree stuctures");
1349         (*tmpptr).pred = NULL;
1350         (*tmpptr).succ = NULL;
1351         (*tmpptr).tree = NULL;
1352         (*tmpptr).count = 0;
1353         (*tmpptr).idx = itemcount++;
1354         return tmpptr;
1355 }
1356
1357 /**********/
1358
1359 /* free whole tree list */
1360 void freetreelist(treelistitemtype **list,
1361                   int               *numitems,
1362                   int               *numsum)
1363 {
1364         treelistitemtype *current; 
1365         treelistitemtype *next;
1366         current = *list;
1367         while (current != NULL) {
1368                 next = (*current).succ;
1369                 if ((*current).tree != NULL) {
1370                         free ((*current).tree);
1371                         (*current).tree = NULL;
1372                 }
1373                 free(current);
1374                 current = next;
1375         }
1376         *list = NULL;
1377         *numitems = 0;
1378         *numsum = 0;
1379 } /* freetreelist */
1380
1381
1382 /**********/
1383
1384 /* add tree to the tree list */
1385 treelistitemtype *addtree2list(char             **tree,         /* sorted tree string */
1386                                int                numtrees,     /* how many occurred, e.g. in parallel */
1387                                treelistitemtype **list,         /* addr. of tree list */
1388                                int               *numitems,     
1389                                int               *numsum)
1390 {
1391         treelistitemtype *tmpptr = NULL;
1392         treelistitemtype *newptr = NULL;
1393         int               result;
1394         int               done = 0;
1395
1396         if ((*list == NULL) || (numitems == 0)) {
1397                 newptr = gettreelistitem();
1398                 (*newptr).tree = *tree; 
1399                 *tree = NULL;
1400                 (*newptr).id    = *numitems;
1401                 (*newptr).count = numtrees;
1402                 *numitems = 1;
1403                 *numsum   = numtrees;
1404                 *list = newptr;
1405         } else {
1406                 tmpptr = *list;
1407                 while(done == 0) {
1408                         result = strcmp( (*tmpptr).tree, *tree);
1409                         if (result==0) {
1410                                 free(*tree); *tree = NULL;
1411                                 (*tmpptr).count += numtrees;
1412                                 *numsum += numtrees;
1413                                 done = 1;
1414                                 newptr = tmpptr;
1415                         } else { if (result < 0) {
1416                                         if ((*tmpptr).succ != NULL)
1417                                                 tmpptr = (*tmpptr).succ;
1418                                         else {
1419                                                 newptr = gettreelistitem();
1420                                                 (*newptr).tree = *tree; 
1421                                                 *tree = NULL;
1422                                                 (*newptr).id    = *numitems;
1423                                                 (*newptr).count = numtrees;
1424                                                 (*newptr).pred  = tmpptr;
1425                                                 (*tmpptr).succ  = newptr;
1426                                                 (*numitems)++;
1427                                                 *numsum += numtrees;
1428                                                 done = 1;
1429                                         }
1430                         } else { /* result < 0 */
1431                                 newptr = gettreelistitem();
1432                                 (*newptr).tree = *tree; 
1433                                 *tree = NULL;
1434                                 (*newptr).id    = *numitems;
1435                                 (*newptr).count = numtrees;
1436                                 (*newptr).succ  = tmpptr;
1437                                 (*newptr).pred  = (*tmpptr).pred;
1438                                 (*tmpptr).pred  = newptr;
1439                                 *numsum += numtrees;
1440
1441                                 if ((*newptr).pred != NULL) {
1442                                    (*(*newptr).pred).succ = newptr;
1443                                 } else {
1444                                    *list = newptr;
1445                                 }
1446                                 (*numitems)++;
1447                                 done = 1;
1448                         } /* end if result < 0 */
1449                         } /* end if result != 0 */
1450                 } /* while  searching in list */
1451         } /* if list empty, else */
1452         return (newptr);
1453 } /* addtree2list */
1454
1455
1456 /**********/
1457
1458 /* resort list of trees by number of occurences for output */
1459 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist)
1460 {
1461         treelistitemtype *tmpptr = NULL;
1462         treelistitemtype *curr = NULL;
1463         treelistitemtype *next = NULL;
1464         int xchange = 1;
1465
1466         if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1467         tmpptr = list;
1468         *sortlist = list;
1469         while (tmpptr != NULL) {
1470                 (*tmpptr).sortnext = (*tmpptr).succ;
1471                 (*tmpptr).sortlast = (*tmpptr).pred;
1472                 tmpptr = (*tmpptr).succ;
1473         }
1474
1475         while (xchange > 0) {
1476                 curr = *sortlist;
1477                 xchange = 0;
1478                 if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1479                 while((*curr).sortnext != NULL) {
1480                         next = (*curr).sortnext;
1481                         if ((*curr).count >= (*next).count)
1482                                 curr = (*curr).sortnext;
1483                         else {
1484                                 if ((*curr).sortlast != NULL)
1485                                         (*((*curr).sortlast)).sortnext = next;
1486                                 if (*sortlist == curr)
1487                                         *sortlist = next;
1488                                 (*next).sortlast = (*curr).sortlast;
1489
1490                                 if ((*next).sortnext != NULL)
1491                                         (*((*next).sortnext)).sortlast = curr;
1492                                 (*curr).sortnext = (*next).sortnext;
1493
1494                                 (*curr).sortlast = next;
1495                                 (*next).sortnext = curr;
1496
1497                                 xchange++;
1498                         }
1499                 }
1500         }
1501 }  /* sortbynum */
1502
1503
1504 /**********/
1505
1506 /* print puzzling step tree stuctures for checking */
1507 void printfpstrees(treelistitemtype *list)
1508 {
1509         char ch;
1510         treelistitemtype *tmpptr = NULL;
1511         tmpptr = list;
1512         ch = '-';
1513         while (tmpptr != NULL) {
1514                 printf ("%c[%2d]  %5d     %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1515                 tmpptr = (*tmpptr).succ;
1516                 ch = ' ';
1517         }
1518 }
1519
1520 /**********/
1521
1522 /* print sorted puzzling step tree stucture with names */
1523 void fprintffullpstree(FILE *outf, char *treestr)
1524 {
1525         int count = 0;
1526         int idnum = 0;
1527         int n;
1528         for(n=0; treestr[n] != '\0'; n++){
1529                 while(isdigit((int)treestr[n])){
1530                         idnum = (10 * idnum) + ((int)treestr[n]-48);
1531                         n++;
1532                         count++;
1533                 }
1534                 if (count > 0){
1535 #                       ifdef USEQUOTES
1536                                 fprintf(outf, "'");
1537 #                       endif
1538                         (void)fputid(outf, idnum);
1539 #                       ifdef USEQUOTES
1540                                 fprintf(outf, "'");
1541 #                       endif
1542                         count = 0;
1543                         idnum = 0;
1544                 }
1545                 fprintf(outf, "%c", treestr[n]);
1546         }
1547 }
1548
1549
1550 /**********/
1551
1552 /* print sorted puzzling step tree stuctures with names */
1553 void fprintfsortedpstrees(FILE *output, 
1554                           treelistitemtype *list,  /* tree list */
1555                           int itemnum,             /* order number */
1556                           int itemsum,             /* number of trees */
1557                           int comment,             /* with statistics, or puzzle report ? */
1558                           float cutoff)            /* cutoff percentage */
1559 {
1560         treelistitemtype *tmpptr = NULL;
1561         treelistitemtype *slist = NULL;
1562         int num = 1;
1563         float percent;
1564
1565         if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1566         sortbynum(list, &slist); 
1567
1568         tmpptr = slist;
1569         while (tmpptr != NULL) {
1570                 percent = (float)(100.0 * (*tmpptr).count / itemsum);
1571                 if ((cutoff == 0.0) || (cutoff <= percent)) {
1572                         if (comment)
1573                                 fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum);
1574                         else {
1575                                 if (num == 1){
1576                                         fprintf (output, "\n");
1577                                         fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum);
1578                                         fprintf (output, "The trees are orderd descending by the number of occurences.\n");
1579                                         fprintf (output, "\n");
1580                                         fprintf (output, "\n       occurences    ID  Phylip tree\n");
1581                                 }
1582                                 fprintf (output, "%2d. %5d %6.2f%% %5d  ", num++, (*tmpptr).count, percent, (*tmpptr).id);
1583                         }
1584                         fprintffullpstree(output, (*tmpptr).tree);
1585                         fprintf (output, "\n");
1586                 }
1587                 tmpptr = (*tmpptr).sortnext;
1588         }
1589
1590         if (!comment) {
1591                 fprintf (output, "\n");
1592                 switch(num) {
1593                         case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break;
1594                         case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break;
1595                         default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break;
1596                 }
1597                 fprintf (output, "\n");
1598                 fprintf (output, "\n");
1599         }
1600         
1601 }  /* fprintfsortedpstrees */
1602
1603 /**********/
1604
1605 /* print sorted tree topologies for checking */
1606 void printfsortedpstrees(treelistitemtype *list)
1607 {
1608         treelistitemtype *tmpptr = NULL;
1609         treelistitemtype *slist = NULL;
1610
1611         sortbynum(list, &slist); 
1612
1613         tmpptr = slist;
1614         while (tmpptr != NULL) {
1615                 printf ("[%2d]  %5d     %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1616                 tmpptr = (*tmpptr).sortnext;
1617         }
1618 }  /* printfsortedpstrees */
1619
1620
1621 /*******************************************/
1622 /* end of tree sorting                     */
1623 /*******************************************/
1624
1625
1626
1627 /******************************************************************************/
1628 /* functions for computing the consensus tree                                 */
1629 /******************************************************************************/
1630
1631 /* prepare for consensus tree analysis */
1632 void initconsensus()
1633 {
1634 #       if ! PARALLEL
1635                 biparts = new_cmatrix(Maxspc-3, Maxspc);
1636 #       endif /* PARALLEL */
1637
1638         if (Maxspc % 32 == 0)
1639                 splitlength = Maxspc/32;
1640         else splitlength = (Maxspc + 32 - (Maxspc % 32))/32;
1641         numbiparts = 0; /* no pattern stored so far */
1642         maxbiparts = 0; /* no memory reserved so far */
1643         splitfreqs = NULL;
1644         splitpatterns = NULL;
1645         splitsizes = NULL;
1646         splitcomp = (uli *) malloc(splitlength * sizeof(uli) );
1647         if (splitcomp == NULL) maerror("splitcomp in initconsensus");
1648 }
1649
1650 /* prototype needed for recursive function */
1651 void makepart(int i, int curribrnch);
1652
1653 /* recursive function to get bipartitions */
1654 void makepart(int i, int curribrnch)
1655 {       
1656         int j;
1657         
1658         if ( edge[i].downright == NULL ||
1659                   edge[i].downleft == NULL) { /* if i is leaf */
1660                         
1661                         /* check out what leaf j sits on this edge i */         
1662                         for (j = 1; j < Maxspc; j++) {
1663                                 if (edgeofleaf[j] == i) {
1664                                         biparts[curribrnch][trueID[j]] = '*';   
1665                                         return;
1666                                 }       
1667                         }
1668         } else { /* still on inner branch */
1669                 makepart(edge[i].downleft->numedge, curribrnch);
1670                 makepart(edge[i].downright->numedge, curribrnch);
1671         }
1672 }
1673
1674 /* compute bipartitions of tree of current puzzling step */
1675 void computebiparts()
1676 {
1677         int i, j, curribrnch;
1678         
1679         curribrnch = -1;
1680         
1681         for (i = 0; i < Maxspc - 3; i++)
1682                 for (j = 0; j < Maxspc; j++)
1683                         biparts[i][j] = '.';
1684
1685         for (i = 0; i < Maxbrnch; i++) {
1686                 if (!(     edgeofleaf[0] == i    ||
1687                        edge[i].downright == NULL ||
1688                         edge[i].downleft == NULL) ) { /* check all inner branches */
1689                         curribrnch++;
1690                         makepart(i, curribrnch);
1691                         
1692                         /* make sure that the root is always a '*' */
1693                         if (biparts[curribrnch][outgroup] == '.') {
1694                                 for (j = 0; j < Maxspc; j++) {
1695                                         if (biparts[curribrnch][j] == '.')
1696                                                 biparts[curribrnch][j] = '*';
1697                                         else
1698                                                 biparts[curribrnch][j] = '.';
1699                                 }
1700                         }
1701                 }
1702         }
1703 }
1704
1705 /* print out the bipartition n of all different splitpatterns */
1706 void printsplit(FILE *fp, uli n)
1707 {
1708         int i, j, col;
1709         uli z;
1710         
1711         col = 0;
1712         for (i = 0; i < splitlength; i++) {
1713                 z = splitpatterns[n*splitlength + i];
1714                 for (j = 0; j < 32 && col < Maxspc; j++) {
1715                         if (col % 10 == 0 && col != 0) fprintf(fp, " ");
1716                         if (z & 1) fprintf(fp, ".");
1717                         else fprintf(fp, "*");
1718                         z = (z >> 1);
1719                         col++;
1720                 }
1721         }
1722 }
1723
1724 /* make new entries for new different bipartitions and count frequencies */
1725 void makenewsplitentries()
1726 {
1727         int i, j, bpc, identical, idflag, bpsize;
1728         uli nextentry, obpc;
1729
1730         /* where the next entry would be in splitpatterns */
1731         nextentry = numbiparts;
1732         
1733         for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */        
1734                 /* convert bipartition into a more compact format */
1735                 bpsize = 0;
1736                 for (i = 0; i < splitlength; i++) {
1737                         splitcomp[i] = 0;       
1738                         for (j = 0; j < 32; j++) {
1739                                 splitcomp[i] = splitcomp[i] >> 1;
1740                                 if (i*32 + j < Maxspc)
1741                                         if (biparts[bpc][i*32 + j] == '.') {
1742                                                 /* set highest bit */
1743                                                 splitcomp[i] = (splitcomp[i] | 2147483648UL);
1744                                                 bpsize++; /* count the '.' */
1745                                         }
1746                         }
1747                 }               
1748                 /* compare to the *old* patterns */
1749                 identical = FALSE;
1750                 for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) {
1751                         /* compare first partition size */
1752                         if (splitsizes[obpc] == bpsize) idflag = TRUE;
1753                         else idflag = FALSE;
1754                         /* if size is identical compare whole partition */
1755                         for (i = 0; (i < splitlength) && idflag; i++)
1756                                 if (splitcomp[i] != splitpatterns[obpc*splitlength + i])
1757                                         idflag = FALSE;
1758                         if (idflag) identical = TRUE;
1759                 }
1760                 if (identical) { /* if identical increase frequency */
1761                         splitfreqs[2*(obpc-1)]++;
1762                 } else { /* create new entry */
1763                         if (nextentry == maxbiparts) { /* reserve more memory */
1764                                 maxbiparts = maxbiparts + 2*Maxspc;
1765                                 splitfreqs = (uli *) myrealloc(splitfreqs,
1766                                         2*maxbiparts * sizeof(uli) );
1767                                 /* 2x: splitfreqs contains also an index (sorting!) */
1768                                 if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries");
1769                                 splitpatterns = (uli *) myrealloc(splitpatterns,
1770                                         splitlength*maxbiparts * sizeof(uli) );
1771                                 if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries");
1772                                 splitsizes = (int *) myrealloc(splitsizes,
1773                                         maxbiparts * sizeof(int) );
1774                                 if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries");
1775                         }
1776                         splitfreqs[2*nextentry] = 1; /* frequency */
1777                         splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */
1778                         for (i = 0; i < splitlength; i++)
1779                                 splitpatterns[nextentry*splitlength + i] = splitcomp[i];
1780                         splitsizes[nextentry] = bpsize;
1781                         nextentry++;
1782                 }
1783         }
1784         numbiparts = nextentry;
1785 }
1786
1787 /* general remarks:
1788
1789    - every entry in consbiparts is one node of the consensus tree
1790    - for each node one has to know which taxa and which other nodes
1791      are *directly* descending from it
1792    - for every taxon/node number there is a flag that shows
1793      whether it descends from the node or not
1794    - '0' means that neither a taxon nor another node with the
1795          corresponding number decends from the node
1796      '1' means that the corresponding taxon descends from the node
1797      '2' means that the corresponding node descends from the node
1798      '3' means that the corresponding taxon and node descends from the node
1799 */
1800
1801 /* copy bipartition n of all different splitpatterns to consbiparts[k] */
1802 void copysplit(uli n, int k)
1803 {
1804         int i, j, col;
1805         uli z;
1806         
1807         col = 0;
1808         for (i = 0; i < splitlength; i++) {
1809                 z = splitpatterns[n*splitlength + i];
1810                 for (j = 0; j < 32 && col < Maxspc; j++) {
1811                         if (z & 1) consbiparts[k][col] = '1';
1812                         else consbiparts[k][col] = '0';
1813                         z = (z >> 1);
1814                         col++;
1815                 }
1816         }
1817 }
1818
1819 /* compute majority rule consensus tree */
1820 void makeconsensus()
1821 {
1822         int i, j, k, size, subnode;
1823         char chari, charj;
1824
1825         /* sort bipartition frequencies */
1826         qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp);
1827         /* how many bipartitions are included in the consensus tree */
1828         consincluded = 0;
1829         for (i = 0; i < numbiparts && i == consincluded; i++) {
1830                 if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1;
1831         }
1832
1833         /* collect all info about majority rule consensus tree */
1834         /* the +1 is due to the edge with the root */
1835         consconfid = new_ivector(consincluded + 1);
1836         conssizes = new_ivector(2*consincluded + 2);
1837         consbiparts = new_cmatrix(consincluded + 1, Maxspc);
1838         
1839         for (i = 0; i < consincluded; i++) {
1840                 /* copy partition to consbiparts */
1841                 copysplit(splitfreqs[2*i+1], i);
1842                 /* frequency in percent (rounded to integer) */
1843                 consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5);
1844                 /* size of partition */
1845                 conssizes[2*i] = splitsizes[splitfreqs[2*i+1]];
1846                 conssizes[2*i+1] = i;
1847         }
1848         for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1';
1849         consbiparts[consincluded][outgroup] = '0';
1850         consconfid[consincluded] = 100;
1851         conssizes[2*consincluded] = Maxspc - 1;
1852         conssizes[2*consincluded + 1] = consincluded;
1853
1854         /* sort bipartitions according to cluster size */
1855         qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp);
1856
1857         /* reconstruct consensus tree */
1858         for (i = 0; i < consincluded; i++) { /* try every node */
1859                 size = conssizes[2*i]; /* size of current node */
1860                 for (j = i + 1; j < consincluded + 1; j++) {
1861                 
1862                         /* compare only with nodes with more descendants */
1863                         if (size == conssizes[2*j]) continue;
1864                         
1865                         /* check whether node i is a subnode of j */
1866                         subnode = FALSE;
1867                         for (k = 0; k < Maxspc && !subnode; k++) {
1868                                 chari = consbiparts[ conssizes[2*i+1] ][k];
1869                                 if (chari != '0') {
1870                                         charj = consbiparts[ conssizes[2*j+1] ][k];
1871                                         if (chari == charj || charj == '3') subnode = TRUE;
1872                                 }
1873                         }
1874                         
1875                         /* if i is a subnode of j change j accordingly */
1876                         if (subnode) {
1877                                 /* remove subnode i from j */
1878                                 for (k = 0; k < Maxspc; k++) {
1879                                         chari = consbiparts[ conssizes[2*i+1] ][k];
1880                                         if (chari != '0') {
1881                                                 charj = consbiparts[ conssizes[2*j+1] ][k];
1882                                                 if (chari == charj)
1883                                                         consbiparts[ conssizes[2*j+1] ][k] = '0';
1884                                                 else if (charj == '3') {
1885                                                         if (chari == '1')
1886                                                                 consbiparts[ conssizes[2*j+1] ][k] = '2';
1887                                                         else if (chari == '2')
1888                                                                 consbiparts[ conssizes[2*j+1] ][k] = '1';
1889                                                         else {
1890                                                                 /* Consensus tree [1] */
1891                                                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n");
1892                                                                 exit(1);
1893                                                         }       
1894                                                 } else {
1895                                                         /* Consensus tree [2] */
1896                                                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n");
1897                                                         exit(1);
1898                                                 }
1899                                         }
1900                                 }
1901                                 /* add link to subnode i in node j */
1902                                 charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ];
1903                                 if (charj == '0')
1904                                         consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2';
1905                                 else if (charj == '1')
1906                                         consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3';
1907                                 else {
1908                                         /* Consensus tree [3] */
1909                                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n");
1910                                         exit(1);
1911                                 }
1912                         }
1913                 }
1914         }
1915 }
1916
1917 /* prototype for recursion */
1918 void writenode(FILE *treefile, int node);
1919
1920 /* write node (writeconsensustree) */
1921 void writenode(FILE *treefile, int node)
1922 {
1923         int i, first;
1924         
1925         fprintf(treefile, "(");
1926         column++;
1927         /* write descending nodes */
1928         first = TRUE;
1929         for (i = 0; i < Maxspc; i++) {
1930                 if (consbiparts[node][i] == '2' ||
1931                 consbiparts[node][i] == '3') {
1932                         if (first) first = FALSE;
1933                         else {
1934                                 fprintf(treefile, ",");
1935                                 column++;
1936                         }
1937                         if (column > 60) {
1938                                 column = 2;
1939                                 fprintf(treefile, "\n");
1940                         }
1941                         /* write node i */
1942                         writenode(treefile, i);
1943
1944                         /* reliability value as internal label */
1945                         fprintf(treefile, "%d", consconfid[i]);
1946
1947                         column = column + 3;
1948                 }
1949         }
1950         /* write descending taxa */
1951         for (i = 0; i < Maxspc; i++) {
1952                 if (consbiparts[node][i] == '1' ||
1953                 consbiparts[node][i] == '3') {
1954                         if (first) first = FALSE;
1955                         else {
1956                                 fprintf(treefile, ",");
1957                                 column++;
1958                         }
1959                         if (column > 60) {
1960                                 column = 2;
1961                                 fprintf(treefile, "\n");
1962                         }
1963                         column += fputid(treefile, i);
1964                 }
1965         }
1966         fprintf(treefile, ")");
1967         column++;
1968 }
1969
1970 /* write consensus tree */
1971 void writeconsensustree(FILE *treefile)
1972 {
1973         int i, first;
1974         
1975         column = 1;
1976         fprintf(treefile, "(");
1977         column += fputid(treefile, outgroup) + 2;
1978         fprintf(treefile, ",");
1979         /* write descending nodes */
1980         first = TRUE;
1981         for (i = 0; i < Maxspc; i++) {
1982                 if (consbiparts[consincluded][i] == '2' ||
1983                 consbiparts[consincluded][i] == '3') {
1984                         if (first) first = FALSE;
1985                         else {
1986                                 fprintf(treefile, ",");
1987                                 column++;
1988                         }
1989                         if (column > 60) {
1990                                 column = 2;
1991                                 fprintf(treefile, "\n");
1992                         }
1993                         /* write node i */
1994                         writenode(treefile, i);
1995
1996                         /* reliability value as internal label */
1997                         fprintf(treefile, "%d", consconfid[i]);
1998
1999                         column = column + 3;
2000                 }
2001         }
2002         /* write descending taxa */
2003         for (i = 0; i < Maxspc; i++) {
2004                 if (consbiparts[consincluded][i] == '1' ||
2005                 consbiparts[consincluded][i] == '3') {
2006                         if (first) first = FALSE;
2007                         else {
2008                                 fprintf(treefile, ",");
2009                                 column++;
2010                         }
2011                         if (column > 60) {
2012                                 column = 2;
2013                                 fprintf(treefile, "\n");
2014                         }
2015                         column += fputid(treefile, i);
2016                 }
2017         }
2018         fprintf(treefile, ");\n");
2019 }
2020
2021 /* prototype for recursion */
2022 void nodecoordinates(int node);
2023
2024 /* establish node coordinates (plotconsensustree) */
2025 void nodecoordinates(int node)
2026 {
2027         int i, ymin, ymax, xcoordinate;
2028
2029         /* first establish coordinates of descending nodes */
2030         for (i = 0; i < Maxspc; i++) {
2031                 if (consbiparts[node][i] == '2' ||
2032                 consbiparts[node][i] == '3') 
2033                         nodecoordinates(i);
2034         }
2035         
2036         /* then establish coordinates of descending taxa */
2037         for (i = 0; i < Maxspc; i++) {
2038                 if (consbiparts[node][i] == '1' ||
2039                 consbiparts[node][i] == '3') {
2040                         /* y-coordinate of taxon i */
2041                         ycortax[i] = ytaxcounter;
2042                         ytaxcounter = ytaxcounter - 2;
2043                 }
2044         }
2045         
2046         /* then establish coordinates of this node */
2047         ymin = 2*Maxspc - 2;
2048         ymax = 0;
2049         xcoordinate = 0;
2050         for (i = 0; i < Maxspc; i++) {
2051                 if (consbiparts[node][i] == '2' ||
2052                 consbiparts[node][i] == '3') {
2053                         if (ycor[i] > ymax) ymax = ycor[i];
2054                         if (ycor[i] < ymin) ymin = ycor[i];
2055                         if (xcor[i] > xcoordinate) xcoordinate = xcor[i];
2056                 }
2057         }
2058         for (i = 0; i < Maxspc; i++) {
2059                 if (consbiparts[node][i] == '1' ||
2060                 consbiparts[node][i] == '3') {
2061                         if (ycortax[i] > ymax) ymax = ycortax[i];
2062                         if (ycortax[i] < ymin) ymin = ycortax[i];
2063                 }
2064         }
2065         ycormax[node] = ymax;
2066         ycormin[node] = ymin;
2067         ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5);
2068         if (xcoordinate == 0) xcoordinate = 9;
2069         xcor[node] = xcoordinate + 4;
2070 }
2071
2072 /* prototype for recursion */
2073 void drawnode(int node, int xold);
2074
2075 /* drawnode  (plotconsensustree) */
2076 void drawnode(int node, int xold)
2077 {
2078         int i, j;
2079         char buf[4];
2080         
2081         /* first draw vertical line */
2082         for (i = ycormin[node] + 1; i < ycormax[node]; i++)
2083                 treepict[xcor[node]][i] = ':';
2084                 
2085         /* then draw descending nodes */
2086         for (i = 0; i < Maxspc; i++) {
2087                 if (consbiparts[node][i] == '2' ||
2088                 consbiparts[node][i] == '3') 
2089                         drawnode(i, xcor[node]);
2090         }
2091         
2092         /* then draw descending taxa */
2093         for (i = 0; i < Maxspc; i++) {
2094                 if (consbiparts[node][i] == '1' ||
2095                 consbiparts[node][i] == '3') {
2096                         treepict[xcor[node]][ycortax[i]] = ':';
2097                         for (j = xcor[node] + 1; j < xsize-10; j++)
2098                                 treepict[j][ycortax[i]] = '-';
2099                         for (j = 0; j < 10; j++)
2100                                 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];       
2101                 }
2102         }
2103         
2104         /* then draw internal edge with consensus value */
2105         treepict[xold][ycor[node]] = ':';
2106         treepict[xcor[node]][ycor[node]] = ':';
2107         for (i = xold + 1; i < xcor[node]-3; i++)
2108                 treepict[i][ycor[node]] = '-';
2109         sprintf(buf, "%d", consconfid[node]);
2110         if (consconfid[node] == 100) {
2111                 treepict[xcor[node]-3][ycor[node]] = buf[0];
2112                 treepict[xcor[node]-2][ycor[node]] = buf[1];
2113                 treepict[xcor[node]-1][ycor[node]] = buf[2];    
2114         } else {
2115                 treepict[xcor[node]-3][ycor[node]] = '-';
2116                 treepict[xcor[node]-2][ycor[node]] = buf[0];
2117                 treepict[xcor[node]-1][ycor[node]] = buf[1];
2118         }
2119 }
2120
2121 /* plot consensus tree */
2122 void plotconsensustree(FILE *plotfp)
2123 {
2124         int i, j, yroot, startree;
2125
2126         /* star tree or no star tree */
2127         if (consincluded == 0) {
2128                 startree = TRUE;
2129                 consincluded = 1; /* avoids problems with malloc */
2130         } else
2131                 startree = FALSE;
2132         
2133         /* memory for x-y-coordinates of each bipartition */
2134         xcor = new_ivector(consincluded);
2135         ycor = new_ivector(consincluded);
2136         ycormax = new_ivector(consincluded);
2137         ycormin = new_ivector(consincluded);
2138         if (startree) consincluded = 0; /* avoids problems with malloc */
2139
2140         /* y-coordinates of each taxon */
2141         ycortax = new_ivector(Maxspc);
2142         ycortax[outgroup] = 0;
2143         
2144         /* establish coordinates */
2145         ytaxcounter = 2*Maxspc - 2;
2146         
2147         /* first establish coordinates of descending nodes */
2148         for (i = 0; i < Maxspc; i++) {
2149                 if (consbiparts[consincluded][i] == '2' ||
2150                 consbiparts[consincluded][i] == '3') 
2151                         nodecoordinates(i);
2152         }
2153         
2154         /* then establish coordinates of descending taxa */
2155         for (i = 0; i < Maxspc; i++) {
2156                 if (consbiparts[consincluded][i] == '1' ||
2157                 consbiparts[consincluded][i] == '3') {
2158                         /* y-coordinate of taxon i */
2159                         ycortax[i] = ytaxcounter;
2160                         ytaxcounter = ytaxcounter - 2;
2161                 }
2162         }
2163
2164         /* then establish length of root edge and size of whole tree */
2165         yroot = 0;
2166         xsize = 0;
2167         for (i = 0; i < Maxspc; i++) {
2168                 if (consbiparts[consincluded][i] == '2' ||
2169                 consbiparts[consincluded][i] == '3') {
2170                         if (ycor[i] > yroot) yroot = ycor[i];
2171                         if (xcor[i] > xsize) xsize = xcor[i];
2172                 }
2173         }
2174         for (i = 0; i < Maxspc; i++) {
2175                 if (consbiparts[consincluded][i] == '1' ||
2176                 consbiparts[consincluded][i] == '3') {
2177                         if (ycortax[i] > yroot) yroot = ycortax[i];
2178                 }
2179         }
2180         if (xsize == 0) xsize = 9;
2181         /* size in x direction inclusive one blank on the left */
2182         xsize = xsize + 6; 
2183         
2184         /* change all x-labels so that (0,0) is down-left */
2185         for (i = 0; i < consincluded; i++)
2186                 xcor[i] = xsize-1-xcor[i];
2187         
2188         /* draw tree */
2189         treepict = new_cmatrix(xsize, 2*Maxspc-1);
2190         for (i = 0; i < xsize; i++)
2191                 for (j = 0; j < 2*Maxspc-1; j++)
2192                         treepict[i][j] = ' ';
2193         
2194         /* draw root */
2195         for (i = 1; i < yroot; i++)
2196                 treepict[1][i] = ':';
2197         treepict[1][0] = ':';
2198         for (i = 2; i < xsize - 10; i++)
2199                 treepict[i][0] = '-';
2200         for (i = 0; i < 10; i++)
2201                 treepict[xsize-10+i][0] = Identif[outgroup][i];
2202         
2203         /* then draw descending nodes */
2204         for (i = 0; i < Maxspc; i++) {
2205                 if (consbiparts[consincluded][i] == '2' ||
2206                 consbiparts[consincluded][i] == '3') 
2207                         drawnode(i, 1);
2208         }
2209         
2210         /* then draw descending taxa */
2211         for (i = 0; i < Maxspc; i++) {
2212                 if (consbiparts[consincluded][i] == '1' ||
2213                 consbiparts[consincluded][i] == '3') {
2214                         treepict[1][ycortax[i]] = ':';
2215                         for (j = 2; j < xsize-10; j++)
2216                                 treepict[j][ycortax[i]] = '-';
2217                         for (j = 0; j < 10; j++)
2218                                 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];       
2219                 }
2220         }
2221         
2222         /* plot tree */
2223         for (i = 2*Maxspc-2; i > -1; i--) {
2224                 for (j = 0; j < xsize; j++)
2225                         fputc(treepict[j][i], plotfp);
2226                 fputc('\n', plotfp);
2227         }       
2228         
2229         free_ivector(xcor);
2230         free_ivector(ycor);
2231         free_ivector(ycormax);
2232         free_ivector(ycormin);
2233         free_ivector(ycortax);
2234         free_cmatrix(treepict);
2235 }
2236
2237
2238
2239 /******************************************************************************/
2240 /* storing and evaluating quartet branching information                       */
2241 /******************************************************************************/
2242
2243 /* general remarks:
2244
2245         for a quartet with the taxa a, b, c, d there are
2246         three possible binary trees:
2247         
2248                 1)  (a,b)-(c,d)
2249                 2)  (a,c)-(b,d)
2250                 3)  (a,d)-(b,c)
2251         
2252         For every quartet information about its branching structure is
2253         stored. With the functions  readquartet  and  writequartet
2254         this information can be accessed. For every quartet (a,b,c,d)
2255         with a < b < c < d (taxa) the branching information is encoded
2256         using 4 bits:
2257         
2258         value          8             4             2             1
2259                 +-------------+-------------+-------------+-------------+
2260                 |  not used   |   tree 3    |   tree 2    |   tree 1    |
2261                 +-------------+-------------+-------------+-------------+
2262
2263         If the branching structure of the taxa corresponds to one of the
2264         three trees the corresponding bit is set. If the branching structure
2265         is unclear because two of the three trees have the same maximum
2266         likelihood value the corresponding two bits are set. If the branching
2267         structure is completely unknown all the bits are set (the highest
2268         bit is always cleared because it is not used).
2269
2270 */
2271
2272 /* allocate memory for quartets */
2273 unsigned char *mallocquartets(int taxa)
2274 {
2275         uli nc, numch;
2276         unsigned char *qinfo;
2277         
2278         /* compute number of quartets */
2279         Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
2280         if (Numquartets % 2 == 0) { /* even number */
2281                 numch = Numquartets/2;
2282         } else { /* odd number */
2283                 numch = (Numquartets + 1)/2;
2284         }
2285         /* allocate memory */
2286         qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) );
2287         if (qinfo == NULL) maerror("quartetinfo in mallocquartets");
2288         for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
2289         return(qinfo);
2290 }
2291
2292 /* free quartet memory */
2293 void freequartets()
2294 {       
2295         free(quartetinfo);
2296 }
2297
2298 /* read quartet info - a < b < c < d */
2299 unsigned char readquartet(int a, int b, int c, int d)
2300 {
2301         uli qnum;
2302
2303         qnum = (uli) a
2304                         + (uli) b*(b-1)/2
2305                         + (uli) c*(c-1)*(c-2)/6
2306                         + (uli) d*(d-1)*(d-2)*(d-3)/24;
2307         if (qnum % 2 == 0) { /* even number */
2308                 /* bits 0 to 3 */
2309                 return (quartetinfo[qnum/2] & (unsigned char) 15);
2310         } else { /* odd number */
2311                 /* bits 4 to 7 */
2312                 return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
2313         }
2314 }
2315
2316 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
2317 void writequartet(int a, int b, int c, int d, unsigned char info)
2318 {
2319         uli qnum;
2320
2321         qnum = (uli) a
2322                         + (uli) b*(b-1)/2
2323                         + (uli) c*(c-1)*(c-2)/6
2324                         + (uli) d*(d-1)*(d-2)*(d-3)/24;
2325         if (qnum % 2 == 0) { /* even number */
2326                 /* bits 0 to 3 */
2327                 quartetinfo[qnum/2] =
2328                         ((quartetinfo[qnum/2] & (unsigned char) 240) |
2329                         (info & (unsigned char) 15));
2330         } else { /* odd number */
2331                 /* bits 4 to 7 */
2332                 quartetinfo[(qnum-1)/2] =
2333                         ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
2334                         ((info & (unsigned char) 15)<<4));
2335         }
2336 }
2337
2338 /* prototypes */
2339 void openfiletowrite(FILE **, char[], char[]);
2340 void closefile(FILE *);
2341
2342 /* sorts three doubles in descending order */
2343 void sort3doubles(dvector num, ivector order)
2344 {
2345         if (num[0] > num[1]) {
2346                 if(num[2] > num[0]) {
2347                         order[0] = 2;
2348                         order[1] = 0;
2349                         order[2] = 1;           
2350                 } else if (num[2] < num[1]) {
2351                         order[0] = 0;
2352                         order[1] = 1;
2353                         order[2] = 2;           
2354                 } else {
2355                         order[0] = 0;
2356                         order[1] = 2;
2357                         order[2] = 1;           
2358                 }
2359         } else {
2360                 if(num[2] > num[1]) {
2361                         order[0] = 2;
2362                         order[1] = 1;
2363                         order[2] = 0;           
2364                 } else if (num[2] < num[0]) {
2365                         order[0] = 1;
2366                         order[1] = 0;
2367                         order[2] = 2;           
2368                 } else {
2369                         order[0] = 1;
2370                         order[1] = 2;
2371                         order[2] = 0;           
2372                 }
2373         }
2374 }
2375
2376 /* checks out all possible quartets */
2377 void computeallquartets()
2378 {       
2379         double onethird;
2380         uli nq;
2381         unsigned char treebits[3];
2382         FILE *lhfp;
2383 #       if ! PARALLEL
2384                 int a, b, c, i;
2385                 double qc2, mintogo, minutes, hours, temp;
2386                 double temp1, temp2, temp3;
2387                 unsigned char discreteweight[3];
2388 #       endif
2389         
2390         onethird = 1.0/3.0;
2391         treebits[0] = (unsigned char) 1;
2392         treebits[1] = (unsigned char) 2;
2393         treebits[2] = (unsigned char) 4;
2394         
2395         if (show_optn) { /* list all unresolved quartets */
2396                 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
2397                 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
2398         }
2399
2400         nq = 0;
2401         badqs = 0;
2402         
2403         /* start timer - percentage of completed quartets */
2404         time(&time0);
2405         time1 = time0;
2406         mflag = 0;
2407         
2408 #       if PARALLEL
2409         {
2410            schedtype sched; 
2411            int flag;
2412            MPI_Status stat;
2413            int dest = 1;
2414            uli qaddr  =0;
2415            uli qamount=0;
2416            int qblocksent = 0;
2417            int apr;
2418            uli sq, noq;
2419            initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4);
2420            qamount=sgss(&sched);
2421            while (qamount > 0) {
2422               if (PP_emptyslave()) {
2423                  PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2424                  qblocksent -= noq;
2425               }
2426               dest = PP_getslave();
2427               PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? APPROX : EXACT));
2428               qblocksent += qamount;
2429               qaddr += qamount;
2430               qamount=sgss(&sched);
2431                  
2432               MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2433               while (flag) {
2434                  PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2435                  qblocksent -= noq;
2436                  MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2437               }
2438            }
2439            while (qblocksent > 0) {
2440               PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2441               qblocksent -= noq;
2442            }
2443         }
2444 #       else /* PARALLEL */
2445
2446         addtimes(GENERAL, &tarr);
2447         if (savequartlh_optn) {
2448                 openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
2449                 if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
2450                 else                 writetpqfheader(Maxspc, lhfp, 4); 
2451         }
2452
2453         for (i = 3; i < Maxspc; i++) 
2454                 for (c = 2; c < i; c++) 
2455                         for (b = 1; b < c; b++)
2456                                 for (a = 0; a < b; a++) {
2457                                                         nq++;
2458
2459                                                         /* generate message every 15 minutes */
2460                                                         /* check timer */
2461                                                         time(&time2);
2462                                                         if ( (time2 - time1) > 900) {
2463                                                                 /* every 900 seconds */
2464                                                                 /* percentage of completed quartets */
2465                                                                 if (mflag == 0) {
2466                                                                         FPRINTF(STDOUTFILE "\n");
2467                                                                         mflag = 1;
2468                                                                 }
2469                                                                 qc2 = 100.*nq/Numquartets;
2470                                                                 mintogo = (100.0-qc2) *
2471                                                                         (double) (time2-time0)/60.0/qc2;
2472                                                                 hours = floor(mintogo/60.0);
2473                                                                 minutes = mintogo - 60.0*hours;
2474                                                                 FPRINTF(STDOUTFILE "%.2f%%", qc2);
2475                                                                 FPRINTF(STDOUTFILE " completed (remaining");
2476                                                                 FPRINTF(STDOUTFILE " time: %.0f", hours);
2477                                                                 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2478                                                                 FPRINTF(STDOUTFILE " minutes)\n");
2479                                                                 fflush(STDOUT);
2480                                                                 time1 = time2;
2481                                                         }
2482                                                         
2483                                                         /* maximum likelihood values */
2484                                                            
2485                                                         /* exact or approximate maximum likelihood values */
2486                                                         compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
2487                                                         
2488                                                         if (savequartlh_optn) {
2489                                                                 if (saveqlhbin_optn)
2490                                                                         fwrite(qweight, sizeof(double), 3, lhfp);
2491                                                                 else
2492                                                                         fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i, 
2493                                                                         qweight[0], qweight[1], qweight[2]); 
2494                                                         }
2495
2496                                                         /* sort in descending order */
2497                                                         sort3doubles(qweight, qworder);
2498
2499                                                         if (usebestq_optn) {
2500                                                                 sqorder[2] = 2;
2501                                                                 discreteweight[sqorder[2]] = treebits[qworder[0]];
2502                                                                 if (qweight[qworder[0]] == qweight[qworder[1]]) {
2503                                                                    discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]];
2504                                                                    if (qweight[qworder[1]] == qweight[qworder[2]]) {
2505                                                                       discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]];
2506                                                                       discreteweight[sqorder[2]] = 7;
2507                                                                    } 
2508                                                                 }
2509                                                         } else {
2510
2511                                                                 /* compute Bayesian weights */
2512                                                                 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
2513                                                                 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
2514                                                                 qweight[qworder[0]] = 1.0;
2515                                                                 temp = qweight[0] + qweight[1] + qweight[2];
2516                                                                 qweight[0] = qweight[0]/temp;
2517                                                                 qweight[1] = qweight[1]/temp;
2518                                                                 qweight[2] = qweight[2]/temp;
2519                                                                 
2520                                                                 /* square deviations */
2521                                                                 temp1 = 1.0 - qweight[qworder[0]];
2522                                                                 sqdiff[0] = temp1 * temp1 +
2523                                                                            qweight[qworder[1]] * qweight[qworder[1]] +
2524                                                                            qweight[qworder[2]] * qweight[qworder[2]];
2525                                                                 discreteweight[0] = treebits[qworder[0]];
2526      
2527                                                                 temp1 = 0.5 - qweight[qworder[0]];
2528                                                                 temp2 = 0.5 - qweight[qworder[1]];
2529                                                                 sqdiff[1] = temp1 * temp1 + temp2 * temp2 +
2530                                                                            qweight[qworder[2]] * qweight[qworder[2]];           
2531                                                                 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
2532                                                                    
2533                                                                 temp1 = onethird - qweight[qworder[0]];
2534                                                                 temp2 = onethird - qweight[qworder[1]];
2535                                                                 temp3 = onethird - qweight[qworder[2]];
2536                                                                 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
2537                                                                 discreteweight[2] = (unsigned char) 7;                                                     
2538                                                         
2539                                                                 /* sort in descending order */
2540                                                                 sort3doubles(sqdiff, sqorder);
2541                                                         }
2542                                                         
2543                                                         /* determine best discrete weight */
2544                                                         writequartet(a, b, c, i, discreteweight[sqorder[2]]);
2545                                                 
2546                                                         /* counting completely unresolved quartets */
2547                                                         if (discreteweight[sqorder[2]] == 7) {
2548                                                                 badqs++;
2549                                                                 badtaxon[a]++;
2550                                                                 badtaxon[b]++;
2551                                                                 badtaxon[c]++;
2552                                                                 badtaxon[i]++;
2553                                                                 if (show_optn) {
2554                                                                         fputid10(unresfp, a);
2555                                                                         fprintf(unresfp, "  ");
2556                                                                         fputid10(unresfp, b);
2557                                                                         fprintf(unresfp, "  ");
2558                                                                         fputid10(unresfp, c);
2559                                                                         fprintf(unresfp, "  ");
2560                                                                         fputid(unresfp, i);
2561                                                                         fprintf(unresfp, "\n");
2562                                                                 }
2563                                                         }
2564                                                         addtimes(QUARTETS, &tarr);
2565                                                 }
2566         if (savequartlh_optn) {
2567                 closefile(lhfp);
2568         }
2569         if (show_optn)
2570                 closefile(unresfp);
2571         if (mflag == 1)
2572                 FPRINTF(STDOUTFILE "\n");
2573 #       endif /* PARALLEL */
2574
2575 }
2576                                                         
2577 /* check the branching structure between the leaves (not the taxa!)
2578    A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
2579    the two leaves that are closer related to each other than to leaf I
2580    are found in chooseA and chooseB. If the branching structure is
2581    not uniquely defined, ChooseA and ChooseB are chosen randomly
2582    from the possible taxa */
2583 void checkquartet(int A, int B, int C, int I)
2584 {
2585         int i, j, a, b, taxon[5], leaf[5], ipos;
2586         unsigned char qresult;
2587         int notunique = FALSE;
2588
2589         /* The relationship between leaves and taxa is defined by trueID */
2590         taxon[1] = trueID[A]; /* taxon number */
2591         leaf[1] = A;          /* leaf number  */
2592         taxon[2] = trueID[B];
2593         leaf[2] = B;
2594         taxon[3] = trueID[C];
2595         leaf[3] = C;
2596         taxon[4] = trueID[I];
2597         leaf[4] = I;
2598
2599         /* sort for taxa */
2600         /* Source: Numerical Recipes (PIKSR2.C) */
2601         for (j = 2; j <= 4; j++) {
2602                 a = taxon[j];
2603                 b = leaf[j];
2604                 i = j-1;
2605                 while (i > 0 && taxon[i] > a) {
2606                         taxon[i+1] = taxon[i];
2607                         leaf[i+1] = leaf[i];
2608                         i--;
2609                 }
2610                 taxon[i+1] = a;
2611                 leaf[i+1] = b;
2612         }
2613
2614         /* where is leaf I ? */
2615         ipos = 1;
2616         while (leaf[ipos] != I) ipos++;
2617
2618         /* look at sequence quartet */
2619         qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
2620
2621         /* chooseA and chooseB */
2622         do {    
2623                 switch (qresult) {
2624                 
2625                         /* one single branching structure */
2626                 
2627                         /* 001 */
2628                         case 1:         if (ipos == 1 || ipos == 2) {
2629                                                         chooseA = leaf[3];
2630                                                         chooseB = leaf[4];
2631                                                 } else {
2632                                                         chooseA = leaf[1];
2633                                                         chooseB = leaf[2];
2634                                                 }
2635                                                 notunique = FALSE;
2636                                                 break;
2637
2638                         /* 010 */
2639                         case 2:         if (ipos == 1 || ipos == 3) {
2640                                                         chooseA = leaf[2];
2641                                                         chooseB = leaf[4];
2642                                                 } else {
2643                                                         chooseA = leaf[1];
2644                                                         chooseB = leaf[3];
2645                                                 }
2646                                                 notunique = FALSE;
2647                                                 break;
2648
2649                         /* 100 */
2650                         case 4:         if (ipos == 1 || ipos == 4) {
2651                                                         chooseA = leaf[2];
2652                                                         chooseB = leaf[3];
2653                                                 } else {
2654                                                         chooseA = leaf[1];
2655                                                         chooseB = leaf[4];
2656                                                 }
2657                                                 notunique = FALSE;
2658                                                 break;
2659
2660                         /* two possible branching structures */         
2661
2662                         /* 011 */
2663                         case 3:         if (randominteger(2)) qresult = 1;
2664                                                 else qresult = 2;
2665                                                 notunique = TRUE;
2666                                                 break;
2667
2668                         /* 101 */
2669                         case 5:         if (randominteger(2)) qresult = 1;
2670                                                 else qresult = 4;
2671                                                 notunique = TRUE;
2672                                                 break;
2673
2674                         /* 110 */
2675                         case 6:         if (randominteger(2)) qresult = 2;
2676                                                 else qresult = 4;
2677                                                 notunique = TRUE;
2678                                                 break;
2679
2680                         /* three possible branching structures */
2681
2682                         /* 111 */
2683                         case 7:         qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
2684                                                 notunique = TRUE;
2685                                                 break;
2686
2687                         default:        /* Program error [checkquartet] */
2688 #if PARALLEL
2689                                                 FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n", 
2690                                                 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
2691                                                 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
2692 #else
2693                                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n");
2694 #endif
2695                                                 
2696                 }
2697         } while (notunique);
2698
2699         return;
2700 }
2701